diff --git a/.gitignore b/.gitignore
index f619e91..ed8abb7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -152,3 +152,10 @@ com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
+
+# Generated dataset files
+datasets/*
+!datasets/pix2code_datasets.z*
+
+# Training output
+bin/
diff --git a/README.md b/README.md
index 8bc6fb0..101ee2a 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,18 @@ The current implementation is not, in any way, intended, nor able to generate co
We could not emphasize enough that this project is experimental and shared for educational purposes only.
Both the source code and the datasets are provided to foster future research in machine intelligence and are not designed for end users.
+## Setup
+### Prerequisites
+
+- Python 2 or 3
+- pip
+
+### Install dependencies
+
+```sh
+pip install -r requirements.txt
+```
+
## Usage
Prepare the data:
@@ -39,7 +51,7 @@ cd datasets
zip -F pix2code_datasets.zip --out datasets.zip
unzip datasets.zip
-cd model
+cd ../model
# split training set and evaluation set while ensuring no training example in the evaluation set
# usage: build_datasets.py
diff --git a/compiler/android-compiler.py b/compiler/android-compiler.py
index 6bfb14e..07d00e7 100755
--- a/compiler/android-compiler.py
+++ b/compiler/android-compiler.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+from __future__ import print_function
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
import sys
@@ -13,8 +14,8 @@
if length != 0:
input_file = argv[0]
else:
- print "Error: not enough argument supplied:"
- print "android-compiler.py "
+ print("Error: not enough argument supplied:")
+ print("android-compiler.py ")
exit(0)
TEXT_PLACE_HOLDER = "[TEXT]"
diff --git a/compiler/classes/Node.py b/compiler/classes/Node.py
index d767733..69d774c 100644
--- a/compiler/classes/Node.py
+++ b/compiler/classes/Node.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+from __future__ import print_function
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
@@ -13,7 +14,7 @@ def add_child(self, child):
self.children.append(child)
def show(self):
- print self.key
+ print(self.key)
for child in self.children:
child.show()
diff --git a/compiler/classes/Utils.py b/compiler/classes/Utils.py
index 4f675de..ddabbbf 100644
--- a/compiler/classes/Utils.py
+++ b/compiler/classes/Utils.py
@@ -9,7 +9,7 @@ class Utils:
def get_random_text(length_text=10, space_number=1, with_upper_case=True):
results = []
while len(results) < length_text:
- char = random.choice(string.letters[:26])
+ char = random.choice(string.ascii_letters[:26])
results.append(char)
if with_upper_case:
results[0] = results[0].upper()
@@ -32,7 +32,7 @@ def get_ios_id(length=10):
results = []
while len(results) < length:
- char = random.choice(string.digits + string.letters)
+ char = random.choice(string.digits + string.ascii_letters)
results.append(char)
results[3] = "-"
@@ -45,7 +45,7 @@ def get_android_id(length=10):
results = []
while len(results) < length:
- char = random.choice(string.letters)
+ char = random.choice(string.ascii_letters)
results.append(char)
return ''.join(results)
diff --git a/compiler/ios-compiler.py b/compiler/ios-compiler.py
index 07b072a..b95f665 100755
--- a/compiler/ios-compiler.py
+++ b/compiler/ios-compiler.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+from __future__ import print_function
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
import sys
@@ -13,8 +14,8 @@
if length != 0:
input_file = argv[0]
else:
- print "Error: not enough argument supplied:"
- print "ios-compiler.py "
+ print("Error: not enough argument supplied:")
+ print("ios-compiler.py ")
exit(0)
TEXT_PLACE_HOLDER = "[TEXT]"
diff --git a/compiler/web-compiler.py b/compiler/web-compiler.py
index ea8ec0f..d365a48 100755
--- a/compiler/web-compiler.py
+++ b/compiler/web-compiler.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+from __future__ import print_function
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
import sys
@@ -13,8 +14,8 @@
if length != 0:
input_file = argv[0]
else:
- print "Error: not enough argument supplied:"
- print "web-compiler.py "
+ print("Error: not enough argument supplied:")
+ print("web-compiler.py ")
exit(0)
FILL_WITH_RANDOM_TEXT = True
diff --git a/datasets/pix2code_datasets.z01 b/datasets/pix2code_datasets.z01
index c21d0d2..c03d638 100644
Binary files a/datasets/pix2code_datasets.z01 and b/datasets/pix2code_datasets.z01 differ
diff --git a/datasets/pix2code_datasets.z02 b/datasets/pix2code_datasets.z02
index 8437fec..cae402f 100644
Binary files a/datasets/pix2code_datasets.z02 and b/datasets/pix2code_datasets.z02 differ
diff --git a/datasets/pix2code_datasets.z03 b/datasets/pix2code_datasets.z03
index 22cf9c4..53ca1ba 100644
Binary files a/datasets/pix2code_datasets.z03 and b/datasets/pix2code_datasets.z03 differ
diff --git a/datasets/pix2code_datasets.z04 b/datasets/pix2code_datasets.z04
index 2054b2c..153725d 100644
Binary files a/datasets/pix2code_datasets.z04 and b/datasets/pix2code_datasets.z04 differ
diff --git a/datasets/pix2code_datasets.z05 b/datasets/pix2code_datasets.z05
index a5ba271..3df1516 100644
Binary files a/datasets/pix2code_datasets.z05 and b/datasets/pix2code_datasets.z05 differ
diff --git a/datasets/pix2code_datasets.z06 b/datasets/pix2code_datasets.z06
index dd5d4ac..cd6188e 100644
Binary files a/datasets/pix2code_datasets.z06 and b/datasets/pix2code_datasets.z06 differ
diff --git a/datasets/pix2code_datasets.z07 b/datasets/pix2code_datasets.z07
index 97b8bf5..8ced725 100644
Binary files a/datasets/pix2code_datasets.z07 and b/datasets/pix2code_datasets.z07 differ
diff --git a/datasets/pix2code_datasets.z08 b/datasets/pix2code_datasets.z08
index 8239b79..8be8269 100644
Binary files a/datasets/pix2code_datasets.z08 and b/datasets/pix2code_datasets.z08 differ
diff --git a/datasets/pix2code_datasets.z09 b/datasets/pix2code_datasets.z09
index b36dcd0..101b7de 100644
Binary files a/datasets/pix2code_datasets.z09 and b/datasets/pix2code_datasets.z09 differ
diff --git a/datasets/pix2code_datasets.zip b/datasets/pix2code_datasets.zip
index d10eb90..cc92b93 100644
Binary files a/datasets/pix2code_datasets.zip and b/datasets/pix2code_datasets.zip differ
diff --git a/model/build_datasets.py b/model/build_datasets.py
index 3d39e00..513d883 100755
--- a/model/build_datasets.py
+++ b/model/build_datasets.py
@@ -1,4 +1,6 @@
#!/usr/bin/env python
+from __future__ import print_function
+from __future__ import absolute_import
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
import os
@@ -11,8 +13,8 @@
argv = sys.argv[1:]
if len(argv) < 1:
- print "Error: not enough argument supplied:"
- print "build_datasets.py "
+ print("Error: not enough argument supplied:")
+ print("build_datasets.py ")
exit(0)
else:
input_path = argv[0]
@@ -37,7 +39,7 @@
assert training_samples_number + evaluation_samples_number == len(paths)
-print "Splitting datasets, training samples: {}, evaluation samples: {}".format(training_samples_number, evaluation_samples_number)
+print("Splitting datasets, training samples: {}, evaluation samples: {}".format(training_samples_number, evaluation_samples_number))
np.random.shuffle(paths)
@@ -46,12 +48,17 @@
hashes = []
for path in paths:
- with open("{}/{}.gui".format(input_path, path), 'r') as f:
+ if sys.version_info >= (3,):
+ f = open("{}/{}.gui".format(input_path, path), 'r', encoding='utf-8')
+ else:
+ f = open("{}/{}.gui".format(input_path, path), 'r')
+
+ with f:
chars = ""
for line in f:
chars += line
content_hash = chars.replace(" ", "").replace("\n", "")
- content_hash = hashlib.sha256(content_hash).hexdigest()
+ content_hash = hashlib.sha256(content_hash.encode('utf-8')).hexdigest()
if len(eval_set) == evaluation_samples_number:
train_set.append(path)
@@ -86,5 +93,5 @@
shutil.copyfile("{}/{}.png".format(input_path, path), "{}/{}/{}.png".format(os.path.dirname(input_path), TRAINING_SET_NAME, path))
shutil.copyfile("{}/{}.gui".format(input_path, path), "{}/{}/{}.gui".format(os.path.dirname(input_path), TRAINING_SET_NAME, path))
-print "Training dataset: {}/training_set".format(os.path.dirname(input_path), path)
-print "Evaluation dataset: {}/eval_set".format(os.path.dirname(input_path), path)
+print("Training dataset: {}/training_set".format(os.path.dirname(input_path)))
+print("Evaluation dataset: {}/eval_set".format(os.path.dirname(input_path)))
diff --git a/model/classes/BeamSearch.py b/model/classes/BeamSearch.py
index 6dfa893..a28e6a1 100644
--- a/model/classes/BeamSearch.py
+++ b/model/classes/BeamSearch.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
@@ -44,7 +45,7 @@ def max_child(self):
return nodes[0]
def show(self, depth=0):
- print " " * depth, self.key, self.value, self.level
+ print(" " * depth, self.key, self.value, self.level)
for child in self.children:
child.show(depth + 2)
diff --git a/model/classes/Sampler.py b/model/classes/Sampler.py
index 2b5b11d..aaf483a 100644
--- a/model/classes/Sampler.py
+++ b/model/classes/Sampler.py
@@ -1,8 +1,10 @@
+from __future__ import print_function
+from __future__ import absolute_import
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
-from Vocabulary import *
-from BeamSearch import *
-from Utils import *
+from .Vocabulary import *
+from .BeamSearch import *
+from .Utils import *
class Sampler:
@@ -13,9 +15,9 @@ def __init__(self, voc_path, input_shape, output_size, context_length):
self.input_shape = input_shape
self.output_size = output_size
- print "Vocabulary size: {}".format(self.voc.size)
- print "Input shape: {}".format(self.input_shape)
- print "Output size: {}".format(self.output_size)
+ print("Vocabulary size: {}".format(self.voc.size))
+ print("Input shape: {}".format(self.input_shape))
+ print("Output size: {}".format(self.output_size))
self.context_length = context_length
@@ -30,7 +32,7 @@ def predict_greedy(self, model, input_img, require_sparse_label=True, sequence_l
for i in range(0, sequence_length):
if verbose:
- print "predicting {}/{}...".format(i, sequence_length)
+ print("predicting {}/{}...".format(i, sequence_length))
probas = model.predict(input_img, np.array([current_context]))
prediction = np.argmax(probas)
diff --git a/model/classes/Vocabulary.py b/model/classes/Vocabulary.py
index fa2e3fb..26cb1ac 100644
--- a/model/classes/Vocabulary.py
+++ b/model/classes/Vocabulary.py
@@ -1,5 +1,6 @@
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
+import sys
import numpy as np
START_TOKEN = ""
@@ -26,7 +27,11 @@ def append(self, token):
self.size += 1
def create_binary_representation(self):
- for key, value in self.vocabulary.iteritems():
+ if sys.version_info >= (3,):
+ items = self.vocabulary.items()
+ else:
+ items = self.vocabulary.iteritems()
+ for key, value in items:
binary = np.zeros(self.size)
binary[value] = 1
self.binary_vocabulary[key] = binary
@@ -36,7 +41,11 @@ def get_serialized_binary_representation(self):
self.create_binary_representation()
string = ""
- for key, value in self.binary_vocabulary.iteritems():
+ if sys.version_info >= (3,):
+ items = self.binary_vocabulary.items()
+ else:
+ items = self.binary_vocabulary.iteritems()
+ for key, value in items:
array_as_string = np.array2string(value, separator=',', max_line_width=self.size * self.size)
string += "{}{}{}\n".format(key, SEPARATOR, array_as_string[1:len(array_as_string) - 1])
return string
diff --git a/model/classes/dataset/Dataset.py b/model/classes/dataset/Dataset.py
index 128716e..af470b8 100644
--- a/model/classes/dataset/Dataset.py
+++ b/model/classes/dataset/Dataset.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
import os
@@ -22,7 +23,7 @@ def __init__(self):
@staticmethod
def load_paths_only(path):
- print "Parsing data..."
+ print("Parsing data...")
gui_paths = []
img_paths = []
for f in os.listdir(path):
@@ -42,7 +43,7 @@ def load_paths_only(path):
return gui_paths, img_paths
def load(self, path, generate_binary_sequences=False):
- print "Loading data..."
+ print("Loading data...")
for f in os.listdir(path):
if f.find(".gui") != -1:
gui = open("{}/{}".format(path, f), 'r')
@@ -55,7 +56,7 @@ def load(self, path, generate_binary_sequences=False):
img = np.load("{}/{}.npz".format(path, file_name))["features"]
self.append(file_name, gui, img)
- print "Generating sparse vectors..."
+ print("Generating sparse vectors...")
self.voc.create_binary_representation()
self.next_words = self.sparsify_labels(self.next_words, self.voc)
if generate_binary_sequences:
@@ -67,17 +68,17 @@ def load(self, path, generate_binary_sequences=False):
assert self.size == len(self.input_images) == len(self.partial_sequences) == len(self.next_words)
assert self.voc.size == len(self.voc.vocabulary)
- print "Dataset size: {}".format(self.size)
- print "Vocabulary size: {}".format(self.voc.size)
+ print("Dataset size: {}".format(self.size))
+ print("Vocabulary size: {}".format(self.voc.size))
self.input_shape = self.input_images[0].shape
self.output_size = self.voc.size
- print "Input shape: {}".format(self.input_shape)
- print "Output size: {}".format(self.output_size)
+ print("Input shape: {}".format(self.input_shape))
+ print("Output size: {}".format(self.output_size))
def convert_arrays(self):
- print "Convert arrays..."
+ print("Convert arrays...")
self.input_images = np.array(self.input_images)
self.partial_sequences = np.array(self.partial_sequences)
self.next_words = np.array(self.next_words)
diff --git a/model/classes/dataset/Generator.py b/model/classes/dataset/Generator.py
index a635633..2fc8e43 100644
--- a/model/classes/dataset/Generator.py
+++ b/model/classes/dataset/Generator.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
import numpy as np
@@ -49,7 +50,7 @@ def data_generator(voc, gui_paths, img_paths, batch_size, generate_binary_sequen
if sample_in_batch_counter == batch_size or (loop_only_one and i == len(gui_paths) - 1):
if verbose:
- print "Generating sparse vectors..."
+ print("Generating sparse vectors...")
batch_next_words = Dataset.sparsify_labels(batch_next_words, voc)
if generate_binary_sequences:
batch_partial_sequences = Dataset.binarize(batch_partial_sequences, voc)
@@ -57,13 +58,13 @@ def data_generator(voc, gui_paths, img_paths, batch_size, generate_binary_sequen
batch_partial_sequences = Dataset.indexify(batch_partial_sequences, voc)
if verbose:
- print "Convert arrays..."
+ print("Convert arrays...")
batch_input_images = np.array(batch_input_images)
batch_partial_sequences = np.array(batch_partial_sequences)
batch_next_words = np.array(batch_next_words)
if verbose:
- print "Yield batch"
+ print("Yield batch")
yield ([batch_input_images, batch_partial_sequences], batch_next_words)
batch_input_images = []
diff --git a/model/classes/model/pix2code.py b/model/classes/model/pix2code.py
index 6ad5684..e8b2bcd 100644
--- a/model/classes/model/pix2code.py
+++ b/model/classes/model/pix2code.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
from keras.layers import Input, Dense, Dropout, \
@@ -6,8 +7,8 @@
from keras.models import Sequential, Model
from keras.optimizers import RMSprop
from keras import *
-from Config import *
-from AModel import *
+from .Config import *
+from .AModel import *
class pix2code(AModel):
@@ -69,7 +70,7 @@ def fit_generator(self, generator, steps_per_epoch):
self.save()
def predict(self, image, partial_caption):
- return self.model.predict_proba([image, partial_caption], verbose=0)[0]
+ return self.model.predict([image, partial_caption], verbose=0)[0]
def predict_batch(self, images, partial_captions):
- return self.model.predict_proba([images, partial_captions], verbose=1)
+ return self.model.predict([images, partial_captions], verbose=1)
diff --git a/model/convert_imgs_to_arrays.py b/model/convert_imgs_to_arrays.py
index 98421ab..b29bda8 100755
--- a/model/convert_imgs_to_arrays.py
+++ b/model/convert_imgs_to_arrays.py
@@ -1,4 +1,6 @@
#!/usr/bin/env python
+from __future__ import print_function
+from __future__ import absolute_import
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
import os
@@ -11,8 +13,8 @@
argv = sys.argv[1:]
if len(argv) < 2:
- print "Error: not enough argument supplied:"
- print "convert_imgs_to_arrays.py