forked from tonybeltramelli/pix2code
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpix2code.py
More file actions
76 lines (57 loc) · 3.16 KB
/
Copy pathpix2code.py
File metadata and controls
76 lines (57 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from __future__ import absolute_import
__author__ = 'Tony Beltramelli - www.tonybeltramelli.com'
from keras.layers import Input, Dense, Dropout, \
RepeatVector, LSTM, concatenate, \
Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential, Model
from keras.optimizers import RMSprop
from keras import *
from .Config import *
from .AModel import *
class pix2code(AModel):
    """Two-input Keras model: an image plus a token context in, a softmax out.

    A convolutional encoder turns the image into a 1024-wide vector that is
    repeated CONTEXT_LENGTH times, a two-layer LSTM encodes the token context,
    and the two sequences are concatenated and decoded by two further LSTMs
    into a softmax over ``output_size`` classes.
    """

    def __init__(self, input_shape, output_size, output_path):
        """Build and compile the network and store it in ``self.model``.

        Args:
            input_shape: shape handed to the first Conv2D layer and to the
                visual ``Input``.
            output_size: width of each context step and of the final softmax.
            output_path: forwarded untouched to ``AModel.__init__``
                (presumably where the model is saved -- AModel is not visible
                here, confirm there).
        """
        # Explicit base-class call kept as-is; AModel is defined elsewhere.
        AModel.__init__(self, input_shape, output_size, output_path)
        self.name = "pix2code"

        # --- Vision encoder: three (conv, conv, pool, dropout) stages. ---
        cnn = Sequential()
        for stage, filters in enumerate((32, 64, 128)):
            # Only the very first layer of the stack declares the input shape.
            first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
            cnn.add(Conv2D(filters, (3, 3), padding='valid', activation='relu', **first_layer_kwargs))
            cnn.add(Conv2D(filters, (3, 3), padding='valid', activation='relu'))
            cnn.add(MaxPooling2D(pool_size=(2, 2)))
            cnn.add(Dropout(0.25))

        cnn.add(Flatten())
        for _ in range(2):
            cnn.add(Dense(1024, activation='relu'))
            cnn.add(Dropout(0.3))

        # Repeat the image vector once per context step so it can be
        # concatenated with the per-step language features below.
        cnn.add(RepeatVector(CONTEXT_LENGTH))

        visual_input = Input(shape=input_shape)
        image_features = cnn(visual_input)

        # --- Language encoder: two stacked LSTMs over the token context. ---
        context_shape = (CONTEXT_LENGTH, output_size)
        rnn = Sequential()
        rnn.add(LSTM(128, return_sequences=True, input_shape=context_shape))
        rnn.add(LSTM(128, return_sequences=True))

        textual_input = Input(shape=context_shape)
        text_features = rnn(textual_input)

        # --- Decoder: merge both streams, then collapse to one prediction. ---
        merged = concatenate([image_features, text_features])
        hidden = LSTM(512, return_sequences=True)(merged)
        # return_sequences=False: only the last step's output is kept.
        hidden = LSTM(512, return_sequences=False)(hidden)
        class_probabilities = Dense(output_size, activation='softmax')(hidden)

        self.model = Model(inputs=[visual_input, textual_input], outputs=class_probabilities)
        self.model.compile(
            loss='categorical_crossentropy',
            optimizer=RMSprop(lr=0.0001, clipvalue=1.0),
        )

    def fit(self, images, partial_captions, next_words):
        """Train on in-memory arrays without shuffling, then call ``self.save()``.

        Runs for EPOCHS epochs with batches of BATCH_SIZE. ``save`` is not
        defined in this class (presumably inherited from AModel).
        """
        model_inputs = [images, partial_captions]
        self.model.fit(
            model_inputs,
            next_words,
            shuffle=False,
            epochs=EPOCHS,
            batch_size=BATCH_SIZE,
            verbose=1,
        )
        self.save()

    def fit_generator(self, generator, steps_per_epoch):
        """Train from ``generator`` for EPOCHS epochs, then call ``self.save()``."""
        self.model.fit_generator(
            generator,
            steps_per_epoch=steps_per_epoch,
            epochs=EPOCHS,
            verbose=1,
        )
        self.save()

    def predict(self, image, partial_caption):
        """Run the model silently and return the first entry of its output."""
        predictions = self.model.predict([image, partial_caption], verbose=0)
        return predictions[0]

    def predict_batch(self, images, partial_captions):
        """Run the model with progress output and return its full output."""
        model_inputs = [images, partial_captions]
        return self.model.predict(model_inputs, verbose=1)