Skip to content

Commit c022a49

Browse files
author
Glyn Normington
committed
Retry failed downloads
HTTP GETs for downloads are retried up to 5 times. This includes both the initial GET of a buildpack invocation (detect, compile, or release), which doubles as an internet availability check, and subsequent GETs when the internet has already been checked and is deemed to be available. The retry limits for the initial GET and the subsequent GETs are coded as two distinct constants in case we want to make the values diverge. However, I think it makes sense to keep them the same because a user who got spurious internet unavailable behaviour because of a transient error would probably be just as put out as a user for whom a transiet error cropped up during a subsequent GET. The downside of retrying the initial GET is that when the internet really isn't available, it takes longer for the buildpack to realise. This shouldn't be an issue unless the GET request takes a significant time to fail, in which case the effect is to multiply the total such delay per buildpack invocation by a factor of 5. Reducing the retry limit for the initial GET would alleviate this somewhat at the risk of not handling transient errors so well. A better solution, when the delay is unacceptable, is for the user to fork the buildpack and disable remote downloads in config/cache.yml. [#57311364]
1 parent 3cb88f0 commit c022a49

2 files changed

Lines changed: 77 additions & 20 deletions

File tree

lib/java_buildpack/util/download_cache.rb

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ module JavaBuildpack::Util
2929
# A cache for downloaded files that is configured to use a filesystem as the backing store. This cache uses standard
3030
# file locking (<tt>File.flock()</tt>) in order ensure that mutation of files in the cache is non-concurrent across
3131
# processes. Reading files (once they've been downloaded) happens concurrently so read performance is not impacted.
32-
class DownloadCache
32+
class DownloadCache # rubocop:disable ClassLength
3333

3434
# Creates an instance of the cache that is backed by the filesystem rooted at +cache_root+
3535
#
@@ -127,6 +127,12 @@ def evict(uri)
127127

128128
HTTP_OK = '200'.freeze
129129

130+
TIMEOUT_SECONDS = 10
131+
132+
INTERNET_DETECTION_RETRY_LIMIT = 5
133+
134+
DOWNLOAD_RETRY_LIMIT = 5
135+
130136
@@monitor = Monitor.new
131137
@@internet_checked = false
132138
@@internet_up = true
@@ -136,8 +142,6 @@ def self.get_configuration
136142
YAML.load_file(expanded_path)
137143
end
138144

139-
TIMEOUT_SECONDS = 10
140-
141145
def self.internet_available?(filenames, uri, logger)
142146
@@monitor.synchronize do
143147
return @@internet_up, false if @@internet_checked # rubocop:disable RedundantReturn
@@ -147,16 +151,12 @@ def self.internet_available?(filenames, uri, logger)
147151
return store_internet_availability(false), false # rubocop:disable RedundantReturn
148152
elsif cache_configuration['remote_downloads'] == 'enabled'
149153
begin
150-
rich_uri = URI(uri)
151-
152154
# Beware known problems with timeouts: https://www.ruby-forum.com/topic/143840
153-
Net::HTTP.start(rich_uri.host, rich_uri.port, use_ssl: DownloadCache.use_ssl?(rich_uri), read_timeout: TIMEOUT_SECONDS, connect_timeout: TIMEOUT_SECONDS, open_timeout: TIMEOUT_SECONDS) do |http|
154-
request = Net::HTTP::Get.new(uri)
155-
http.request request do |response|
156-
internet_up = response.code == HTTP_OK
157-
write_response(filenames, response) if internet_up
158-
return store_internet_availability(internet_up), internet_up # rubocop:disable RedundantReturn
159-
end
155+
opts = { read_timeout: TIMEOUT_SECONDS, connect_timeout: TIMEOUT_SECONDS, open_timeout: TIMEOUT_SECONDS }
156+
return http_get(uri, INTERNET_DETECTION_RETRY_LIMIT, logger, opts) do |response|
157+
internet_up = response.code == HTTP_OK
158+
write_response(filenames, response) if internet_up
159+
return store_internet_availability(internet_up), internet_up # rubocop:disable RedundantReturn
160160
end
161161
rescue *HTTP_ERRORS => ex
162162
logger.debug { "Internet detection failed with #{ex}" }
@@ -167,6 +167,26 @@ def self.internet_available?(filenames, uri, logger)
167167
end
168168
end
169169

170+
def self.http_get(uri, retry_limit, logger, opts = {})
171+
rich_uri = URI(uri)
172+
options = opts.merge(use_ssl: DownloadCache.use_ssl?(rich_uri))
173+
Net::HTTP.start(rich_uri.host, rich_uri.port, options) do |http|
174+
request = Net::HTTP::Get.new(uri)
175+
1.upto(retry_limit) do |try|
176+
begin
177+
http.request request do |response|
178+
if response.code == HTTP_OK || try == retry_limit
179+
return yield response
180+
end
181+
end
182+
rescue *HTTP_ERRORS => ex
183+
logger.debug { "HTTP get attempt #{try} of #{retry_limit} failed: #{ex}" }
184+
raise ex if try == retry_limit
185+
end
186+
end
187+
end
188+
end
189+
170190
def self.store_internet_availability(internet_up)
171191
@@monitor.synchronize do
172192
@@internet_up = internet_up
@@ -188,15 +208,9 @@ def delete_file(filename)
188208
def download(filenames, uri, internet_up)
189209
if internet_up
190210
begin
191-
rich_uri = URI(uri)
192-
193-
Net::HTTP.start(rich_uri.host, rich_uri.port, use_ssl: DownloadCache.use_ssl?(rich_uri)) do |http|
194-
request = Net::HTTP::Get.new(uri)
195-
http.request request do |response|
196-
DownloadCache.write_response(filenames, response)
197-
end
211+
DownloadCache.http_get(uri, DOWNLOAD_RETRY_LIMIT, @logger) do |response|
212+
DownloadCache.write_response(filenames, response)
198213
end
199-
200214
rescue *HTTP_ERRORS => ex
201215
puts 'FAIL'
202216
error_message = "Unable to download from #{uri} due to #{ex}"

spec/java_buildpack/util/download_cache_spec.rb

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,25 @@ def suppress_internet_availability_check
9494
end
9595
end
9696

97+
it 'should not raise error if download cannot be completed but retrying succeeds' do
98+
stub_request(:get, 'http://foo-uri/').to_raise(SocketError).then.to_return(
99+
status: 200,
100+
body: 'foo-cached',
101+
headers: {
102+
Etag: 'foo-etag',
103+
'Last-Modified' => 'foo-last-modified'
104+
}
105+
)
106+
107+
Dir.mktmpdir do |root|
108+
DownloadCache.new(root).get('http://foo-uri/') {}
109+
110+
expect_file_content root, 'cached', 'foo-cached'
111+
expect_file_content root, 'etag', 'foo-etag'
112+
expect_file_content root, 'last_modified', 'foo-last-modified'
113+
end
114+
end
115+
97116
it 'should download from a uri if the cached file exists and etag exists' do
98117
suppress_internet_availability_check
99118

@@ -355,6 +374,30 @@ def suppress_internet_availability_check
355374
end
356375
end
357376

377+
it 'should not use the buildpack cache if the download cannot be completed but a retry succeeds' do
378+
stub_request(:get, 'http://foo-uri/').to_raise(SocketError).then.to_return(
379+
status: 200,
380+
body: 'foo-cached',
381+
headers: {
382+
Etag: 'foo-etag',
383+
'Last-Modified' => 'foo-last-modified'
384+
}
385+
)
386+
387+
Dir.mktmpdir do |root|
388+
Dir.mktmpdir do |buildpack_cache|
389+
java_buildpack_cache = File.join(buildpack_cache, 'java-buildpack')
390+
FileUtils.mkdir_p java_buildpack_cache
391+
touch java_buildpack_cache, 'cached', 'foo-stashed'
392+
with_buildpack_cache(buildpack_cache) do
393+
DownloadCache.new(root).get('http://foo-uri/') do |file|
394+
expect(file.read).to eq('foo-cached')
395+
end
396+
end
397+
end
398+
end
399+
end
400+
358401
it 'should use the buildpack cache if the download cannot be completed because Errno::ENETUNREACH is raised' do
359402
stub_request(:get, 'http://foo-uri/').to_raise(Errno::ENETUNREACH)
360403

0 commit comments

Comments
 (0)