Skip to content

Commit e27c307

Browse files
authored
Add retries to DataSourceGCE.py when connecting to GCE (canonical#1005)
Add retries to DataSourceGCE when connecting to GCE. Sometimes, when trying to fetch the metadata, cloud-init fails and the fallback datasource NoCloud is used, which is not expected. Add retries to ensure the data source is loaded.
1 parent dc22786 commit e27c307

File tree

5 files changed

+65
-17
lines changed

5 files changed

+65
-17
lines changed

cloudinit/sources/DataSourceGCE.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,20 @@
2727

2828
class GoogleMetadataFetcher(object):
2929

30-
def __init__(self, metadata_address):
30+
def __init__(self, metadata_address, num_retries, sec_between_retries):
3131
self.metadata_address = metadata_address
32+
self.num_retries = num_retries
33+
self.sec_between_retries = sec_between_retries
3234

3335
def get_value(self, path, is_text, is_recursive=False):
3436
value = None
3537
try:
3638
url = self.metadata_address + path
3739
if is_recursive:
3840
url += '/?recursive=True'
39-
resp = url_helper.readurl(url=url, headers=HEADERS)
41+
resp = url_helper.readurl(url=url, headers=HEADERS,
42+
retries=self.num_retries,
43+
sec_between=self.sec_between_retries)
4044
except url_helper.UrlError as exc:
4145
msg = "url %s raised exception %s"
4246
LOG.debug(msg, path, exc)
@@ -68,9 +72,11 @@ def __init__(self, sys_cfg, distro, paths):
6872
self.metadata_address = self.ds_cfg['metadata_url']
6973

7074
def _get_data(self):
75+
url_params = self.get_url_params()
7176
ret = util.log_time(
7277
LOG.debug, 'Crawl of GCE metadata service',
73-
read_md, kwargs={'address': self.metadata_address})
78+
read_md, kwargs={'address': self.metadata_address,
79+
'url_params': url_params})
7480

7581
if not ret['success']:
7682
if ret['platform_reports_gce']:
@@ -176,7 +182,7 @@ def _parse_public_keys(public_keys_data, default_user=None):
176182
return public_keys
177183

178184

179-
def read_md(address=None, platform_check=True):
185+
def read_md(address=None, url_params=None, platform_check=True):
180186

181187
if address is None:
182188
address = MD_V1_URL
@@ -203,8 +209,9 @@ def read_md(address=None, platform_check=True):
203209
('instance-data', ('instance/attributes',), False, False, True),
204210
('project-data', ('project/attributes',), False, False, True),
205211
]
206-
207-
metadata_fetcher = GoogleMetadataFetcher(address)
212+
metadata_fetcher = GoogleMetadataFetcher(address,
213+
url_params.num_retries,
214+
url_params.sec_between_retries)
208215
md = {}
209216
# Iterate over url_map keys to get metadata items.
210217
for (mkey, paths, required, is_text, is_recursive) in url_map:

cloudinit/sources/__init__.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
138138

139139

140140
URLParams = namedtuple(
141-
'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries'])
141+
'URLParms', ['max_wait_seconds', 'timeout_seconds',
142+
'num_retries', 'sec_between_retries'])
142143

143144

144145
class DataSource(CloudInitPickleMixin, metaclass=abc.ABCMeta):
@@ -175,9 +176,10 @@ class DataSource(CloudInitPickleMixin, metaclass=abc.ABCMeta):
175176
NetworkConfigSource.ds)
176177

177178
# read_url_params
178-
url_max_wait = -1 # max_wait < 0 means do not wait
179-
url_timeout = 10 # timeout for each metadata url read attempt
180-
url_retries = 5 # number of times to retry url upon 404
179+
url_max_wait = -1 # max_wait < 0 means do not wait
180+
url_timeout = 10 # timeout for each metadata url read attempt
181+
url_retries = 5 # number of times to retry url upon 404
182+
url_sec_between_retries = 1 # amount of seconds to wait between retries
181183

182184
# The datasource defines a set of supported EventTypes during which
183185
# the datasource can react to changes in metadata and regenerate
@@ -422,7 +424,18 @@ def get_url_params(self):
422424
LOG, "Config retries '%s' is not an int, using default '%s'",
423425
self.ds_cfg.get('retries'), retries)
424426

425-
return URLParams(max_wait, timeout, retries)
427+
sec_between_retries = self.url_sec_between_retries
428+
try:
429+
sec_between_retries = int(self.ds_cfg.get(
430+
"sec_between_retries",
431+
self.url_sec_between_retries))
432+
except Exception:
433+
util.logexc(
434+
LOG, "Config sec_between_retries '%s' is not an int,"
435+
" using default '%s'",
436+
self.ds_cfg.get("sec_between_retries"), sec_between_retries)
437+
438+
return URLParams(max_wait, timeout, retries, sec_between_retries)
426439

427440
def get_userdata(self, apply_filter=False):
428441
if self.userdata is None:

cloudinit/sources/tests/test_init.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,16 @@ def test_datasource_get_url_params_defaults(self):
9797
self.assertEqual(params.max_wait_seconds, self.datasource.url_max_wait)
9898
self.assertEqual(params.timeout_seconds, self.datasource.url_timeout)
9999
self.assertEqual(params.num_retries, self.datasource.url_retries)
100+
self.assertEqual(params.sec_between_retries,
101+
self.datasource.url_sec_between_retries)
100102

101103
def test_datasource_get_url_params_subclassed(self):
102104
"""Subclasses can override get_url_params defaults."""
103105
sys_cfg = {'datasource': {'MyTestSubclass': {'key2': False}}}
104106
distro = 'distrotest' # generally should be a Distro object
105107
datasource = DataSourceTestSubclassNet(sys_cfg, distro, self.paths)
106108
expected = (datasource.url_max_wait, datasource.url_timeout,
107-
datasource.url_retries)
109+
datasource.url_retries, datasource.url_sec_between_retries)
108110
url_params = datasource.get_url_params()
109111
self.assertNotEqual(self.datasource.get_url_params(), url_params)
110112
self.assertEqual(expected, url_params)
@@ -114,14 +116,16 @@ def test_datasource_get_url_params_ds_config_override(self):
114116
sys_cfg = {
115117
'datasource': {
116118
'MyTestSubclass': {
117-
'max_wait': '1', 'timeout': '2', 'retries': '3'}}}
119+
'max_wait': '1', 'timeout': '2',
120+
'retries': '3', 'sec_between_retries': 4
121+
}}}
118122
datasource = DataSourceTestSubclassNet(
119123
sys_cfg, self.distro, self.paths)
120-
expected = (1, 2, 3)
124+
expected = (1, 2, 3, 4)
121125
url_params = datasource.get_url_params()
122126
self.assertNotEqual(
123127
(datasource.url_max_wait, datasource.url_timeout,
124-
datasource.url_retries),
128+
datasource.url_retries, datasource.url_sec_between_retries),
125129
url_params)
126130
self.assertEqual(expected, url_params)
127131

@@ -130,7 +134,8 @@ def test_datasource_get_url_params_is_zero_or_greater(self):
130134
# Set an override that is below 0 which gets ignored.
131135
sys_cfg = {'datasource': {'_undef': {'timeout': '-1'}}}
132136
datasource = DataSource(sys_cfg, self.distro, self.paths)
133-
(_max_wait, timeout, _retries) = datasource.get_url_params()
137+
(_max_wait, timeout, _retries,
138+
_sec_between_retries) = datasource.get_url_params()
134139
self.assertEqual(0, timeout)
135140

136141
def test_datasource_get_url_uses_defaults_on_errors(self):
@@ -142,7 +147,7 @@ def test_datasource_get_url_uses_defaults_on_errors(self):
142147
datasource = DataSource(sys_cfg, self.distro, self.paths)
143148
url_params = datasource.get_url_params()
144149
expected = (datasource.url_max_wait, datasource.url_timeout,
145-
datasource.url_retries)
150+
datasource.url_retries, datasource.url_sec_between_retries)
146151
self.assertEqual(expected, url_params)
147152
logs = self.logs.getvalue()
148153
expected_logs = [

doc/rtd/topics/datasources/gce.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,28 @@ to provide ``public-keys``.
1515
``user-data`` and ``user-data-encoding`` can be provided to cloud-init by
1616
setting those custom metadata keys for an *instance*.
1717

18+
Configuration
19+
-------------
20+
The following configuration can be set for the datasource in system
21+
configuration (in ``/etc/cloud/cloud.cfg`` or ``/etc/cloud/cloud.cfg.d/``).
22+
23+
The settings that may be configured are:
24+
25+
* **retries**: The number of retries that should be attempted for an HTTP request.
26+
This value is used only after metadata_url is selected. (default: 5)
27+
* **sec_between_retries**: The time to wait, in seconds, between retries when
28+
crawling the metadata service. (default: 1)
29+
30+
31+
An example configuration with the default values is provided below:
32+
33+
.. sourcecode:: yaml
34+
35+
datasource:
36+
GCE:
37+
retries: 5
38+
sec_between_retries: 1
39+
1840
.. _GCE metadata docs: https://cloud.google.com/compute/docs/storing-retrieving-metadata#querying
1941

2042
.. vi: textwidth=78

tools/.github-cla-signers

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ timothegenzmer
6666
tnt-dev
6767
tomponline
6868
tsanghan
69+
vteratipally
6970
Vultaire
7071
WebSpider
7172
xiachen-rh

0 commit comments

Comments
 (0)