From 8010bdca09ddaee9095bda5444421bbf71f4c29b Mon Sep 17 00:00:00 2001 From: OpenStack Release Bot Date: Tue, 18 Mar 2025 16:27:22 +0000 Subject: [PATCH 1/7] Update .gitreview for stable/2025.1 Change-Id: I8a9fbcac2aa4090b913f8b2e7953fd8397435433 --- .gitreview | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitreview b/.gitreview index c2b7eef7078..bc50fb9256a 100644 --- a/.gitreview +++ b/.gitreview @@ -2,3 +2,4 @@ host=review.opendev.org port=29418 project=openstack/nova.git +defaultbranch=stable/2025.1 From 6422ebea0b152a2b34abdacf4453c70bf0ee37cf Mon Sep 17 00:00:00 2001 From: OpenStack Release Bot Date: Tue, 18 Mar 2025 16:27:26 +0000 Subject: [PATCH 2/7] Update TOX_CONSTRAINTS_FILE for stable/2025.1 Update the URL to the upper-constraints file to point to the redirect rule on releases.openstack.org so that anyone working on this branch will switch to the correct upper-constraints list automatically when the requirements repository branches. Until the requirements repository has as stable/2025.1 branch, tests will continue to use the upper-constraints list on master. Change-Id: I9829aa7887fc67a7693c134ef5fe302207e1a58a --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index d402501920f..9026f14ab1d 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ allowlist_externals = rm env make -install_command = python -I -m pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master} {opts} {packages} +install_command = python -I -m pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/2025.1} {opts} {packages} setenv = VIRTUAL_ENV={envdir} LANGUAGE=en_US From 91f6f7e30e4b5615322a4218fbb368b461274ee4 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Fri, 14 Feb 2025 16:20:08 +0100 Subject: [PATCH 3/7] Reproduce bug/2098496 Related-Bug: #2098496 Change-Id: I9a3091662fb5d1d0a41dbeff56d9680a748fe312 (cherry picked from commit 32afd0c644f1b9e378d113fba11bb189682e1740) --- .../regressions/test_bug_2098496.py | 181 ++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 nova/tests/functional/regressions/test_bug_2098496.py diff --git a/nova/tests/functional/regressions/test_bug_2098496.py b/nova/tests/functional/regressions/test_bug_2098496.py new file mode 100644 index 00000000000..731d9f441a4 --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_2098496.py @@ -0,0 +1,181 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +import copy + +from nova import objects +from nova.tests.fixtures import libvirt as fakelibvirt +from nova.tests.functional.libvirt import test_pci_in_placement as base + + +class PCIInPlacementCreateAfterDeleteTestCase(base.PlacementPCIReportingTests): + def setUp(self): + super().setUp() + self.flags(group='filter_scheduler', pci_in_placement=True) + + def test_create_after_delete(self): + device_spec = self._to_list_of_json_str( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + }, + ] + ) + pci_alias = { + "name": "a-vf", + "product_id": fakelibvirt.VF_PROD_ID, + "vendor_id": fakelibvirt.PCI_VEND_ID, + } + self._test_create_second_vm_after_first_is_deleted( + device_spec, pci_alias, self.VF_RC, traits=[]) + + def test_create_after_delete_with_rc_and_traits(self): + device_spec = self._to_list_of_json_str( + [ + { + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": fakelibvirt.VF_PROD_ID, + "resource_class": "vf", + "traits": "blue,big", + }, + ] + ) + pci_alias = { + "name": "a-vf", + "resource_class": "vf", + "traits": "big", + } + self._test_create_second_vm_after_first_is_deleted( + device_spec, pci_alias, + "CUSTOM_VF", traits=["CUSTOM_BLUE", "CUSTOM_BIG"]) + + def _test_create_second_vm_after_first_is_deleted( + self, device_spec, pci_alias, rc, traits + ): + # The fake libvirt will emulate on the host: + # * one PF with 2 VFs + pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pci=0, num_pfs=1, num_vfs=2) + # make the VFs available + self.flags(group='pci', device_spec=device_spec) + self.start_compute(hostname="compute1", pci_info=pci_info) + + compute1_expected_placement_view = { + "inventories": { + "0000:81:00.0": {rc: 2}, + }, + "traits": {"0000:81:00.0": traits, + }, + "usages": { + "0000:81:00.0": {rc: 0}, + }, + "allocations": {}, + } + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + self.assertPCIDeviceCounts('compute1', total=2, free=2) + # assert that the two VFs from the same PF are in the same pool + pools = objects.ComputeNode.get_first_node_by_host_for_old_compat( + self.ctxt, "compute1").pci_device_pools + self.assertEqual(len(pools), 1) + self.assertEqual(pools[0].count, 2) + compute1_empty = copy.deepcopy(compute1_expected_placement_view) + + self.flags( + group="pci", + alias=self._to_list_of_json_str([pci_alias]), + ) + extra_spec = {"pci_passthrough:alias": "a-vf:1"} + flavor_id = self._create_flavor(extra_spec=extra_spec) + server_1vf = self._create_server(flavor_id=flavor_id, networks=[]) + + self.assertPCIDeviceCounts('compute1', total=2, free=1) + compute1_expected_placement_view["usages"] = { + "0000:81:00.0": {rc: 1} + } + compute1_expected_placement_view["allocations"][server_1vf["id"]] = { + "0000:81:00.0": {rc: 1}, + } + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + self.assert_no_pci_healing("compute1") + + self._delete_server(server_1vf) + self.assertPCIDeviceCounts('compute1', total=2, free=2) + self.assert_placement_pci_view("compute1", **compute1_empty) + + # This is already shows the potential issue behind bug/2098496. + # After the server is deleted we are not back to a single pool of 2 + # VFs but instead two separate pools with one device each but both + # related to the same rp_uuid. This breaks an assumption of the pool + # allocation logic later. + pools = objects.ComputeNode.get_first_node_by_host_for_old_compat( + self.ctxt, "compute1").pci_device_pools + self.assertEqual(len(pools), 2) + self.assertEqual(pools[0].count, 1) + self.assertEqual(pools[1].count, 1) + self.assertEqual(pools[0].tags['rp_uuid'], pools[1].tags['rp_uuid']) + + # This is the expected state after the bug is fixed + # assert that the single pool is not broken into two during the + # de-allocation + # pools = objects.ComputeNode.get_first_node_by_host_for_old_compat( + # self.ctxt, "compute1").pci_device_pools + # self.assertEqual(len(pools), 1) + # self.assertEqual(pools[0].count, 2) + + server_1vf = self._create_server(flavor_id=flavor_id, networks=[]) + # This is bug/2098496 as the VM now consumes 2 VFs instead of one it + # requested + self.assertPCIDeviceCounts('compute1', total=2, free=0) + compute1_expected_placement_view = copy.deepcopy(compute1_empty) + compute1_expected_placement_view["usages"] = { + "0000:81:00.0": {rc: 2} + } + compute1_expected_placement_view["allocations"][server_1vf["id"]] = { + "0000:81:00.0": {rc: 2}, + } + self.assert_placement_pci_view( + "compute1", **compute1_expected_placement_view) + # prove that the placement view got corrupted only after the claim + # on the compute due the allocation healing logic + last_healing = self.pci_healing_fixture.last_healing("compute1") + alloc_before = last_healing[0] + alloc_after = last_healing[1] + + def alloc_by_rc(allocations, rc): + return [ + alloc["resources"][rc] + for alloc in allocations.values() + if rc in alloc["resources"] + ] + + pci_alloc_before = alloc_by_rc( + alloc_before[server_1vf["id"]]["allocations"], rc) + self.assertEqual(1, sum(pci_alloc_before)) + + pci_alloc_after = alloc_by_rc( + alloc_after[server_1vf["id"]]["allocations"], rc) + self.assertEqual(2, sum(pci_alloc_after)) + + # This is the expected result after the bug is fixed + # self.assertPCIDeviceCounts('compute1', total=2, free=1) + # compute1_expected_placement_view = copy.deepcopy(compute1_empty) + # compute1_expected_placement_view["usages"] = { + # "0000:81:00.0": {rc: 1} + # } + # compute1_expected_placement_view["allocations"][server_1vf["id"]] = { + # "0000:81:00.0": {rc: 1}, + # } + # self.assert_placement_pci_view( + # "compute1", **compute1_expected_placement_view) + # self.assert_no_pci_healing("compute1") From 7f1b13ea40bec66af0707cab227fc14c55cdd1b3 Mon Sep 17 00:00:00 2001 From: Sean Mooney Date: Tue, 18 Mar 2025 17:42:07 +0000 Subject: [PATCH 4/7] wrap wsgi_app.init_application with latch_error_on_raise This change adds a latch_error_on_raise decorator which is applied to the init_applciation function in our common wsgi_app module. This decorator will catch all non retryable exceptions and cause future invocations of the function to always return that same exception forever. a reset function is also added to the decorated function which should be called in our bases test class to prevent cross test interactons. Closes-Bug: #2103811 Related-Bug: #1882094 Change-Id: I44b1f7e2acc36a5b557d6d8788f6099f52bbdfb8 (cherry picked from commit 8dcbbe43e71b1528777fd5a905ad340e916d94ac) --- nova/api/openstack/wsgi_app.py | 2 + nova/test.py | 1 + .../tests/unit/api/openstack/test_wsgi_app.py | 22 ++++++++ nova/tests/unit/test_utils.py | 55 +++++++++++++++++++ nova/utils.py | 39 +++++++++++++ ...latch-error-on-raise-cf2da71a12b5f55f.yaml | 9 +++ 6 files changed, 128 insertions(+) create mode 100644 releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml diff --git a/nova/api/openstack/wsgi_app.py b/nova/api/openstack/wsgi_app.py index 6a2b72a6111..d6f1030f835 100644 --- a/nova/api/openstack/wsgi_app.py +++ b/nova/api/openstack/wsgi_app.py @@ -15,6 +15,7 @@ import sys from oslo_config import cfg +from oslo_db import exception as odbe from oslo_log import log as logging from oslo_reports import guru_meditation_report as gmr from oslo_reports import opts as gmr_opts @@ -116,6 +117,7 @@ def init_global_data(conf_files, service_name): logging.DEBUG) +@utils.latch_error_on_raise(retryable=(odbe.DBConnectionError,)) def init_application(name): conf_files = _get_config_files() diff --git a/nova/test.py b/nova/test.py index 0ddd928904c..e084f5c14f7 100644 --- a/nova/test.py +++ b/nova/test.py @@ -304,6 +304,7 @@ def setUp(self): # make sure that the wsgi app is fully initialized for all testcase # instead of only once initialized for test worker wsgi_app.init_global_data.reset() + wsgi_app.init_application.reset() # Reset the placement client singleton report.PLACEMENTCLIENT = None diff --git a/nova/tests/unit/api/openstack/test_wsgi_app.py b/nova/tests/unit/api/openstack/test_wsgi_app.py index 0eb7011c116..7fa1eacc623 100644 --- a/nova/tests/unit/api/openstack/test_wsgi_app.py +++ b/nova/tests/unit/api/openstack/test_wsgi_app.py @@ -82,6 +82,8 @@ def test_init_application_called_twice( # raised during it. self.assertRaises(test.TestingException, wsgi_app.init_application, 'nova-api') + # reset the latch_error_on_raise decorator + wsgi_app.init_application.reset() # Now run init_application a second time, it should succeed since no # exception is being raised (the init of global data should not be # re-attempted). @@ -89,6 +91,26 @@ def test_init_application_called_twice( self.assertIn('Global data already initialized, not re-initializing.', self.stdlog.logger.output) + @mock.patch( + 'sys.argv', new=mock.MagicMock(return_value=mock.sentinel.argv)) + @mock.patch('nova.api.openstack.wsgi_app._get_config_files') + def test_init_application_called_unrecoverable(self, mock_get_files): + """Test that init_application can tolerate being called more than once + in a single python interpreter instance and raises the same exception + forever if its unrecoverable. + """ + error = ValueError("unrecoverable config error") + excepted_type = type(error) + mock_get_files.side_effect = [ + error, test.TestingException, test.TestingException] + for i in range(3): + e = self.assertRaises( + excepted_type, wsgi_app.init_application, 'nova-api') + self.assertIs(e, error) + # since the expction is latched on the first raise mock_get_files + # should not be called again on each iteration + mock_get_files.assert_called_once() + @mock.patch('nova.objects.Service.get_by_host_and_binary') @mock.patch('nova.utils.raise_if_old_compute') def test_setup_service_version_workaround(self, mock_check_old, mock_get): diff --git a/nova/tests/unit/test_utils.py b/nova/tests/unit/test_utils.py index 1ee8ba93658..b34136b0522 100644 --- a/nova/tests/unit/test_utils.py +++ b/nova/tests/unit/test_utils.py @@ -1398,3 +1398,58 @@ def f(): self.assertRaises(ValueError, f.reset) self.assertFalse(f.called) mock_clean.assert_called_once_with() + + +class LatchErrorOnRaiseTests(test.NoDBTestCase): + + error = test.TestingException() + unrecoverable = ValueError('some error') + + @utils.latch_error_on_raise(retryable=(test.TestingException,)) + def dummy_test_func(self, error=None): + if error: + raise error + return True + + def setUp(self): + super().setUp() + self.dummy_test_func.reset() + + @mock.patch.object(utils.LOG, 'exception') + def test_wrapped_success(self, fake_logger): + self.assertTrue(self.dummy_test_func()) + fake_logger.assert_not_called() + self.assertIsNone(self.dummy_test_func.error) + + @mock.patch.object(utils.LOG, 'exception') + def test_wrapped_raises_recoverable(self, fake_logger): + expected = LatchErrorOnRaiseTests.error + e = self.assertRaises( + type(expected), self.dummy_test_func, error=expected) + self.assertIs(expected, e) + # we just leave recoverable exception flow though the decorator + # without catching them so the logger should not be called by the + # decorator + fake_logger.assert_not_called() + self.assertIsNone(self.dummy_test_func.error) + self.assertTrue(self.dummy_test_func()) + + @mock.patch.object(utils.LOG, 'exception') + def test_wrapped_raises_unrecoverable(self, fake_logger): + expected = LatchErrorOnRaiseTests.unrecoverable + e = self.assertRaises( + type(expected), self.dummy_test_func, error=expected) + self.assertIs(expected, e) + fake_logger.assert_called_once_with(expected) + self.assertIsNotNone(self.dummy_test_func.error) + self.assertIs(self.dummy_test_func.error, expected) + + @mock.patch.object(utils.LOG, 'exception', new=mock.MagicMock()) + def test_wrapped_raises_forever(self): + expected = LatchErrorOnRaiseTests.unrecoverable + first = self.assertRaises( + type(expected), self.dummy_test_func, error=expected) + self.assertIs(expected, first) + second = self.assertRaises( + type(expected), self.dummy_test_func, error=expected) + self.assertIs(first, second) diff --git a/nova/utils.py b/nova/utils.py index b6b37853311..a3b2353c5e0 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -1194,3 +1194,42 @@ def reset(wrapper, *args, **kwargs): wrapper.reset = functools.partial(reset, wrapper) return wrapper return outer_wrapper + + +class _SentinelException(Exception): + """This type exists to act as a placeholder and will never be raised""" + + +def latch_error_on_raise(retryable=(_SentinelException,)): + """This is a utility decorator to ensure if a function ever raises + it will always raise the same exception going forward. + + The only exception we know is safe to ignore is an oslo db connection + error as the db may be temporarily unavailable and we should allow + mod_wsgi to retry + """ + + def outer_wrapper(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if wrapper.error: + raise wrapper.error + try: + return func(*args, **kwargs) + except retryable: + # reraise any retryable exception to allow them to be handled + # by the caller. + raise + except Exception as e: + wrapper.error = e + LOG.exception(e) + raise + + wrapper.error = None + + def reset(wrapper): + wrapper.error = None + + wrapper.reset = functools.partial(reset, wrapper) + return wrapper + return outer_wrapper diff --git a/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml b/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml new file mode 100644 index 00000000000..3c364b8b99a --- /dev/null +++ b/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + The nova (metadata)api wsgi application will now detect fatal errors + (configuration, et al) on startup and lock into a permanent error state + until fixed and restarted. This solves a problem with some wsgi runtimes + ignoring initialization errors and continuing to send requests to the + half-initialized service. See https://bugs.launchpad.net/nova/+bug/2103811 + for more details. From 3242cde34232d0c65629f8c6866d0f2ddef7cda3 Mon Sep 17 00:00:00 2001 From: Doug Goldstein Date: Mon, 17 Feb 2025 17:19:16 -0600 Subject: [PATCH 5/7] ironic: fix logging of validation errors When validation of the node fails, since switching to the SDK the address of the ValidationResult object is displayed instead of the actual message. This has been broken since patch Ibb5b168ee0944463b996e96f033bd3dfb498e304. Closes-Bug: 2100009 Change-Id: I8fbdaadd125ece6a3050b2fbb772a7bd5d7e5304 Signed-off-by: Doug Goldstein (cherry picked from commit 37888e875f0cef8350991b6a9cb81da9dacc1bac) --- nova/tests/unit/virt/ironic/test_driver.py | 11 +++++++---- nova/virt/ironic/driver.py | 12 +++++++++--- ...ronic-validate-node-message-6a8b1eedbddd06fd.yaml | 7 +++++++ 3 files changed, 23 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/ironic-validate-node-message-6a8b1eedbddd06fd.yaml diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py index 137c53d1fed..e18aa4558d4 100644 --- a/nova/tests/unit/virt/ironic/test_driver.py +++ b/nova/tests/unit/virt/ironic/test_driver.py @@ -1582,15 +1582,18 @@ def test_spawn_node_driver_validation_fail(self, mock_avti, self.mock_conn.validate_node.return_value = \ ironic_utils.get_test_validation( - power=_node.ValidationResult(result=False, reason=None), + power=_node.ValidationResult(result=False, reason='OVERVOLT'), deploy=_node.ValidationResult(result=False, reason=None), - storage=_node.ValidationResult(result=False, reason=None), + storage=_node.ValidationResult(result=True, reason=None), ) self.mock_conn.get_node.return_value = node image_meta = ironic_utils.get_test_image_meta() - self.assertRaises(exception.ValidationError, self.driver.spawn, - self.ctx, instance, image_meta, [], None, {}) + msgre = '.*deploy: None, power: OVERVOLT, storage: No Error.*' + + self.assertRaisesRegex(exception.ValidationError, msgre, + self.driver.spawn, self.ctx, instance, image_meta, + [], None, {}) self.mock_conn.get_node.assert_called_once_with( node_id, fields=ironic_driver._NODE_FIELDS) mock_avti.assert_called_once_with(self.ctx, instance, None) diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index 3e58cf93a34..6da6c457ee5 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -1213,14 +1213,20 @@ def spawn(self, context, instance, image_meta, injected_files, ): # something is wrong. undo what we have done self._cleanup_deploy(node, instance, network_info) + deploy_msg = ("No Error" if validate_chk['deploy'].result + else validate_chk['deploy'].reason) + power_msg = ("No Error" if validate_chk['power'].result + else validate_chk['power'].reason) + storage_msg = ("No Error" if validate_chk['storage'].result + else validate_chk['storage'].reason) raise exception.ValidationError(_( "Ironic node: %(id)s failed to validate. " "(deploy: %(deploy)s, power: %(power)s, " "storage: %(storage)s)") % {'id': node.id, - 'deploy': validate_chk['deploy'], - 'power': validate_chk['power'], - 'storage': validate_chk['storage']}) + 'deploy': deploy_msg, + 'power': power_msg, + 'storage': storage_msg}) # Config drive configdrive_value = None diff --git a/releasenotes/notes/ironic-validate-node-message-6a8b1eedbddd06fd.yaml b/releasenotes/notes/ironic-validate-node-message-6a8b1eedbddd06fd.yaml new file mode 100644 index 00000000000..2ac2e894011 --- /dev/null +++ b/releasenotes/notes/ironic-validate-node-message-6a8b1eedbddd06fd.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fix displaying the reason messages from the Ironic validate node operation that + is called just before the instance is deployed on the bare metal node. The + message from Ironic is now correctly logged. + Fixes `bug 2100009 _`. From 957d97ea30c52292e4105dbcb2ed10a509521ac5 Mon Sep 17 00:00:00 2001 From: Masahito Muroi Date: Tue, 21 Jan 2025 10:03:24 +0900 Subject: [PATCH 6/7] Use dict object for request_specs_dict in the _list_view The request_specs_dict in the _list_view is initialized as a defaultdict object in order to return empty string as default. But the request_spec_dict is replaced with a normal dict object in the v2.96 microversion, then if server list and RequestSpec missmatch happens by any reason, the List Server API and the List Server Detail API hit 500 Internal server error because of key error. This commit updates the req_spec_dict to use normal dict object, then it returns sentinel object if there is no appropriate request_spec object. Closes-Bug: #2095364 Change-Id: If282b8709954f276cb5d48114437809d771a9958 (cherry picked from commit 509820f156e339e825f799d84b06cb11ac6b6096) --- api-ref/source/parameters.yaml | 1 + nova/api/openstack/compute/views/servers.py | 23 +++--- .../api/openstack/compute/test_servers.py | 70 ++++++++++++++++++- .../notes/bug-2095364-ffbf67c0ae3f53b5.yaml | 15 ++++ 4 files changed, 98 insertions(+), 11 deletions(-) create mode 100644 releasenotes/notes/bug-2095364-ffbf67c0ae3f53b5.yaml diff --git a/api-ref/source/parameters.yaml b/api-ref/source/parameters.yaml index 9a530cffbe7..f644c84f9ea 100644 --- a/api-ref/source/parameters.yaml +++ b/api-ref/source/parameters.yaml @@ -5721,6 +5721,7 @@ pinned_availability_zone: Also when default_schedule_zone config option set to specific AZ, in that case, instance would be pinned to that specific AZ, and instance will be scheduled on host belonging to pinned AZ. + In case of no pinned availability zone, this value is set to `null`. in: body type: string min_version: 2.96 diff --git a/nova/api/openstack/compute/views/servers.py b/nova/api/openstack/compute/views/servers.py index 8452d6bb102..023669406eb 100644 --- a/nova/api/openstack/compute/views/servers.py +++ b/nova/api/openstack/compute/views/servers.py @@ -14,8 +14,6 @@ # License for the specific language governing permissions and limitations # under the License. -import collections - from oslo_log import log as logging from oslo_serialization import jsonutils @@ -41,6 +39,7 @@ LOG = logging.getLogger(__name__) +AZ_NOT_IN_REQUEST_SPEC = object() SCHED_HINTS_NOT_IN_REQUEST_SPEC = object() @@ -225,16 +224,20 @@ def _get_host_status_unknown_only(context, instance=None): return unknown_only def _get_pinned_az(self, context, instance, provided_az): - pinned_az = '' - if provided_az is not None: + if provided_az is AZ_NOT_IN_REQUEST_SPEC: + # Case the provided_az is pre fetched, but not specified + pinned_az = None + elif provided_az is not None: + # Case the provided_az is pre fetched, and specified pinned_az = provided_az else: + # Case the provided_az is not pre fethed. try: req_spec = objects.RequestSpec.get_by_instance_uuid( context, instance.uuid) pinned_az = req_spec.availability_zone except exception.RequestSpecNotFound: - pinned_az = '' + pinned_az = None return pinned_az def _get_scheduler_hints(self, context, instance, provided_sched_hints): @@ -543,15 +546,16 @@ def _list_view(self, func, request, servers, coll_name, show_extra_specs, :returns: Server data in dictionary format """ req_specs = None - req_specs_dict = collections.defaultdict(str) + req_specs_dict = {} sched_hints_dict = {} if api_version_request.is_supported(request, min_version='2.96'): context = request.environ['nova.context'] instance_uuids = [s.uuid for s in servers] req_specs = objects.RequestSpec.get_by_instance_uuids( context, instance_uuids) - req_specs_dict = {req.instance_uuid: req.availability_zone - for req in req_specs} + req_specs_dict.update({req.instance_uuid: req.availability_zone + for req in req_specs + if req.availability_zone is not None}) if api_version_request.is_supported(request, min_version='2.100'): sched_hints_dict.update({ req.instance_uuid: req.scheduler_hints @@ -565,7 +569,8 @@ def _list_view(self, func, request, servers, coll_name, show_extra_specs, show_host_status=show_host_status, show_sec_grp=show_sec_grp, bdms=bdms, cell_down_support=cell_down_support, - provided_az=req_specs_dict[server.uuid], + provided_az=req_specs_dict.get( + server.uuid, AZ_NOT_IN_REQUEST_SPEC), provided_sched_hints=sched_hints_dict.get( server.uuid, SCHED_HINTS_NOT_IN_REQUEST_SPEC) )["server"] diff --git a/nova/tests/unit/api/openstack/compute/test_servers.py b/nova/tests/unit/api/openstack/compute/test_servers.py index cf165fde250..745bc6f891b 100644 --- a/nova/tests/unit/api/openstack/compute/test_servers.py +++ b/nova/tests/unit/api/openstack/compute/test_servers.py @@ -8466,8 +8466,74 @@ def test_list_view_with_missing_request_specs(self, m_rs): availability_zone=self.instance.availability_zone)] req = self.req('/%s/servers' % self.project_id) - self.assertRaises(KeyError, self.view_builder.index, - req, self.instances, False) + output = self.view_builder.index(req, self.instances, False) + + self.assertEqual(2, len(output['servers'])) + + @mock.patch('nova.objects.RequestSpec.get_by_instance_uuids') + def test_list_detail_view_with_missing_request_specs(self, m_rs): + + self.instances = [ + self.instance, + self.create_instance(2, uuids.fake1, 'fake-server'), + self.create_instance(3, uuids.fake2, 'fake-server2') + ] + # First instance's request spec has pinned availability zone + # Second instance's request spec has no pinned availability zone + m_rs.return_value = [ + objects.RequestSpec( + instance_uuid=self.instance.uuid, + availability_zone=self.instance.availability_zone), + objects.RequestSpec( + instance_uuid=self.instance.uuid, + availability_zone=None) + ] + + req = self.req('/%s/servers/detail' % self.project_id) + output = self.view_builder.detail(req, self.instances, False) + + self.assertEqual(3, len(output['servers'])) + # first instance has pinned az + self.assertEqual('nova', + output['servers'][0]['pinned_availability_zone']) + # second or later has no pinned az + for s in output['servers'][1:]: + self.assertIsNone(s['pinned_availability_zone']) + + @mock.patch('nova.objects.RequestSpec.get_by_instance_uuid') + def test_show_view_with_missing_request_specs(self, m_rs): + + self.instances = [ + self.instance, + self.create_instance(2, uuids.fake1, 'fake-server'), + self.create_instance(3, uuids.fake2, 'fake-server2') + ] + # First instance's request spec has pinned availability zone + # Second instance's request spec has no pinned availability zone + m_rs.side_effect = [ + objects.RequestSpec( + instance_uuid=self.instance.uuid, + availability_zone=self.instance.availability_zone), + objects.RequestSpec( + instance_uuid=self.instance.uuid, + availability_zone=None), + exception.RequestSpecNotFound(instance_uuid='3') + ] + + # Instance show with request spec and pinned az + req = self.req('/%s/servers/1' % self.project_id) + output = self.view_builder.show(req, self.instances[0]) + self.assertEqual('nova', output['server']['pinned_availability_zone']) + + # Instance show with request spec and no pinned az + req = self.req('/%s/servers/2' % self.project_id) + output = self.view_builder.show(req, self.instances[1]) + self.assertIsNone(output['server']['pinned_availability_zone']) + + # Instance show without request spec + req = self.req('/%s/servers/3' % self.project_id) + output = self.view_builder.show(req, self.instances[2]) + self.assertIsNone(output['server']['pinned_availability_zone']) class ServersViewBuilderTestV2100(_ServersViewBuilderTest): diff --git a/releasenotes/notes/bug-2095364-ffbf67c0ae3f53b5.yaml b/releasenotes/notes/bug-2095364-ffbf67c0ae3f53b5.yaml new file mode 100644 index 00000000000..672863666d5 --- /dev/null +++ b/releasenotes/notes/bug-2095364-ffbf67c0ae3f53b5.yaml @@ -0,0 +1,15 @@ +--- +fixes: + - | + `Bug #2095364`_: Fixed the List Server API and the List Server Detail API + 500 Internal Server Error issue in v2.96 or later API microversion if + one or more instance has no request spec object. One usecase was when cloud + user tried to create instance which exceeded their quota, the request does + not create instance request spec. Once the no request spec instance is + created in cloud user project, the server list API and the list server + details API return 500 Internal Server Error for the project until the + cloud user deletes the no request spec object instance. + After this fix, the v2.96 or later returns `null` at the + `pinned_availability_zone` value if not specified. + + .. _Bug #2095364: https://launchpad.net/bugs/2095364 From 78bbd3cdfc157e31a491a77615cd6fa2af1a6b4e Mon Sep 17 00:00:00 2001 From: Daniel Niasoff <4070285+dniasoff@users.noreply.github.com> Date: Sun, 18 May 2025 12:56:16 +0000 Subject: [PATCH 7/7] qumulus initial --- .github/workflows/build.yaml | 201 +++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 .github/workflows/build.yaml diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 00000000000..dfd24c8d07e --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,201 @@ +name: 'Build and Push Kolla Images' + +on: + push: + branches: + - main + - staging + - qa + - dev + - experimental + + workflow_dispatch: + inputs: + branch: + type: choice + description: 'Branch to build' + options: + - main + - staging + - qa + - dev + - experimental + +permissions: + contents: read + id-token: write + +concurrency: + group: '${{ github.workflow }} @ ${{ github.repository }} @ ${{ github.event.inputs.branch || github.base_ref || github.ref_name }}' + cancel-in-progress: true + +jobs: + kolla_build: + runs-on: + group: prod + + steps: + + - uses: QumulusTechnology/vault-setup-action@v2 + with: + aws_account_data: ${{ secrets.AWS_ACCOUNT_DATA }} + vault_addr: ${{ secrets.VAULT_ADDR }} + platform: qcp + secrets: | + secret/data/qcp/global/harbor/prod/github-user username | REPO_USERNAME ; + secret/data/qcp/global/harbor/prod/github-user password | REPO_PASSWORD ; + secret/data/qcp/global/harbor/prod/github-user address | REPO_ADDRESS + + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ env.BRANCH }} + + - name: Set Release version + run: | + + if [ "$BRANCH" == "main" ]; then + echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV + echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV + elif [ "$BRANCH" == "staging" ]; then + echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV + echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV + elif [ "$BRANCH" == "qa" ]; then + echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV + echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV + elif [ "$BRANCH" == "experimental" ]; then + echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV + echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV + else + echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV + echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV + fi + + + if [ "$BRANCH" == "main" ]; then + TAG=main + elif [ "$BRANCH" == "staging" ]; then + TAG=staging + elif [ "$BRANCH" == "qa" ]; then + TAG=qa + elif [ "$BRANCH" == "dev" ]; then + TAG=dev + elif [ "$BRANCH" == "experimental" ]; then + TAG=experimental + else + TAG=dev_${BRANCH} + fi + + echo "TAG=$TAG" >> $GITHUB_ENV + + - name: Set GITHUB Environment Variables + run: | + echo "GITHUB_ACTIONS_BRANCH=${{ github.base_ref || github.ref_name }}" >> $GITHUB_ENV + echo "GITHUB_ACTIONS_WORKFLOW_ID=${{ github.run_id }}" >> $GITHUB_ENV + echo "GITHUB_ACTIONS_WORKFLOW_ATEMPT=${{ github.run_attempt }}" >> $GITHUB_ENV + echo "GITHUB_ACTIONS_WORKFLOW_RUN_NUMBER=${{ github.run_number }}" >> $GITHUB_ENV + echo "GITHUB_ACTIONS_AUTHOR=${{ github.actor }}" >> $GITHUB_ENV + + - name: Login to Harbor Hub + uses: docker/login-action@v2 + with: + registry: ${{ env.REPO_ADDRESS }} + username: ${{ env.REPO_USERNAME }} + password: ${{ env.REPO_PASSWORD }} + + - name: Install Kolla + run: | + sudo apt update + sudo apt install -y bash python3 python3-pip git python3-dev python3-docker libffi-dev gcc libssl-dev python3-venv + python3 -m venv kolla + source kolla/bin/activate + pip install -U pip + pip install docker setuptools + pip install git+https://github.com/QumulusTechnology/kolla@${BRANCH} + + - name: Create kolla-build.conf + run: | + CWD="$(pwd)" + sudo mkdir -p /etc/kolla + sudo bash -c "cat << EOF > /etc/kolla/kolla-build.conf + + [DEFAULT] + base = ubuntu + namespace = kolla + base_tag = ${BASE_OS_TAG} + openstack_release = ${OPENSTACK_VERSION} + registry = ${REPO_ADDRESS}/qcp-${BRANCH} + push = true + skip_existing = false + threads = 16 + push_threads = 4 + install_type = source + tag = latest + template_override = /etc/kolla/template-overrides.j2 + docker_healthchecks = true + network_mode = host + + [${REPOSITORY}-base] + type = local + location = ${CWD} + EOF" + + + - name: Create template-overrides.j2 + run: | + sudo bash -c "cat << EOF > /etc/kolla/template-overrides.j2 + {% extends parent_template %} + + {% block base_ubuntu_package_sources_list %} + {% endblock %} + + {% block openstack_base_footer %} + RUN pip install jaeger-client + {% endblock %} + + {% block ${REPOSITORY}_base_footer %} + {% endblock %} + + {% block nova_compute_header %} + RUN apt clean && \ + apt update && \ + apt install -y swtpm swtpm-tools && \ + groupadd tss && \ + useradd -g tss tss && \ + mkdir /home/tss && \ + chown -R tss:tss /home/tss &&\ + chown -R tss:tss /var/lib/swtpm-localca + {% endblock %} + + {% block nova_compute_footer %} + RUN pip install git+https://github.com/openstack/oslo.utils.git@stable/${OPENSTACK_VERSION} + {% endblock %} + + {% block nova_libvirt_footer %} + RUN apt clean && \ + apt update && \ + apt install -y swtpm swtpm-tools && \ + mkdir /home/tss && \ + chown -R tss:tss /home/tss && \ + chown -R tss:tss /var/lib/swtpm-localca + + USER tss + RUN /usr/share/swtpm/swtpm-create-user-config-files + USER root + {% endblock %} + + EOF" + + - name: Build docker images + run: | + source kolla/bin/activate + kolla-build ${REPOSITORY} + + - name: Tag and push docker images + run: | + timestamp=$(date +%Y%m%d%H%M%S) + for i in $(docker images --format '{{.Repository}}' | grep "/qcp-${BRANCH}/"); do + image=${i##*/} + docker tag ${i}:latest ${REPO_ADDRESS}/qcp-${BRANCH}/kolla/${image}:${timestamp} + docker image push --all-tags ${i} + done