From 8010bdca09ddaee9095bda5444421bbf71f4c29b Mon Sep 17 00:00:00 2001
From: OpenStack Release Bot <infra-root@openstack.org>
Date: Tue, 18 Mar 2025 16:27:22 +0000
Subject: [PATCH 1/7] Update .gitreview for stable/2025.1

Change-Id: I8a9fbcac2aa4090b913f8b2e7953fd8397435433
---
 .gitreview | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitreview b/.gitreview
index c2b7eef7078..bc50fb9256a 100644
--- a/.gitreview
+++ b/.gitreview
@@ -2,3 +2,4 @@
 host=review.opendev.org
 port=29418
 project=openstack/nova.git
+defaultbranch=stable/2025.1

From 6422ebea0b152a2b34abdacf4453c70bf0ee37cf Mon Sep 17 00:00:00 2001
From: OpenStack Release Bot <infra-root@openstack.org>
Date: Tue, 18 Mar 2025 16:27:26 +0000
Subject: [PATCH 2/7] Update TOX_CONSTRAINTS_FILE for stable/2025.1

Update the URL to the upper-constraints file to point to the redirect
rule on releases.openstack.org so that anyone working on this branch
will switch to the correct upper-constraints list automatically when
the requirements repository branches.

Until the requirements repository has as stable/2025.1 branch, tests will
continue to use the upper-constraints list on master.

Change-Id: I9829aa7887fc67a7693c134ef5fe302207e1a58a
---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index d402501920f..9026f14ab1d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,7 +10,7 @@ allowlist_externals =
   rm
   env
   make
-install_command = python -I -m pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master} {opts} {packages}
+install_command = python -I -m pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/2025.1} {opts} {packages}
 setenv =
   VIRTUAL_ENV={envdir}
   LANGUAGE=en_US

From 91f6f7e30e4b5615322a4218fbb368b461274ee4 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer <gibi@redhat.com>
Date: Fri, 14 Feb 2025 16:20:08 +0100
Subject: [PATCH 3/7] Reproduce bug/2098496

Related-Bug: #2098496
Change-Id: I9a3091662fb5d1d0a41dbeff56d9680a748fe312
(cherry picked from commit 32afd0c644f1b9e378d113fba11bb189682e1740)
---
 .../regressions/test_bug_2098496.py           | 181 ++++++++++++++++++
 1 file changed, 181 insertions(+)
 create mode 100644 nova/tests/functional/regressions/test_bug_2098496.py

diff --git a/nova/tests/functional/regressions/test_bug_2098496.py b/nova/tests/functional/regressions/test_bug_2098496.py
new file mode 100644
index 00000000000..731d9f441a4
--- /dev/null
+++ b/nova/tests/functional/regressions/test_bug_2098496.py
@@ -0,0 +1,181 @@
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import copy
+
+from nova import objects
+from nova.tests.fixtures import libvirt as fakelibvirt
+from nova.tests.functional.libvirt import test_pci_in_placement as base
+
+
+class PCIInPlacementCreateAfterDeleteTestCase(base.PlacementPCIReportingTests):
+    def setUp(self):
+        super().setUp()
+        self.flags(group='filter_scheduler', pci_in_placement=True)
+
+    def test_create_after_delete(self):
+        device_spec = self._to_list_of_json_str(
+            [
+                {
+                    "vendor_id": fakelibvirt.PCI_VEND_ID,
+                    "product_id": fakelibvirt.VF_PROD_ID,
+                },
+            ]
+        )
+        pci_alias = {
+            "name": "a-vf",
+            "product_id": fakelibvirt.VF_PROD_ID,
+            "vendor_id": fakelibvirt.PCI_VEND_ID,
+        }
+        self._test_create_second_vm_after_first_is_deleted(
+            device_spec, pci_alias, self.VF_RC, traits=[])
+
+    def test_create_after_delete_with_rc_and_traits(self):
+        device_spec = self._to_list_of_json_str(
+            [
+                {
+                    "vendor_id": fakelibvirt.PCI_VEND_ID,
+                    "product_id": fakelibvirt.VF_PROD_ID,
+                    "resource_class": "vf",
+                    "traits": "blue,big",
+                },
+            ]
+        )
+        pci_alias = {
+            "name": "a-vf",
+            "resource_class": "vf",
+            "traits": "big",
+        }
+        self._test_create_second_vm_after_first_is_deleted(
+            device_spec, pci_alias,
+            "CUSTOM_VF", traits=["CUSTOM_BLUE", "CUSTOM_BIG"])
+
+    def _test_create_second_vm_after_first_is_deleted(
+        self, device_spec, pci_alias, rc, traits
+    ):
+        # The fake libvirt will emulate on the host:
+        # * one PF with 2 VFs
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pci=0, num_pfs=1, num_vfs=2)
+        # make the VFs available
+        self.flags(group='pci', device_spec=device_spec)
+        self.start_compute(hostname="compute1", pci_info=pci_info)
+
+        compute1_expected_placement_view = {
+            "inventories": {
+                "0000:81:00.0": {rc: 2},
+            },
+            "traits": {"0000:81:00.0": traits,
+            },
+            "usages": {
+                "0000:81:00.0": {rc: 0},
+            },
+            "allocations": {},
+        }
+        self.assert_placement_pci_view(
+            "compute1", **compute1_expected_placement_view)
+        self.assertPCIDeviceCounts('compute1', total=2, free=2)
+        # assert that the two VFs from the same PF are in the same pool
+        pools = objects.ComputeNode.get_first_node_by_host_for_old_compat(
+            self.ctxt, "compute1").pci_device_pools
+        self.assertEqual(len(pools), 1)
+        self.assertEqual(pools[0].count, 2)
+        compute1_empty = copy.deepcopy(compute1_expected_placement_view)
+
+        self.flags(
+            group="pci",
+            alias=self._to_list_of_json_str([pci_alias]),
+        )
+        extra_spec = {"pci_passthrough:alias": "a-vf:1"}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        server_1vf = self._create_server(flavor_id=flavor_id, networks=[])
+
+        self.assertPCIDeviceCounts('compute1', total=2, free=1)
+        compute1_expected_placement_view["usages"] = {
+            "0000:81:00.0": {rc: 1}
+        }
+        compute1_expected_placement_view["allocations"][server_1vf["id"]] = {
+            "0000:81:00.0": {rc: 1},
+        }
+        self.assert_placement_pci_view(
+            "compute1", **compute1_expected_placement_view)
+        self.assert_no_pci_healing("compute1")
+
+        self._delete_server(server_1vf)
+        self.assertPCIDeviceCounts('compute1', total=2, free=2)
+        self.assert_placement_pci_view("compute1", **compute1_empty)
+
+        # This is already shows the potential issue behind bug/2098496.
+        # After the server is deleted we are not back to a single pool of 2
+        # VFs but instead two separate pools with one device each but both
+        # related to the same rp_uuid. This breaks an assumption of the pool
+        # allocation logic later.
+        pools = objects.ComputeNode.get_first_node_by_host_for_old_compat(
+            self.ctxt, "compute1").pci_device_pools
+        self.assertEqual(len(pools), 2)
+        self.assertEqual(pools[0].count, 1)
+        self.assertEqual(pools[1].count, 1)
+        self.assertEqual(pools[0].tags['rp_uuid'], pools[1].tags['rp_uuid'])
+
+        # This is the expected state after the bug is fixed
+        # assert that the single pool is not broken into two during the
+        # de-allocation
+        # pools = objects.ComputeNode.get_first_node_by_host_for_old_compat(
+        #     self.ctxt, "compute1").pci_device_pools
+        # self.assertEqual(len(pools), 1)
+        # self.assertEqual(pools[0].count, 2)
+
+        server_1vf = self._create_server(flavor_id=flavor_id, networks=[])
+        # This is bug/2098496 as the VM now consumes 2 VFs instead of one it
+        # requested
+        self.assertPCIDeviceCounts('compute1', total=2, free=0)
+        compute1_expected_placement_view = copy.deepcopy(compute1_empty)
+        compute1_expected_placement_view["usages"] = {
+            "0000:81:00.0": {rc: 2}
+        }
+        compute1_expected_placement_view["allocations"][server_1vf["id"]] = {
+            "0000:81:00.0": {rc: 2},
+        }
+        self.assert_placement_pci_view(
+            "compute1", **compute1_expected_placement_view)
+        # prove that the placement view got corrupted only after the claim
+        # on the compute due the allocation healing logic
+        last_healing = self.pci_healing_fixture.last_healing("compute1")
+        alloc_before = last_healing[0]
+        alloc_after = last_healing[1]
+
+        def alloc_by_rc(allocations, rc):
+            return [
+                alloc["resources"][rc]
+                for alloc in allocations.values()
+                if rc in alloc["resources"]
+            ]
+
+        pci_alloc_before = alloc_by_rc(
+            alloc_before[server_1vf["id"]]["allocations"], rc)
+        self.assertEqual(1, sum(pci_alloc_before))
+
+        pci_alloc_after = alloc_by_rc(
+            alloc_after[server_1vf["id"]]["allocations"], rc)
+        self.assertEqual(2, sum(pci_alloc_after))
+
+        # This is the expected result after the bug is fixed
+        # self.assertPCIDeviceCounts('compute1', total=2, free=1)
+        # compute1_expected_placement_view = copy.deepcopy(compute1_empty)
+        # compute1_expected_placement_view["usages"] = {
+        #     "0000:81:00.0": {rc: 1}
+        # }
+        # compute1_expected_placement_view["allocations"][server_1vf["id"]] = {
+        #     "0000:81:00.0": {rc: 1},
+        # }
+        # self.assert_placement_pci_view(
+        #     "compute1", **compute1_expected_placement_view)
+        # self.assert_no_pci_healing("compute1")

From 7f1b13ea40bec66af0707cab227fc14c55cdd1b3 Mon Sep 17 00:00:00 2001
From: Sean Mooney <work@seanmooney.info>
Date: Tue, 18 Mar 2025 17:42:07 +0000
Subject: [PATCH 4/7] wrap wsgi_app.init_application with latch_error_on_raise

This change adds a latch_error_on_raise decorator which
is applied to the init_applciation function in our
common wsgi_app module.

This decorator will catch all non retryable exceptions
and cause future invocations of the function to always
return that same exception forever.

a reset function is also added to the decorated function
which should be called in our bases test class to
prevent cross test interactons.

Closes-Bug: #2103811
Related-Bug: #1882094
Change-Id: I44b1f7e2acc36a5b557d6d8788f6099f52bbdfb8
(cherry picked from commit 8dcbbe43e71b1528777fd5a905ad340e916d94ac)
---
 nova/api/openstack/wsgi_app.py                |  2 +
 nova/test.py                                  |  1 +
 .../tests/unit/api/openstack/test_wsgi_app.py | 22 ++++++++
 nova/tests/unit/test_utils.py                 | 55 +++++++++++++++++++
 nova/utils.py                                 | 39 +++++++++++++
 ...latch-error-on-raise-cf2da71a12b5f55f.yaml |  9 +++
 6 files changed, 128 insertions(+)
 create mode 100644 releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml

diff --git a/nova/api/openstack/wsgi_app.py b/nova/api/openstack/wsgi_app.py
index 6a2b72a6111..d6f1030f835 100644
--- a/nova/api/openstack/wsgi_app.py
+++ b/nova/api/openstack/wsgi_app.py
@@ -15,6 +15,7 @@
 import sys
 
 from oslo_config import cfg
+from oslo_db import exception as odbe
 from oslo_log import log as logging
 from oslo_reports import guru_meditation_report as gmr
 from oslo_reports import opts as gmr_opts
@@ -116,6 +117,7 @@ def init_global_data(conf_files, service_name):
             logging.DEBUG)
 
 
+@utils.latch_error_on_raise(retryable=(odbe.DBConnectionError,))
 def init_application(name):
     conf_files = _get_config_files()
 
diff --git a/nova/test.py b/nova/test.py
index 0ddd928904c..e084f5c14f7 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -304,6 +304,7 @@ def setUp(self):
         # make sure that the wsgi app is fully initialized for all testcase
         # instead of only once initialized for test worker
         wsgi_app.init_global_data.reset()
+        wsgi_app.init_application.reset()
 
         # Reset the placement client singleton
         report.PLACEMENTCLIENT = None
diff --git a/nova/tests/unit/api/openstack/test_wsgi_app.py b/nova/tests/unit/api/openstack/test_wsgi_app.py
index 0eb7011c116..7fa1eacc623 100644
--- a/nova/tests/unit/api/openstack/test_wsgi_app.py
+++ b/nova/tests/unit/api/openstack/test_wsgi_app.py
@@ -82,6 +82,8 @@ def test_init_application_called_twice(
         # raised during it.
         self.assertRaises(test.TestingException, wsgi_app.init_application,
                           'nova-api')
+        # reset the latch_error_on_raise decorator
+        wsgi_app.init_application.reset()
         # Now run init_application a second time, it should succeed since no
         # exception is being raised (the init of global data should not be
         # re-attempted).
@@ -89,6 +91,26 @@ def test_init_application_called_twice(
         self.assertIn('Global data already initialized, not re-initializing.',
                       self.stdlog.logger.output)
 
+    @mock.patch(
+        'sys.argv', new=mock.MagicMock(return_value=mock.sentinel.argv))
+    @mock.patch('nova.api.openstack.wsgi_app._get_config_files')
+    def test_init_application_called_unrecoverable(self, mock_get_files):
+        """Test that init_application can tolerate being called more than once
+        in a single python interpreter instance and raises the same exception
+        forever if its unrecoverable.
+        """
+        error = ValueError("unrecoverable config error")
+        excepted_type = type(error)
+        mock_get_files.side_effect = [
+            error, test.TestingException, test.TestingException]
+        for i in range(3):
+            e = self.assertRaises(
+                excepted_type, wsgi_app.init_application, 'nova-api')
+            self.assertIs(e, error)
+        # since the expction is latched on the first raise mock_get_files
+        # should not be called again on each iteration
+        mock_get_files.assert_called_once()
+
     @mock.patch('nova.objects.Service.get_by_host_and_binary')
     @mock.patch('nova.utils.raise_if_old_compute')
     def test_setup_service_version_workaround(self, mock_check_old, mock_get):
diff --git a/nova/tests/unit/test_utils.py b/nova/tests/unit/test_utils.py
index 1ee8ba93658..b34136b0522 100644
--- a/nova/tests/unit/test_utils.py
+++ b/nova/tests/unit/test_utils.py
@@ -1398,3 +1398,58 @@ def f():
         self.assertRaises(ValueError, f.reset)
         self.assertFalse(f.called)
         mock_clean.assert_called_once_with()
+
+
+class LatchErrorOnRaiseTests(test.NoDBTestCase):
+
+    error = test.TestingException()
+    unrecoverable = ValueError('some error')
+
+    @utils.latch_error_on_raise(retryable=(test.TestingException,))
+    def dummy_test_func(self, error=None):
+        if error:
+            raise error
+        return True
+
+    def setUp(self):
+        super().setUp()
+        self.dummy_test_func.reset()
+
+    @mock.patch.object(utils.LOG, 'exception')
+    def test_wrapped_success(self, fake_logger):
+        self.assertTrue(self.dummy_test_func())
+        fake_logger.assert_not_called()
+        self.assertIsNone(self.dummy_test_func.error)
+
+    @mock.patch.object(utils.LOG, 'exception')
+    def test_wrapped_raises_recoverable(self, fake_logger):
+        expected = LatchErrorOnRaiseTests.error
+        e = self.assertRaises(
+            type(expected), self.dummy_test_func, error=expected)
+        self.assertIs(expected, e)
+        # we just leave recoverable exception flow though the decorator
+        # without catching them so the logger should not be called by the
+        # decorator
+        fake_logger.assert_not_called()
+        self.assertIsNone(self.dummy_test_func.error)
+        self.assertTrue(self.dummy_test_func())
+
+    @mock.patch.object(utils.LOG, 'exception')
+    def test_wrapped_raises_unrecoverable(self, fake_logger):
+        expected = LatchErrorOnRaiseTests.unrecoverable
+        e = self.assertRaises(
+            type(expected), self.dummy_test_func, error=expected)
+        self.assertIs(expected, e)
+        fake_logger.assert_called_once_with(expected)
+        self.assertIsNotNone(self.dummy_test_func.error)
+        self.assertIs(self.dummy_test_func.error, expected)
+
+    @mock.patch.object(utils.LOG, 'exception', new=mock.MagicMock())
+    def test_wrapped_raises_forever(self):
+        expected = LatchErrorOnRaiseTests.unrecoverable
+        first = self.assertRaises(
+            type(expected), self.dummy_test_func, error=expected)
+        self.assertIs(expected, first)
+        second = self.assertRaises(
+            type(expected), self.dummy_test_func, error=expected)
+        self.assertIs(first, second)
diff --git a/nova/utils.py b/nova/utils.py
index b6b37853311..a3b2353c5e0 100644
--- a/nova/utils.py
+++ b/nova/utils.py
@@ -1194,3 +1194,42 @@ def reset(wrapper, *args, **kwargs):
         wrapper.reset = functools.partial(reset, wrapper)
         return wrapper
     return outer_wrapper
+
+
+class _SentinelException(Exception):
+    """This type exists to act as a placeholder and will never be raised"""
+
+
+def latch_error_on_raise(retryable=(_SentinelException,)):
+    """This is a utility decorator to ensure if a function ever raises
+    it will always raise the same exception going forward.
+
+    The only exception we know is safe to ignore is an oslo db connection
+    error as the db may be temporarily unavailable and we should allow
+    mod_wsgi to retry
+    """
+
+    def outer_wrapper(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            if wrapper.error:
+                raise wrapper.error
+            try:
+                return func(*args, **kwargs)
+            except retryable:
+                # reraise any retryable exception to allow them to be handled
+                # by the caller.
+                raise
+            except Exception as e:
+                wrapper.error = e
+                LOG.exception(e)
+                raise
+
+        wrapper.error = None
+
+        def reset(wrapper):
+            wrapper.error = None
+
+        wrapper.reset = functools.partial(reset, wrapper)
+        return wrapper
+    return outer_wrapper
diff --git a/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml b/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml
new file mode 100644
index 00000000000..3c364b8b99a
--- /dev/null
+++ b/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml
@@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    The nova (metadata)api wsgi application will now detect fatal errors
+    (configuration, et al) on startup and lock into a permanent error state
+    until fixed and restarted. This solves a problem with some wsgi runtimes
+    ignoring initialization errors and continuing to send requests to the
+    half-initialized service. See https://bugs.launchpad.net/nova/+bug/2103811
+    for more details.

From 3242cde34232d0c65629f8c6866d0f2ddef7cda3 Mon Sep 17 00:00:00 2001
From: Doug Goldstein <cardoe@cardoe.com>
Date: Mon, 17 Feb 2025 17:19:16 -0600
Subject: [PATCH 5/7] ironic: fix logging of validation errors

When validation of the node fails, since switching to the SDK the
address of the ValidationResult object is displayed instead of the
actual message. This has been broken since patch
Ibb5b168ee0944463b996e96f033bd3dfb498e304.

Closes-Bug: 2100009
Change-Id: I8fbdaadd125ece6a3050b2fbb772a7bd5d7e5304
Signed-off-by: Doug Goldstein <cardoe@cardoe.com>
(cherry picked from commit 37888e875f0cef8350991b6a9cb81da9dacc1bac)
---
 nova/tests/unit/virt/ironic/test_driver.py           | 11 +++++++----
 nova/virt/ironic/driver.py                           | 12 +++++++++---
 ...ronic-validate-node-message-6a8b1eedbddd06fd.yaml |  7 +++++++
 3 files changed, 23 insertions(+), 7 deletions(-)
 create mode 100644 releasenotes/notes/ironic-validate-node-message-6a8b1eedbddd06fd.yaml

diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py
index 137c53d1fed..e18aa4558d4 100644
--- a/nova/tests/unit/virt/ironic/test_driver.py
+++ b/nova/tests/unit/virt/ironic/test_driver.py
@@ -1582,15 +1582,18 @@ def test_spawn_node_driver_validation_fail(self, mock_avti,
 
         self.mock_conn.validate_node.return_value = \
             ironic_utils.get_test_validation(
-                power=_node.ValidationResult(result=False, reason=None),
+                power=_node.ValidationResult(result=False, reason='OVERVOLT'),
                 deploy=_node.ValidationResult(result=False, reason=None),
-                storage=_node.ValidationResult(result=False, reason=None),
+                storage=_node.ValidationResult(result=True, reason=None),
             )
         self.mock_conn.get_node.return_value = node
         image_meta = ironic_utils.get_test_image_meta()
 
-        self.assertRaises(exception.ValidationError, self.driver.spawn,
-                          self.ctx, instance, image_meta, [], None, {})
+        msgre = '.*deploy: None, power: OVERVOLT, storage: No Error.*'
+
+        self.assertRaisesRegex(exception.ValidationError, msgre,
+            self.driver.spawn, self.ctx, instance, image_meta,
+            [], None, {})
         self.mock_conn.get_node.assert_called_once_with(
             node_id, fields=ironic_driver._NODE_FIELDS)
         mock_avti.assert_called_once_with(self.ctx, instance, None)
diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py
index 3e58cf93a34..6da6c457ee5 100644
--- a/nova/virt/ironic/driver.py
+++ b/nova/virt/ironic/driver.py
@@ -1213,14 +1213,20 @@ def spawn(self, context, instance, image_meta, injected_files,
         ):
             # something is wrong. undo what we have done
             self._cleanup_deploy(node, instance, network_info)
+            deploy_msg = ("No Error" if validate_chk['deploy'].result
+                          else validate_chk['deploy'].reason)
+            power_msg = ("No Error" if validate_chk['power'].result
+                         else validate_chk['power'].reason)
+            storage_msg = ("No Error" if validate_chk['storage'].result
+                           else validate_chk['storage'].reason)
             raise exception.ValidationError(_(
                 "Ironic node: %(id)s failed to validate. "
                 "(deploy: %(deploy)s, power: %(power)s, "
                 "storage: %(storage)s)")
                 % {'id': node.id,
-                   'deploy': validate_chk['deploy'],
-                   'power': validate_chk['power'],
-                   'storage': validate_chk['storage']})
+                   'deploy': deploy_msg,
+                   'power': power_msg,
+                   'storage': storage_msg})
 
         # Config drive
         configdrive_value = None
diff --git a/releasenotes/notes/ironic-validate-node-message-6a8b1eedbddd06fd.yaml b/releasenotes/notes/ironic-validate-node-message-6a8b1eedbddd06fd.yaml
new file mode 100644
index 00000000000..2ac2e894011
--- /dev/null
+++ b/releasenotes/notes/ironic-validate-node-message-6a8b1eedbddd06fd.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    Fix displaying the reason messages from the Ironic validate node operation that
+    is called just before the instance is deployed on the bare metal node. The
+    message from Ironic is now correctly logged.
+    Fixes `bug 2100009 <https://bugs.launchpad.net/nova/+bug/2100009>_`.

From 957d97ea30c52292e4105dbcb2ed10a509521ac5 Mon Sep 17 00:00:00 2001
From: Masahito Muroi <masahito.muroi@linecorp.com>
Date: Tue, 21 Jan 2025 10:03:24 +0900
Subject: [PATCH 6/7] Use dict object for request_specs_dict in the _list_view

The request_specs_dict in the _list_view is initialized as a
defaultdict object in order to return empty string as default.
But the request_spec_dict is replaced with a normal dict object in
the v2.96 microversion, then if server list and RequestSpec missmatch
happens by any reason, the List Server API and the List
Server Detail API hit 500 Internal server error because of key error.

This commit updates the req_spec_dict to use normal dict object, then
it returns sentinel object if there is no appropriate
request_spec object.

Closes-Bug: #2095364
Change-Id: If282b8709954f276cb5d48114437809d771a9958
(cherry picked from commit 509820f156e339e825f799d84b06cb11ac6b6096)
---
 api-ref/source/parameters.yaml                |  1 +
 nova/api/openstack/compute/views/servers.py   | 23 +++---
 .../api/openstack/compute/test_servers.py     | 70 ++++++++++++++++++-
 .../notes/bug-2095364-ffbf67c0ae3f53b5.yaml   | 15 ++++
 4 files changed, 98 insertions(+), 11 deletions(-)
 create mode 100644 releasenotes/notes/bug-2095364-ffbf67c0ae3f53b5.yaml

diff --git a/api-ref/source/parameters.yaml b/api-ref/source/parameters.yaml
index 9a530cffbe7..f644c84f9ea 100644
--- a/api-ref/source/parameters.yaml
+++ b/api-ref/source/parameters.yaml
@@ -5721,6 +5721,7 @@ pinned_availability_zone:
     Also when default_schedule_zone config option set to specific AZ, in that
     case, instance would be pinned to that specific AZ, and instance will be
     scheduled on host belonging to pinned AZ.
+    In case of no pinned availability zone, this value is set to `null`.
   in: body
   type: string
   min_version: 2.96
diff --git a/nova/api/openstack/compute/views/servers.py b/nova/api/openstack/compute/views/servers.py
index 8452d6bb102..023669406eb 100644
--- a/nova/api/openstack/compute/views/servers.py
+++ b/nova/api/openstack/compute/views/servers.py
@@ -14,8 +14,6 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 
-import collections
-
 from oslo_log import log as logging
 from oslo_serialization import jsonutils
 
@@ -41,6 +39,7 @@
 LOG = logging.getLogger(__name__)
 
 
+AZ_NOT_IN_REQUEST_SPEC = object()
 SCHED_HINTS_NOT_IN_REQUEST_SPEC = object()
 
 
@@ -225,16 +224,20 @@ def _get_host_status_unknown_only(context, instance=None):
         return unknown_only
 
     def _get_pinned_az(self, context, instance, provided_az):
-        pinned_az = ''
-        if provided_az is not None:
+        if provided_az is AZ_NOT_IN_REQUEST_SPEC:
+            # Case the provided_az is pre fetched, but not specified
+            pinned_az = None
+        elif provided_az is not None:
+            # Case the provided_az is pre fetched, and specified
             pinned_az = provided_az
         else:
+            # Case the provided_az is not pre fethed.
             try:
                 req_spec = objects.RequestSpec.get_by_instance_uuid(
                         context, instance.uuid)
                 pinned_az = req_spec.availability_zone
             except exception.RequestSpecNotFound:
-                pinned_az = ''
+                pinned_az = None
         return pinned_az
 
     def _get_scheduler_hints(self, context, instance, provided_sched_hints):
@@ -543,15 +546,16 @@ def _list_view(self, func, request, servers, coll_name, show_extra_specs,
         :returns: Server data in dictionary format
         """
         req_specs = None
-        req_specs_dict = collections.defaultdict(str)
+        req_specs_dict = {}
         sched_hints_dict = {}
         if api_version_request.is_supported(request, min_version='2.96'):
             context = request.environ['nova.context']
             instance_uuids = [s.uuid for s in servers]
             req_specs = objects.RequestSpec.get_by_instance_uuids(
                     context, instance_uuids)
-            req_specs_dict = {req.instance_uuid: req.availability_zone
-                              for req in req_specs}
+            req_specs_dict.update({req.instance_uuid: req.availability_zone
+                                   for req in req_specs
+                                   if req.availability_zone is not None})
             if api_version_request.is_supported(request, min_version='2.100'):
                 sched_hints_dict.update({
                     req.instance_uuid: req.scheduler_hints
@@ -565,7 +569,8 @@ def _list_view(self, func, request, servers, coll_name, show_extra_specs,
                  show_host_status=show_host_status,
                  show_sec_grp=show_sec_grp, bdms=bdms,
                  cell_down_support=cell_down_support,
-                 provided_az=req_specs_dict[server.uuid],
+                 provided_az=req_specs_dict.get(
+                     server.uuid, AZ_NOT_IN_REQUEST_SPEC),
                  provided_sched_hints=sched_hints_dict.get(
                      server.uuid, SCHED_HINTS_NOT_IN_REQUEST_SPEC)
                  )["server"]
diff --git a/nova/tests/unit/api/openstack/compute/test_servers.py b/nova/tests/unit/api/openstack/compute/test_servers.py
index cf165fde250..745bc6f891b 100644
--- a/nova/tests/unit/api/openstack/compute/test_servers.py
+++ b/nova/tests/unit/api/openstack/compute/test_servers.py
@@ -8466,8 +8466,74 @@ def test_list_view_with_missing_request_specs(self, m_rs):
             availability_zone=self.instance.availability_zone)]
 
         req = self.req('/%s/servers' % self.project_id)
-        self.assertRaises(KeyError, self.view_builder.index,
-                          req, self.instances, False)
+        output = self.view_builder.index(req, self.instances, False)
+
+        self.assertEqual(2, len(output['servers']))
+
+    @mock.patch('nova.objects.RequestSpec.get_by_instance_uuids')
+    def test_list_detail_view_with_missing_request_specs(self, m_rs):
+
+        self.instances = [
+            self.instance,
+            self.create_instance(2, uuids.fake1, 'fake-server'),
+            self.create_instance(3, uuids.fake2, 'fake-server2')
+        ]
+        # First instance's request spec has pinned availability zone
+        # Second instance's request spec has no pinned availability zone
+        m_rs.return_value = [
+            objects.RequestSpec(
+                instance_uuid=self.instance.uuid,
+                availability_zone=self.instance.availability_zone),
+            objects.RequestSpec(
+                instance_uuid=self.instance.uuid,
+                availability_zone=None)
+        ]
+
+        req = self.req('/%s/servers/detail' % self.project_id)
+        output = self.view_builder.detail(req, self.instances, False)
+
+        self.assertEqual(3, len(output['servers']))
+        # first instance has pinned az
+        self.assertEqual('nova',
+                         output['servers'][0]['pinned_availability_zone'])
+        # second or later has no pinned az
+        for s in output['servers'][1:]:
+            self.assertIsNone(s['pinned_availability_zone'])
+
+    @mock.patch('nova.objects.RequestSpec.get_by_instance_uuid')
+    def test_show_view_with_missing_request_specs(self, m_rs):
+
+        self.instances = [
+            self.instance,
+            self.create_instance(2, uuids.fake1, 'fake-server'),
+            self.create_instance(3, uuids.fake2, 'fake-server2')
+        ]
+        # First instance's request spec has pinned availability zone
+        # Second instance's request spec has no pinned availability zone
+        m_rs.side_effect = [
+            objects.RequestSpec(
+                instance_uuid=self.instance.uuid,
+                availability_zone=self.instance.availability_zone),
+            objects.RequestSpec(
+                instance_uuid=self.instance.uuid,
+                availability_zone=None),
+            exception.RequestSpecNotFound(instance_uuid='3')
+        ]
+
+        # Instance show with request spec and pinned az
+        req = self.req('/%s/servers/1' % self.project_id)
+        output = self.view_builder.show(req, self.instances[0])
+        self.assertEqual('nova', output['server']['pinned_availability_zone'])
+
+        # Instance show with request spec and no pinned az
+        req = self.req('/%s/servers/2' % self.project_id)
+        output = self.view_builder.show(req, self.instances[1])
+        self.assertIsNone(output['server']['pinned_availability_zone'])
+
+        # Instance show without request spec
+        req = self.req('/%s/servers/3' % self.project_id)
+        output = self.view_builder.show(req, self.instances[2])
+        self.assertIsNone(output['server']['pinned_availability_zone'])
 
 
 class ServersViewBuilderTestV2100(_ServersViewBuilderTest):
diff --git a/releasenotes/notes/bug-2095364-ffbf67c0ae3f53b5.yaml b/releasenotes/notes/bug-2095364-ffbf67c0ae3f53b5.yaml
new file mode 100644
index 00000000000..672863666d5
--- /dev/null
+++ b/releasenotes/notes/bug-2095364-ffbf67c0ae3f53b5.yaml
@@ -0,0 +1,15 @@
+---
+fixes:
+  - |
+    `Bug #2095364`_: Fixed the List Server API and the List Server Detail API
+    500 Internal Server Error issue in v2.96 or later API microversion if
+    one or more instance has no request spec object. One usecase was when cloud
+    user tried to create instance which exceeded their quota, the request does
+    not create instance request spec. Once the no request spec instance is
+    created in cloud user project, the server list API and the list server
+    details API return 500 Internal Server Error for the project until the
+    cloud user deletes the no request spec object instance.
+    After this fix, the v2.96 or later returns `null` at the
+    `pinned_availability_zone` value if not specified.
+
+    .. _Bug #2095364: https://launchpad.net/bugs/2095364

From 78bbd3cdfc157e31a491a77615cd6fa2af1a6b4e Mon Sep 17 00:00:00 2001
From: Daniel Niasoff <4070285+dniasoff@users.noreply.github.com>
Date: Sun, 18 May 2025 12:56:16 +0000
Subject: [PATCH 7/7] qumulus initial

---
 .github/workflows/build.yaml | 201 +++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100644 .github/workflows/build.yaml

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 00000000000..dfd24c8d07e
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,201 @@
+name: 'Build and Push Kolla Images'
+
+on:
+  push:
+    branches:
+      - main
+      - staging
+      - qa
+      - dev
+      - experimental
+
+  workflow_dispatch:
+    inputs:
+      branch:
+        type: choice
+        description: 'Branch to build'
+        options:
+        - main
+        - staging
+        - qa
+        - dev
+        - experimental
+
+permissions:
+  contents: read
+  id-token: write
+
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.repository }} @ ${{ github.event.inputs.branch || github.base_ref || github.ref_name }}'
+  cancel-in-progress: true
+
+jobs:
+  kolla_build:
+    runs-on:
+      group: prod
+
+    steps:
+
+      - uses: QumulusTechnology/vault-setup-action@v2
+        with:
+          aws_account_data: ${{ secrets.AWS_ACCOUNT_DATA }}
+          vault_addr: ${{ secrets.VAULT_ADDR }}
+          platform: qcp
+          secrets: |
+            secret/data/qcp/global/harbor/prod/github-user username | REPO_USERNAME ;
+            secret/data/qcp/global/harbor/prod/github-user password | REPO_PASSWORD ;
+            secret/data/qcp/global/harbor/prod/github-user address | REPO_ADDRESS
+
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ env.BRANCH }}
+
+      - name: Set Release version
+        run: |
+
+          if [ "$BRANCH" == "main" ]; then
+            echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV
+            echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV
+          elif [ "$BRANCH" == "staging" ]; then
+            echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV
+            echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV
+          elif [ "$BRANCH" == "qa" ]; then
+            echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV
+            echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV
+          elif [ "$BRANCH" == "experimental" ]; then
+            echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV
+            echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV
+          else
+            echo "OPENSTACK_VERSION=2025.1" >> $GITHUB_ENV
+            echo "BASE_OS_TAG=24.04" >> $GITHUB_ENV
+          fi
+
+
+          if [ "$BRANCH" == "main" ]; then
+            TAG=main
+          elif [ "$BRANCH" == "staging" ]; then
+            TAG=staging
+          elif [ "$BRANCH" == "qa" ]; then
+            TAG=qa
+          elif [ "$BRANCH" == "dev" ]; then
+            TAG=dev
+          elif [ "$BRANCH" == "experimental" ]; then
+            TAG=experimental
+          else
+            TAG=dev_${BRANCH}
+          fi
+
+          echo "TAG=$TAG" >> $GITHUB_ENV
+
+      - name: Set GITHUB Environment Variables
+        run: |
+          echo "GITHUB_ACTIONS_BRANCH=${{ github.base_ref || github.ref_name }}" >> $GITHUB_ENV
+          echo "GITHUB_ACTIONS_WORKFLOW_ID=${{ github.run_id }}" >> $GITHUB_ENV
+          echo "GITHUB_ACTIONS_WORKFLOW_ATEMPT=${{ github.run_attempt }}" >> $GITHUB_ENV
+          echo "GITHUB_ACTIONS_WORKFLOW_RUN_NUMBER=${{ github.run_number }}" >> $GITHUB_ENV
+          echo "GITHUB_ACTIONS_AUTHOR=${{ github.actor }}" >> $GITHUB_ENV
+
+      - name: Login to Harbor Hub
+        uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REPO_ADDRESS }}
+          username: ${{ env.REPO_USERNAME }}
+          password: ${{ env.REPO_PASSWORD }}
+
+      - name: Install Kolla
+        run: |
+          sudo apt update
+          sudo apt install -y bash python3 python3-pip git python3-dev python3-docker libffi-dev gcc libssl-dev python3-venv
+          python3 -m venv kolla
+          source kolla/bin/activate
+          pip install -U pip
+          pip install docker setuptools
+          pip install git+https://github.com/QumulusTechnology/kolla@${BRANCH}
+
+      - name: Create kolla-build.conf
+        run: |
+          CWD="$(pwd)"
+          sudo mkdir -p /etc/kolla
+          sudo bash -c "cat << EOF > /etc/kolla/kolla-build.conf
+
+          [DEFAULT]
+          base = ubuntu
+          namespace = kolla
+          base_tag = ${BASE_OS_TAG}
+          openstack_release = ${OPENSTACK_VERSION}
+          registry = ${REPO_ADDRESS}/qcp-${BRANCH}
+          push = true
+          skip_existing = false
+          threads = 16
+          push_threads = 4
+          install_type = source
+          tag = latest
+          template_override = /etc/kolla/template-overrides.j2
+          docker_healthchecks = true
+          network_mode = host
+
+          [${REPOSITORY}-base]
+          type = local
+          location = ${CWD}
+          EOF"
+
+
+      - name: Create template-overrides.j2
+        run: |
+          sudo bash -c "cat << EOF > /etc/kolla/template-overrides.j2
+          {% extends parent_template %}
+
+          {% block base_ubuntu_package_sources_list %}
+          {% endblock %}
+
+          {% block openstack_base_footer %}
+          RUN pip install jaeger-client
+          {% endblock %}
+
+          {% block ${REPOSITORY}_base_footer %}
+          {% endblock %}
+
+          {% block nova_compute_header %}
+          RUN apt clean && \
+              apt update && \
+              apt install -y swtpm swtpm-tools && \
+              groupadd tss && \
+              useradd -g tss tss && \
+              mkdir /home/tss && \
+              chown -R tss:tss /home/tss &&\
+              chown -R tss:tss /var/lib/swtpm-localca
+          {% endblock %}
+
+          {% block nova_compute_footer %}
+          RUN pip install git+https://github.com/openstack/oslo.utils.git@stable/${OPENSTACK_VERSION}
+          {% endblock %}
+
+          {% block nova_libvirt_footer %}
+          RUN apt clean && \
+              apt update && \
+              apt install -y swtpm swtpm-tools && \
+              mkdir /home/tss && \
+              chown -R tss:tss /home/tss && \
+              chown -R tss:tss /var/lib/swtpm-localca
+
+          USER tss
+          RUN /usr/share/swtpm/swtpm-create-user-config-files
+          USER root
+          {% endblock %}
+
+          EOF"
+
+      - name: Build docker images
+        run: |
+          source kolla/bin/activate
+          kolla-build ${REPOSITORY}
+
+      - name: Tag and push docker images
+        run: |
+          timestamp=$(date +%Y%m%d%H%M%S)
+          for i in $(docker images --format '{{.Repository}}' | grep "/qcp-${BRANCH}/"); do
+            image=${i##*/}
+            docker tag ${i}:latest ${REPO_ADDRESS}/qcp-${BRANCH}/kolla/${image}:${timestamp}
+            docker image push --all-tags ${i}
+          done