From 35e5f81e014ae8232b9cbe1743b3b86aaab91b90 Mon Sep 17 00:00:00 2001 From: David Bailey Date: Mon, 22 Jun 2026 18:03:07 +0000 Subject: [PATCH] Drop Cdo wrapper from AWS::DeviceFarm module device_farm.rb declared `module Cdo; module AWS; module DeviceFarm`, out of step with its siblings cloud_formation.rb, ec2.rb, s3.rb, and cloudfront.rb, which all declare a top-level `module AWS`. Collapse to `module AWS; module DeviceFarm` to match. device_farm.rb was the only file creating a Cdo::AWS namespace, so its removal lets two now-unnecessary `::AWS` scope-resolution operators in playwright_report.rb fall back to the bare `AWS` (which resolves to the top-level module from inside `module Cdo`); the comments explaining the defense go with them. connect.rb's callers move from Cdo::AWS::DeviceFarm to AWS::DeviceFarm, matching the AWS::EC2 reference already in that file. The SDK's `::Aws::DeviceFarm` references (case-distinct, lowercase) stay explicit and untouched. Co-Authored-By: Claude Opus 4.8 (1M context) --- dashboard/test/ui/features/support/connect.rb | 22 +- lib/cdo/aws/device_farm.rb | 484 +++++++++--------- lib/cdo/playwright_report.rb | 7 +- 3 files changed, 254 insertions(+), 259 deletions(-) diff --git a/dashboard/test/ui/features/support/connect.rb b/dashboard/test/ui/features/support/connect.rb index 4b8099dc7883f..69e1882229d53 100644 --- a/dashboard/test/ui/features/support/connect.rb +++ b/dashboard/test/ui/features/support/connect.rb @@ -89,10 +89,10 @@ def saucelabs_browser(test_run_name, http_client: nil) def device_farm_desktop_browser(http_client: nil) # One-shot TestGrid URL, ready immediately. - url = Cdo::AWS::DeviceFarm.create_test_grid_url + url = AWS::DeviceFarm.create_test_grid_url capabilities = Selenium::WebDriver::Remote::Capabilities.new( - $device_farm_browser_config.except(*Cdo::AWS::DeviceFarm::INTERNAL_KEYS) + $device_farm_browser_config.except(*AWS::DeviceFarm::INTERNAL_KEYS) ) # In CI, use Chrome's --host-resolver-rules to map localhost-studio.code.org @@ -127,20 +127,20 @@ def device_farm_desktop_browser(http_client: nil) # Inspector disabled, etc.), tear it down and pick a fresh device from # the pool. def device_farm_mobile_browser(http_client: nil) - Retryable.retryable(tries: Cdo::AWS::DeviceFarm::MOBILE_DEVICE_TRIES) do |try, exception| + Retryable.retryable(tries: AWS::DeviceFarm::MOBILE_DEVICE_TRIES) do |try, exception| if exception puts "Device Farm: previous mobile session attempt failed " \ "(#{exception.class}: #{exception.message.lines.first&.strip}); " \ "provisioning a fresh device (attempt #{try})..." end - session = Cdo::AWS::DeviceFarm.create_mobile_session( + session = AWS::DeviceFarm.create_mobile_session( device_arns: $device_farm_browser_config['device_arns'] ) $device_farm_mobile_session_arn = session[:session_arn] $device_farm_mobile_device = session[:device] capabilities = Selenium::WebDriver::Remote::Capabilities.new( - $device_farm_browser_config.except(*Cdo::AWS::DeviceFarm::INTERNAL_KEYS) + $device_farm_browser_config.except(*AWS::DeviceFarm::INTERNAL_KEYS) ) # Anything after a successful create_mobile_session leaks a running @@ -149,8 +149,8 @@ def device_farm_mobile_browser(http_client: nil) # the next attempt, and lets the outer Retryable pick a different one. begin browser = Retryable.retryable( - tries: Cdo::AWS::DeviceFarm::MOBILE_CONNECT_TRIES, - sleep: Cdo::AWS::DeviceFarm::MOBILE_CONNECT_RETRY_SLEEP, + tries: AWS::DeviceFarm::MOBILE_CONNECT_TRIES, + sleep: AWS::DeviceFarm::MOBILE_CONNECT_RETRY_SLEEP, ) do SeleniumBrowser.remote( session[:url], @@ -172,7 +172,7 @@ def device_farm_mobile_browser(http_client: nil) browser rescue - Cdo::AWS::DeviceFarm.stop_mobile_session($device_farm_mobile_session_arn) + AWS::DeviceFarm.stop_mobile_session($device_farm_mobile_session_arn) $device_farm_mobile_session_arn = nil raise end @@ -214,9 +214,9 @@ def get_device_farm_browser end console_url = if is_mobile - Cdo::AWS::DeviceFarm.mobile_session_url($device_farm_mobile_session_arn) + AWS::DeviceFarm.mobile_session_url($device_farm_mobile_session_arn) else - Cdo::AWS::DeviceFarm.desktop_session_url(browser.session_id) + AWS::DeviceFarm.desktop_session_url(browser.session_id) end if console_url account_suffix = CI::Utils.running_on_ci? ? ' (codeorg-dev AWS account)' : '' @@ -315,7 +315,7 @@ def quit_browser # Release the Device Farm device so subsequent sessions don't block # on PENDING_CONCURRENCY. if $device_farm_mobile_session_arn - Cdo::AWS::DeviceFarm.stop_mobile_session($device_farm_mobile_session_arn) + AWS::DeviceFarm.stop_mobile_session($device_farm_mobile_session_arn) $device_farm_mobile_session_arn = nil end $browser = @browser = nil diff --git a/lib/cdo/aws/device_farm.rb b/lib/cdo/aws/device_farm.rb index 62ff947d1c11a..fca2911818d02 100644 --- a/lib/cdo/aws/device_farm.rb +++ b/lib/cdo/aws/device_farm.rb @@ -18,273 +18,271 @@ # Set device_farm_mobile_project_arn in AWS Secrets Manager or locals.yml. # 3. Ensure AWS credentials are available (instance profile, env vars, etc.). -module Cdo - module AWS - module DeviceFarm - # A test session that runs longer than this fails with - # "URL has expired". Picked to comfortably exceed the AWS-side - # per-session runtime cap (~1h) so this isn't the limit a long - # test hits first. AWS API accepts 60..86400. - SESSION_EXPIRY_SECONDS = 7200 # 2 hours - - # Polling parameters for mobile session provisioning. The three timeouts - # correspond to the three gating statuses AWS walks a session through: - # PENDING_CONCURRENCY -- waiting on a free concurrency slot (quota) - # PENDING_DEVICE -- slot acquired, waiting for a free device - # PREPARING (and any -- device found, booting / installing - # other intermediate - # non-terminal state) - # Isolating PENDING_DEVICE lets us tune the PREPARING timeout to actual - # device boot time rather than absorbing queue-wait time. - MOBILE_CONCURRENCY_TIMEOUT = 600 # 10 minutes -- time waiting for a slot - MOBILE_PENDING_DEVICE_TIMEOUT = 300 # 5 minutes -- time waiting for a device - MOBILE_PREPARING_TIMEOUT = 600 # 10 minutes -- time for device to boot - MOBILE_SESSION_POLL_INTERVAL = 10 # seconds - - # Retry budget for the WebDriver connect (not the session provisioning) - # after a mobile session reaches RUNNING. The Appium endpoint can return - # 400 briefly while the server finishes binding to the device; - # 6 tries * 10s covers roughly 1 minute. - MOBILE_CONNECT_TRIES = 6 - MOBILE_CONNECT_RETRY_SLEEP = 10 # seconds - - # Retry budget for the whole mobile session at the device level. - # Some physical devices in a pool have persistent problems (e.g. Web - # Inspector disabled -- Appium times out waiting for web apps, no - # amount of Appium-level retrying helps). Tearing down the DF session - # and picking a fresh device from the pool side-steps those. - MOBILE_DEVICE_TRIES = 3 - - # AWS region where Device Farm projects live. - # As of April 2026, Device Farm only available in us-west-2. - REGION = 'us-west-2'.freeze - - # Internal keys in browser configs that are not Selenium capabilities. - # Device Farm also rejects `appium:orientation` as a session capability - # (reserved), so we strip it from caps and apply it after session-start - # via the WebDriver /orientation endpoint. - INTERNAL_KEYS = %w[name mobile device_arns appium:orientation].freeze - - # ---- Desktop (TestGrid) ------------------------------------------------- - - # Returns a one-time WebDriver endpoint URL for a desktop browser session. - def self.create_test_grid_url - arn = project_arn_for(mobile: false) - resp = client.create_test_grid_url( - project_arn: arn, - expires_in_seconds: SESSION_EXPIRY_SECONDS - ) - resp.url - end +module AWS + module DeviceFarm + # A test session that runs longer than this fails with + # "URL has expired". Picked to comfortably exceed the AWS-side + # per-session runtime cap (~1h) so this isn't the limit a long + # test hits first. AWS API accepts 60..86400. + SESSION_EXPIRY_SECONDS = 7200 # 2 hours + + # Polling parameters for mobile session provisioning. The three timeouts + # correspond to the three gating statuses AWS walks a session through: + # PENDING_CONCURRENCY -- waiting on a free concurrency slot (quota) + # PENDING_DEVICE -- slot acquired, waiting for a free device + # PREPARING (and any -- device found, booting / installing + # other intermediate + # non-terminal state) + # Isolating PENDING_DEVICE lets us tune the PREPARING timeout to actual + # device boot time rather than absorbing queue-wait time. + MOBILE_CONCURRENCY_TIMEOUT = 600 # 10 minutes -- time waiting for a slot + MOBILE_PENDING_DEVICE_TIMEOUT = 300 # 5 minutes -- time waiting for a device + MOBILE_PREPARING_TIMEOUT = 600 # 10 minutes -- time for device to boot + MOBILE_SESSION_POLL_INTERVAL = 10 # seconds + + # Retry budget for the WebDriver connect (not the session provisioning) + # after a mobile session reaches RUNNING. The Appium endpoint can return + # 400 briefly while the server finishes binding to the device; + # 6 tries * 10s covers roughly 1 minute. + MOBILE_CONNECT_TRIES = 6 + MOBILE_CONNECT_RETRY_SLEEP = 10 # seconds + + # Retry budget for the whole mobile session at the device level. + # Some physical devices in a pool have persistent problems (e.g. Web + # Inspector disabled -- Appium times out waiting for web apps, no + # amount of Appium-level retrying helps). Tearing down the DF session + # and picking a fresh device from the pool side-steps those. + MOBILE_DEVICE_TRIES = 3 + + # AWS region where Device Farm projects live. + # As of April 2026, Device Farm only available in us-west-2. + REGION = 'us-west-2'.freeze + + # Internal keys in browser configs that are not Selenium capabilities. + # Device Farm also rejects `appium:orientation` as a session capability + # (reserved), so we strip it from caps and apply it after session-start + # via the WebDriver /orientation endpoint. + INTERNAL_KEYS = %w[name mobile device_arns appium:orientation].freeze + + # ---- Desktop (TestGrid) ------------------------------------------------- + + # Returns a one-time WebDriver endpoint URL for a desktop browser session. + def self.create_test_grid_url + arn = project_arn_for(mobile: false) + resp = client.create_test_grid_url( + project_arn: arn, + expires_in_seconds: SESSION_EXPIRY_SECONDS + ) + resp.url + end + + # AWS console URL for a desktop TestGrid session's Selenium logs. + def self.desktop_session_url(selenium_session_id) + arn = CDO.device_farm_desktop_project_arn + return nil if arn.blank? + project_uuid = arn.split(':')[6] + "https://#{REGION}.console.aws.amazon.com/devicefarm/home" \ + "#/browser/projects/#{project_uuid}/runsselenium/logs/#{selenium_session_id}" + end - # AWS console URL for a desktop TestGrid session's Selenium logs. - def self.desktop_session_url(selenium_session_id) - arn = CDO.device_farm_desktop_project_arn - return nil if arn.blank? - project_uuid = arn.split(':')[6] - "https://#{REGION}.console.aws.amazon.com/devicefarm/home" \ - "#/browser/projects/#{project_uuid}/runsselenium/logs/#{selenium_session_id}" + # ---- Mobile (Remote Access Session) ------------------------------------- + + # Provisions a real device from a candidate list, waits for it to be + # ready, and returns the Appium WebDriver endpoint URL plus the session + # ARN (for later logging). AWS doesn't accept a device pool on + # create_remote_access_session (pools are a create_run concept), so the + # caller passes the candidate ARNs directly from configuration. We + # query AWS for each device's current availability and pick from the + # healthiest tier to avoid queueing behind a BUSY device. + # + # @param device_arns [Array] candidate device ARNs. One is + # picked client-side based on current availability. + # @return [Hash] { url: String, session_arn: String, device: Aws::DeviceFarm::Types::Device } + def self.create_mobile_session(device_arns:) + raise "Device Farm: no candidate device ARNs provided" if device_arns.blank? + project_arn = project_arn_for(mobile: true) + devices = lookup_devices(device_arns) + device = pick_best_device(devices) + puts "Device Farm: provisioning device #{device.arn} (#{device.availability}) " \ + "from #{device_arns.size} candidate(s)..." + + resp = client.create_remote_access_session( + project_arn: project_arn, + device_arn: device.arn, + name: "ui-test-#{Time.now.to_i}" + ) + session_arn = resp.remote_access_session.arn + + # If waiting for the endpoint fails (typically a PENDING_CONCURRENCY / + # PENDING_DEVICE timeout), stop the session before re-raising so we + # release the device back to AWS -- otherwise the next run queues + # behind this stuck session and hits the same timeout. + endpoint = begin + wait_for_mobile_session_endpoint(session_arn) + rescue + stop_mobile_session(session_arn) + raise end - # ---- Mobile (Remote Access Session) ------------------------------------- + {url: endpoint, session_arn: session_arn, device: device} + end - # Provisions a real device from a candidate list, waits for it to be - # ready, and returns the Appium WebDriver endpoint URL plus the session - # ARN (for later logging). AWS doesn't accept a device pool on - # create_remote_access_session (pools are a create_run concept), so the - # caller passes the candidate ARNs directly from configuration. We - # query AWS for each device's current availability and pick from the - # healthiest tier to avoid queueing behind a BUSY device. - # - # @param device_arns [Array] candidate device ARNs. One is - # picked client-side based on current availability. - # @return [Hash] { url: String, session_arn: String, device: Aws::DeviceFarm::Types::Device } - def self.create_mobile_session(device_arns:) - raise "Device Farm: no candidate device ARNs provided" if device_arns.blank? - project_arn = project_arn_for(mobile: true) - devices = lookup_devices(device_arns) - device = pick_best_device(devices) - puts "Device Farm: provisioning device #{device.arn} (#{device.availability}) " \ - "from #{device_arns.size} candidate(s)..." - - resp = client.create_remote_access_session( - project_arn: project_arn, - device_arn: device.arn, - name: "ui-test-#{Time.now.to_i}" - ) - session_arn = resp.remote_access_session.arn - - # If waiting for the endpoint fails (typically a PENDING_CONCURRENCY / - # PENDING_DEVICE timeout), stop the session before re-raising so we - # release the device back to AWS -- otherwise the next run queues - # behind this stuck session and hits the same timeout. - endpoint = begin - wait_for_mobile_session_endpoint(session_arn) - rescue - stop_mobile_session(session_arn) - raise - end + # AWS console URL for a mobile remote access session's files/logs view. + # Parses session ARN tail // into the console path. + def self.mobile_session_url(session_arn) + return nil if session_arn.blank? + tail = session_arn.split(':', 7)[6] + return nil if tail.blank? + project_uuid, session_uuid, sub_id = tail.split('/') + return nil unless project_uuid && session_uuid && sub_id + "https://#{REGION}.console.aws.amazon.com/devicefarm/home" \ + "#/mobile/projects/#{project_uuid}/sessions/#{session_uuid}/#{sub_id}/files" + end - {url: endpoint, session_arn: session_arn, device: device} - end + # Stops a mobile remote access session so the device is released back to + # the pool. Without this, the session stays open until it times out + # (defaultJobTimeoutMinutes) and blocks subsequent sessions with + # PENDING_CONCURRENCY. + def self.stop_mobile_session(session_arn) + return unless session_arn + puts "Device Farm: stopping mobile session #{session_arn}" + client.stop_remote_access_session(arn: session_arn) + rescue => exception + puts "Error stopping Device Farm mobile session: #{exception}" + end - # AWS console URL for a mobile remote access session's files/logs view. - # Parses session ARN tail // into the console path. - def self.mobile_session_url(session_arn) - return nil if session_arn.blank? - tail = session_arn.split(':', 7)[6] - return nil if tail.blank? - project_uuid, session_uuid, sub_id = tail.split('/') - return nil unless project_uuid && session_uuid && sub_id - "https://#{REGION}.console.aws.amazon.com/devicefarm/home" \ - "#/mobile/projects/#{project_uuid}/sessions/#{session_uuid}/#{sub_id}/files" + # ---- Private helpers ---------------------------------------------------- + + # Fetches Device objects for the given ARNs so we can inspect current + # availability. Uses get_device per ARN rather than list_devices with + # an ARN-IN filter; list_devices silently returned empty results for + # some iPad ARNs despite those ARNs being valid and visible in the + # console (suspected fleet-scope / pagination nuance). Per-ARN + # lookups are ~3 API calls per session but give explicit per-ARN + # errors on mismatch. + def self.lookup_devices(device_arns) + device_arns.filter_map do |arn| + client.get_device(arn: arn).device + rescue ::Aws::DeviceFarm::Errors::NotFoundException => exception + puts "Device Farm: skipping unresolved device #{arn}: #{exception.message}" + nil end + end - # Stops a mobile remote access session so the device is released back to - # the pool. Without this, the session stays open until it times out - # (defaultJobTimeoutMinutes) and blocks subsequent sessions with - # PENDING_CONCURRENCY. - def self.stop_mobile_session(session_arn) - return unless session_arn - puts "Device Farm: stopping mobile session #{session_arn}" - client.stop_remote_access_session(arn: session_arn) - rescue => exception - puts "Error stopping Device Farm mobile session: #{exception}" + # Availability tiers AWS reports for each device, in descending order + # of desirability. TEMPORARY_NOT_AVAILABLE is excluded entirely -- the + # device is out of service and requesting it wastes time. + AVAILABILITY_PREFERENCE = %w[HIGHLY_AVAILABLE AVAILABLE BUSY].freeze + + # Groups candidate devices by availability, picks the best-tier group, + # and samples within it at random to spread load across equal-priority + # devices. Raises if no device is in any usable tier. + def self.pick_best_device(devices) + by_tier = devices.group_by(&:availability) + AVAILABILITY_PREFERENCE.each do |tier| + candidates = by_tier[tier] + return candidates.sample if candidates&.any? end + raise "Device Farm: no usable device among candidates " \ + "(availabilities: #{by_tier.keys.sort})" + end - # ---- Private helpers ---------------------------------------------------- - - # Fetches Device objects for the given ARNs so we can inspect current - # availability. Uses get_device per ARN rather than list_devices with - # an ARN-IN filter; list_devices silently returned empty results for - # some iPad ARNs despite those ARNs being valid and visible in the - # console (suspected fleet-scope / pagination nuance). Per-ARN - # lookups are ~3 API calls per session but give explicit per-ARN - # errors on mismatch. - def self.lookup_devices(device_arns) - device_arns.filter_map do |arn| - client.get_device(arn: arn).device - rescue ::Aws::DeviceFarm::Errors::NotFoundException => exception - puts "Device Farm: skipping unresolved device #{arn}: #{exception.message}" - nil - end + # Returns the appropriate project ARN and raises if blank. + def self.project_arn_for(mobile: false) + if mobile + raise 'Please define CDO.device_farm_mobile_project_arn AWS Secrets Manager or locals.yml' \ + if CDO.device_farm_mobile_project_arn.blank? + CDO.device_farm_mobile_project_arn + else + raise 'Please define CDO.device_farm_desktop_project_arn AWS Secrets Manager or locals.yml' \ + if CDO.device_farm_desktop_project_arn.blank? + CDO.device_farm_desktop_project_arn end + end - # Availability tiers AWS reports for each device, in descending order - # of desirability. TEMPORARY_NOT_AVAILABLE is excluded entirely -- the - # device is out of service and requesting it wastes time. - AVAILABILITY_PREFERENCE = %w[HIGHLY_AVAILABLE AVAILABLE BUSY].freeze - - # Groups candidate devices by availability, picks the best-tier group, - # and samples within it at random to spread load across equal-priority - # devices. Raises if no device is in any usable tier. - def self.pick_best_device(devices) - by_tier = devices.group_by(&:availability) - AVAILABILITY_PREFERENCE.each do |tier| - candidates = by_tier[tier] - return candidates.sample if candidates&.any? + # Polls until the remote access session is RUNNING and returns its + # WebDriver endpoint URL. Raises on timeout or unexpected terminal state. + # Each gating status has its own deadline so we can tell from a timeout + # message which phase ran long. + TERMINAL_STATUSES = %w[COMPLETED STOPPING STOPPED].freeze + + def self.wait_for_mobile_session_endpoint(session_arn) + concurrency_deadline = Time.now + MOBILE_CONCURRENCY_TIMEOUT + pending_device_deadline = nil # set on first PENDING_DEVICE observation + preparing_deadline = nil # set on first PREPARING / other intermediate + last_status = nil + + loop do + resp = client.get_remote_access_session(arn: session_arn) + session = resp.remote_access_session + + if session.status != last_status + puts "Device Farm: session #{session_arn} status=#{session.status}" + last_status = session.status end - raise "Device Farm: no usable device among candidates " \ - "(availabilities: #{by_tier.keys.sort})" - end - # Returns the appropriate project ARN and raises if blank. - def self.project_arn_for(mobile: false) - if mobile - raise 'Please define CDO.device_farm_mobile_project_arn AWS Secrets Manager or locals.yml' \ - if CDO.device_farm_mobile_project_arn.blank? - CDO.device_farm_mobile_project_arn - else - raise 'Please define CDO.device_farm_desktop_project_arn AWS Secrets Manager or locals.yml' \ - if CDO.device_farm_desktop_project_arn.blank? - CDO.device_farm_desktop_project_arn + if session.status == 'RUNNING' + driver_endpoint = session.endpoints&.remote_driver_endpoint + legacy_endpoint = session.endpoint + puts "Device Farm: session ready, endpoint=#{(driver_endpoint || legacy_endpoint).inspect}" + url = driver_endpoint || legacy_endpoint + raise "Device Farm session RUNNING but no endpoint URL available" unless url + return url end - end - # Polls until the remote access session is RUNNING and returns its - # WebDriver endpoint URL. Raises on timeout or unexpected terminal state. - # Each gating status has its own deadline so we can tell from a timeout - # message which phase ran long. - TERMINAL_STATUSES = %w[COMPLETED STOPPING STOPPED].freeze - - def self.wait_for_mobile_session_endpoint(session_arn) - concurrency_deadline = Time.now + MOBILE_CONCURRENCY_TIMEOUT - pending_device_deadline = nil # set on first PENDING_DEVICE observation - preparing_deadline = nil # set on first PREPARING / other intermediate - last_status = nil - - loop do - resp = client.get_remote_access_session(arn: session_arn) - session = resp.remote_access_session - - if session.status != last_status - puts "Device Farm: session #{session_arn} status=#{session.status}" - last_status = session.status - end + if TERMINAL_STATUSES.include?(session.status) + raise "Device Farm mobile session ended unexpectedly: " \ + "status=#{session.status}, result=#{session.result}, message=#{session.message}" + end - if session.status == 'RUNNING' - driver_endpoint = session.endpoints&.remote_driver_endpoint - legacy_endpoint = session.endpoint - puts "Device Farm: session ready, endpoint=#{(driver_endpoint || legacy_endpoint).inspect}" - url = driver_endpoint || legacy_endpoint - raise "Device Farm session RUNNING but no endpoint URL available" unless url - return url + case session.status + when 'PENDING_CONCURRENCY' + if Time.now > concurrency_deadline + raise "Timed out after #{MOBILE_CONCURRENCY_TIMEOUT}s waiting for a free concurrency slot " \ + "(PENDING_CONCURRENCY) for session #{session_arn}" end - - if TERMINAL_STATUSES.include?(session.status) - raise "Device Farm mobile session ended unexpectedly: " \ - "status=#{session.status}, result=#{session.result}, message=#{session.message}" + when 'PENDING_DEVICE' + pending_device_deadline ||= Time.now + MOBILE_PENDING_DEVICE_TIMEOUT + if Time.now > pending_device_deadline + raise "Timed out after #{MOBILE_PENDING_DEVICE_TIMEOUT}s waiting for a free device " \ + "(PENDING_DEVICE) for session #{session_arn}" end - - case session.status - when 'PENDING_CONCURRENCY' - if Time.now > concurrency_deadline - raise "Timed out after #{MOBILE_CONCURRENCY_TIMEOUT}s waiting for a free concurrency slot " \ - "(PENDING_CONCURRENCY) for session #{session_arn}" - end - when 'PENDING_DEVICE' - pending_device_deadline ||= Time.now + MOBILE_PENDING_DEVICE_TIMEOUT - if Time.now > pending_device_deadline - raise "Timed out after #{MOBILE_PENDING_DEVICE_TIMEOUT}s waiting for a free device " \ - "(PENDING_DEVICE) for session #{session_arn}" - end - else - # PREPARING and any other intermediate status. First observation - # of such a status starts the preparing (boot) timer. - preparing_deadline ||= Time.now + MOBILE_PREPARING_TIMEOUT - if Time.now > preparing_deadline - raise "Timed out after #{MOBILE_PREPARING_TIMEOUT}s waiting for device to boot " \ - "for session #{session_arn} (last status: #{session.status})" - end + else + # PREPARING and any other intermediate status. First observation + # of such a status starts the preparing (boot) timer. + preparing_deadline ||= Time.now + MOBILE_PREPARING_TIMEOUT + if Time.now > preparing_deadline + raise "Timed out after #{MOBILE_PREPARING_TIMEOUT}s waiting for device to boot " \ + "for session #{session_arn} (last status: #{session.status})" end - - sleep MOBILE_SESSION_POLL_INTERVAL end - end - def self.client - # Scope-resolve to be explicit that we mean the AWS SDK's top-level - # Aws module (unrelated to our Cdo::AWS namespace). Our module's - # case-different name already protects against collision, but the - # `::` prefix makes intent unambiguous at the call site. - # - # retry_mode: 'adaptive' + retry_limit: 20 absorbs DF's per-account - # API TPS throttling (separate from the session-concurrency quota) - # when many workers burst create_test_grid_url at once. The token - # bucket is per-process (post-fork), so this softens each worker's - # response to ThrottlingException rather than coordinating workers. - # 20 retries gives a worst-case backoff window of ~350s (full - # jitter, capped at 20s/attempt) -- enough to ride out a multi- - # minute sustained throttle without giving up. - @client ||= ::Aws::DeviceFarm::Client.new( - region: REGION, - retry_mode: 'adaptive', - retry_limit: 20 - ) + sleep MOBILE_SESSION_POLL_INTERVAL end + end - private_class_method :lookup_devices, :pick_best_device, :project_arn_for, - :wait_for_mobile_session_endpoint, :client + def self.client + # Scope-resolve to be explicit that we mean the AWS SDK's top-level + # Aws module (unrelated to our AWS namespace). Our module's + # case-different name already protects against collision, but the + # `::` prefix makes intent unambiguous at the call site. + # + # retry_mode: 'adaptive' + retry_limit: 20 absorbs DF's per-account + # API TPS throttling (separate from the session-concurrency quota) + # when many workers burst create_test_grid_url at once. The token + # bucket is per-process (post-fork), so this softens each worker's + # response to ThrottlingException rather than coordinating workers. + # 20 retries gives a worst-case backoff window of ~350s (full + # jitter, capped at 20s/attempt) -- enough to ride out a multi- + # minute sustained throttle without giving up. + @client ||= ::Aws::DeviceFarm::Client.new( + region: REGION, + retry_mode: 'adaptive', + retry_limit: 20 + ) end + + private_class_method :lookup_devices, :pick_best_device, :project_arn_for, + :wait_for_mobile_session_endpoint, :client end end diff --git a/lib/cdo/playwright_report.rb b/lib/cdo/playwright_report.rb index f14038713b498..66b0677a50769 100644 --- a/lib/cdo/playwright_report.rb +++ b/lib/cdo/playwright_report.rb @@ -22,9 +22,7 @@ module PlaywrightReport def self.upload(report_dir) return nil unless File.directory?(report_dir) - # Absolute ::AWS — inside module Cdo, a bare AWS resolves to Cdo::AWS - # (defined by cdo/aws/* siblings) and would miss the top-level uploader. - uploader = ::AWS::S3::LogUploader.new(BUCKET, "#{prefix}/playwright", make_public: true) + uploader = AWS::S3::LogUploader.new(BUCKET, "#{prefix}/playwright", make_public: true) index_url = nil Dir.glob(File.join(report_dir, '**', '*')).each do |path| next unless File.file?(path) @@ -47,8 +45,7 @@ def self.upload(report_dir) # Computed without uploading, so the report can be linked before the run finishes. def self.index_url - # ::AWS, not bare AWS (→ Cdo::AWS); see #upload. - ::AWS::S3.public_url(BUCKET, "#{prefix}/playwright/index.html") + AWS::S3.public_url(BUCKET, "#{prefix}/playwright/index.html") rescue StandardError => exception CDO.log.error "Failed to compute Playwright report URL: #{exception.message}" nil