Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions lib/cdo/app_server_hooks.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require 'cdo/process_memory'

module Cdo
# NOTE: these hooks are only executed when running in puma clustered mode, which spawns worker processes.
# These hooks will NOT be run in local development unless you set `dashboard_workers: 1` (or greater)
Expand Down Expand Up @@ -25,6 +27,42 @@ def self.before_fork
restart_period = DCDO.get('web_service_process_restart_period', 12 * 3600) # default to 12 hours
PumaWorkerKiller.enable_rolling_restart(restart_period)
end

# Compact heap before forking child puma processes to reduce the number of heap pages occupied by long-lived
# objects, which reduces the surface area for Copy-on-Write erosion.
#
# The flag below is set before any work is attempted, so compaction is tried at most once for this object even
# if this code runs again. The whole step is best-effort: failures are logged as warnings and never propagated.
unless @compacted_heap_before_worker_fork
  @compacted_heap_before_worker_fork = true

  begin
    # Capture GC counters and a monotonic start time so the "after" log line can be compared with this one.
    before_gc = GC.stat
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    Cdo::ProcessMemory.log_snapshot(
      'Compacting Ruby heap before Puma worker fork',
      fields: {
        heap_allocated_pages: before_gc[:heap_allocated_pages],
        heap_live_slots: before_gc[:heap_live_slots],
        old_objects: before_gc[:old_objects]
      }
    )

    # Run a full, non-lazy collection first, then compact what is left.
    GC.start(full_mark: true, immediate_sweep: true)
    GC.compact

    after_gc = GC.stat
    duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    Cdo::ProcessMemory.log_snapshot(
      'Compacted Ruby heap before Puma worker fork',
      fields: {
        duration_seconds: duration.round(3),
        heap_allocated_pages: after_gc[:heap_allocated_pages],
        heap_live_slots: after_gc[:heap_live_slots],
        old_objects: after_gc[:old_objects]
      }
    )
  rescue StandardError, NotImplementedError => exception
    # GC.compact raises NotImplementedError on platforms without compaction support. That class descends from
    # ScriptError rather than StandardError, so it has to be listed explicitly to keep this step best-effort.
    CDO.log.warn("Failed to compact Ruby heap before Puma worker fork: #{exception.class}: #{exception.message}")
  end
end
end

def self.before_worker_boot(host:)
Expand Down
57 changes: 57 additions & 0 deletions lib/cdo/process_memory.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
require 'cdo'

module Cdo
  # Lightweight process-memory snapshots for diagnostics.
  #
  # On Linux, this reads /proc/$pid/status and /proc/$pid/smaps_rollup and returns
  # selected values in kilobytes. On macOS and other systems without those procfs
  # files, it returns an empty hash. Missing or unreadable procfs files are
  # treated as unavailable metrics, not as errors.
  module ProcessMemory
    # Field names looked up in /proc/$pid/status, mapped to the metric keys used in a snapshot.
    STATUS_FIELDS = {
      'VmRSS' => :proc_vm_rss_kb,
      'VmHWM' => :proc_vm_hwm_kb,
      'VmSize' => :proc_vm_size_kb,
      'VmData' => :proc_vm_data_kb,
      'VmSwap' => :proc_vm_swap_kb,
      'Threads' => :proc_threads
    }.freeze

    # Field names looked up in /proc/$pid/smaps_rollup, mapped to the metric keys used in a snapshot.
    SMAPS_ROLLUP_FIELDS = {
      'Rss' => :smaps_rss_kb,
      'Pss' => :smaps_pss_kb,
      'Private_Clean' => :smaps_private_clean_kb,
      'Private_Dirty' => :smaps_private_dirty_kb,
      'Shared_Clean' => :smaps_shared_clean_kb,
      'Shared_Dirty' => :smaps_shared_dirty_kb
    }.freeze

    # Collects the configured status and smaps_rollup metrics for a process.
    #
    # @param pid [Integer, #to_s] process id interpolated into the procfs paths (defaults to this process)
    # @return [Hash{Symbol => Integer}] the metrics that could be read; empty when neither file is available
    def self.snapshot_kb(pid: Process.pid)
      snapshot = {}
      read_kb_fields("/proc/#{pid}/status", STATUS_FIELDS, snapshot)
      read_kb_fields("/proc/#{pid}/smaps_rollup", SMAPS_ROLLUP_FIELDS, snapshot)
      snapshot
    end

    # Logs `message` at info level followed by comma-separated `key=value` pairs.
    #
    # @param message [String] prefix of the log line
    # @param fields [Hash] caller-supplied values; a snapshot metric with the same key overrides them
    # @param pid [Integer, #to_s] process whose memory metrics are appended
    # @return [Hash] the merged fields and metrics that were logged
    def self.log_snapshot(message, fields: {}, pid: Process.pid)
      metrics = fields.merge(snapshot_kb(pid: pid))
      CDO.log.info("#{message}: #{metrics.map {|key, value| "#{key}=#{value}"}.join(', ')}")
      metrics
    end

    # Parses "Name: value" lines from `path` and stores the integer value of every line whose
    # name is a key of `field_names` into `snapshot` under the mapped metric key.
    #
    # @param path [String] file to read
    # @param field_names [Hash{String => Symbol}] field name => metric key
    # @param snapshot [Hash] accumulator, mutated in place
    # @return [Hash] `snapshot`; unchanged (or partially filled) when the file cannot be read
    def self.read_kb_fields(path, field_names, snapshot)
      # Cheap pre-check so systems without procfs do not pay for an exception on every call.
      return snapshot unless File.readable?(path)

      File.foreach(path) do |line|
        key, value = line.split(':', 2)
        metric_name = field_names[key]
        # String#to_i skips leading whitespace and stops at the first non-digit, so any unit suffix is dropped.
        snapshot[metric_name] = value.to_i if metric_name
      end

      snapshot
    rescue SystemCallError
      # The readable? check above is only advisory: the process can exit, or the file can become
      # unreadable, before or during the read (ENOENT, EACCES, ESRCH, EPERM, EISDIR, ...). Every
      # OS-level read failure means "metric unavailable", not an error.
      snapshot
    end
    private_class_method :read_kb_fields
  end
end
Loading