| 1 | // SPDX-License-Identifier: MIT |
| 2 | /* |
| 3 | * Copyright © 2020 Intel Corporation |
| 4 | */ |
| 5 | |
| 6 | #include "i915_drv.h" |
| 7 | #include "i915_reg.h" |
| 8 | #include "intel_gt.h" |
| 9 | #include "intel_gt_clock_utils.h" |
| 10 | #include "intel_gt_print.h" |
| 11 | #include "intel_gt_regs.h" |
| 12 | #include "soc/intel_dram.h" |
| 13 | |
| 14 | static u32 read_reference_ts_freq(struct intel_uncore *uncore) |
| 15 | { |
| 16 | u32 ts_override = intel_uncore_read(uncore, GEN9_TIMESTAMP_OVERRIDE); |
| 17 | u32 base_freq, frac_freq; |
| 18 | |
| 19 | base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> |
| 20 | GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; |
| 21 | base_freq *= 1000000; |
| 22 | |
| 23 | frac_freq = ((ts_override & |
| 24 | GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> |
| 25 | GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); |
| 26 | frac_freq = 1000000 / (frac_freq + 1); |
| 27 | |
| 28 | return base_freq + frac_freq; |
| 29 | } |
| 30 | |
| 31 | static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore, |
| 32 | u32 rpm_config_reg) |
| 33 | { |
| 34 | u32 f19_2_mhz = 19200000; |
| 35 | u32 f24_mhz = 24000000; |
| 36 | u32 f25_mhz = 25000000; |
| 37 | u32 f38_4_mhz = 38400000; |
| 38 | u32 crystal_clock = rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK; |
| 39 | |
| 40 | switch (crystal_clock) { |
| 41 | case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: |
| 42 | return f24_mhz; |
| 43 | case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: |
| 44 | return f19_2_mhz; |
| 45 | case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: |
| 46 | return f38_4_mhz; |
| 47 | case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: |
| 48 | return f25_mhz; |
| 49 | default: |
| 50 | MISSING_CASE(crystal_clock); |
| 51 | return 0; |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | static u32 gen11_read_clock_frequency(struct intel_uncore *uncore) |
| 56 | { |
| 57 | u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); |
| 58 | u32 freq = 0; |
| 59 | |
| 60 | /* |
| 61 | * Note that on gen11+, the clock frequency may be reconfigured. |
| 62 | * We do not, and we assume nobody else does. |
| 63 | * |
| 64 | * First figure out the reference frequency. There are 2 ways |
| 65 | * we can compute the frequency, either through the |
| 66 | * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE |
| 67 | * tells us which one we should use. |
| 68 | */ |
| 69 | if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { |
| 70 | freq = read_reference_ts_freq(uncore); |
| 71 | } else { |
| 72 | u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0); |
| 73 | |
| 74 | freq = gen11_get_crystal_clock_freq(uncore, rpm_config_reg: c0); |
| 75 | |
| 76 | /* |
| 77 | * Now figure out how the command stream's timestamp |
| 78 | * register increments from this frequency (it might |
| 79 | * increment only every few clock cycle). |
| 80 | */ |
| 81 | freq >>= 3 - REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0); |
| 82 | } |
| 83 | |
| 84 | return freq; |
| 85 | } |
| 86 | |
| 87 | static u32 gen9_read_clock_frequency(struct intel_uncore *uncore) |
| 88 | { |
| 89 | u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); |
| 90 | u32 freq = 0; |
| 91 | |
| 92 | if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { |
| 93 | freq = read_reference_ts_freq(uncore); |
| 94 | } else { |
| 95 | freq = IS_GEN9_LP(uncore->i915) ? 19200000 : 24000000; |
| 96 | |
| 97 | /* |
| 98 | * Now figure out how the command stream's timestamp |
| 99 | * register increments from this frequency (it might |
| 100 | * increment only every few clock cycle). |
| 101 | */ |
| 102 | freq >>= 3 - REG_FIELD_GET(CTC_SHIFT_PARAMETER_MASK, ctc_reg); |
| 103 | } |
| 104 | |
| 105 | return freq; |
| 106 | } |
| 107 | |
| 108 | static u32 gen6_read_clock_frequency(struct intel_uncore *uncore) |
| 109 | { |
| 110 | /* |
| 111 | * PRMs say: |
| 112 | * |
| 113 | * "The PCU TSC counts 10ns increments; this timestamp |
| 114 | * reflects bits 38:3 of the TSC (i.e. 80ns granularity, |
| 115 | * rolling over every 1.5 hours). |
| 116 | */ |
| 117 | return 12500000; |
| 118 | } |
| 119 | |
| 120 | static u32 gen5_read_clock_frequency(struct intel_uncore *uncore) |
| 121 | { |
| 122 | /* |
| 123 | * 63:32 increments every 1000 ns |
| 124 | * 31:0 mbz |
| 125 | */ |
| 126 | return 1000000000 / 1000; |
| 127 | } |
| 128 | |
| 129 | static u32 g4x_read_clock_frequency(struct intel_uncore *uncore) |
| 130 | { |
| 131 | /* |
| 132 | * 63:20 increments every 1/4 ns |
| 133 | * 19:0 mbz |
| 134 | * |
| 135 | * -> 63:32 increments every 1024 ns |
| 136 | */ |
| 137 | return 1000000000 / 1024; |
| 138 | } |
| 139 | |
| 140 | static u32 gen4_read_clock_frequency(struct intel_uncore *uncore) |
| 141 | { |
| 142 | /* |
| 143 | * PRMs say: |
| 144 | * |
| 145 | * "The value in this register increments once every 16 |
| 146 | * hclks." (through the “Clocking Configuration” |
| 147 | * (“CLKCFG”) MCHBAR register) |
| 148 | * |
| 149 | * Testing on actual hardware has shown there is no /16. |
| 150 | */ |
| 151 | return DIV_ROUND_CLOSEST(intel_fsb_freq(uncore->i915), 4) * 1000; |
| 152 | } |
| 153 | |
| 154 | static u32 read_clock_frequency(struct intel_uncore *uncore) |
| 155 | { |
| 156 | if (GRAPHICS_VER(uncore->i915) >= 11) |
| 157 | return gen11_read_clock_frequency(uncore); |
| 158 | else if (GRAPHICS_VER(uncore->i915) >= 9) |
| 159 | return gen9_read_clock_frequency(uncore); |
| 160 | else if (GRAPHICS_VER(uncore->i915) >= 6) |
| 161 | return gen6_read_clock_frequency(uncore); |
| 162 | else if (GRAPHICS_VER(uncore->i915) == 5) |
| 163 | return gen5_read_clock_frequency(uncore); |
| 164 | else if (IS_G4X(uncore->i915)) |
| 165 | return g4x_read_clock_frequency(uncore); |
| 166 | else if (GRAPHICS_VER(uncore->i915) == 4) |
| 167 | return gen4_read_clock_frequency(uncore); |
| 168 | else |
| 169 | return 0; |
| 170 | } |
| 171 | |
| 172 | void intel_gt_init_clock_frequency(struct intel_gt *gt) |
| 173 | { |
| 174 | gt->clock_frequency = read_clock_frequency(uncore: gt->uncore); |
| 175 | |
| 176 | /* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */ |
| 177 | if (GRAPHICS_VER(gt->i915) == 11) |
| 178 | gt->clock_period_ns = NSEC_PER_SEC / 13750000; |
| 179 | else if (gt->clock_frequency) |
| 180 | gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, count: 1); |
| 181 | |
| 182 | GT_TRACE(gt, |
| 183 | "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n" , |
| 184 | gt->clock_frequency / 1000, |
| 185 | gt->clock_period_ns, |
| 186 | div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX), |
| 187 | USEC_PER_SEC)); |
| 188 | } |
| 189 | |
| 190 | #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) |
| 191 | void intel_gt_check_clock_frequency(const struct intel_gt *gt) |
| 192 | { |
| 193 | if (gt->clock_frequency != read_clock_frequency(gt->uncore)) { |
| 194 | gt_err(gt, "GT clock frequency changed, was %uHz, now %uHz!\n" , |
| 195 | gt->clock_frequency, |
| 196 | read_clock_frequency(gt->uncore)); |
| 197 | } |
| 198 | } |
| 199 | #endif |
| 200 | |
| 201 | static u64 div_u64_roundup(u64 nom, u32 den) |
| 202 | { |
| 203 | return div_u64(dividend: nom + den - 1, divisor: den); |
| 204 | } |
| 205 | |
| 206 | u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count) |
| 207 | { |
| 208 | return mul_u64_u32_div(a: count, NSEC_PER_SEC, div: gt->clock_frequency); |
| 209 | } |
| 210 | |
| 211 | u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) |
| 212 | { |
| 213 | return intel_gt_clock_interval_to_ns(gt, count: 16 * count); |
| 214 | } |
| 215 | |
| 216 | u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns) |
| 217 | { |
| 218 | return mul_u64_u32_div(a: ns, mul: gt->clock_frequency, NSEC_PER_SEC); |
| 219 | } |
| 220 | |
| 221 | u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns) |
| 222 | { |
| 223 | u64 val; |
| 224 | |
| 225 | /* |
| 226 | * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS |
| 227 | * 8300) freezing up around GPU hangs. Looks as if even |
| 228 | * scheduling/timer interrupts start misbehaving if the RPS |
| 229 | * EI/thresholds are "bad", leading to a very sluggish or even |
| 230 | * frozen machine. |
| 231 | */ |
| 232 | val = div_u64_roundup(nom: intel_gt_ns_to_clock_interval(gt, ns), den: 16); |
| 233 | if (GRAPHICS_VER(gt->i915) == 6) |
| 234 | val = div_u64_roundup(nom: val, den: 25) * 25; |
| 235 | |
| 236 | return val; |
| 237 | } |
| 238 | |