// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */
| 5 | |
| 6 | #include "xe_tuning.h" |
| 7 | |
| 8 | #include <kunit/visibility.h> |
| 9 | |
| 10 | #include <drm/drm_managed.h> |
| 11 | #include <drm/drm_print.h> |
| 12 | |
| 13 | #include "regs/xe_gt_regs.h" |
| 14 | #include "xe_gt_types.h" |
| 15 | #include "xe_platform_types.h" |
| 16 | #include "xe_rtp.h" |
| 17 | |
/*
 * Replace any earlier XE_REG_MCR definition for the rest of this file so that
 * it expands to XE_REG() with the .mcr flag set.
 * NOTE(review): presumably needed so MCR registers can be used inside the
 * static RTP tables below - confirm against the register definition headers.
 */
#undef XE_REG_MCR
#define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
| 20 | |
| 21 | static const struct xe_rtp_entry_sr gt_tunings[] = { |
| 22 | { XE_RTP_NAME("Tuning: Blend Fill Caching Optimization Disable" ), |
| 23 | XE_RTP_RULES(PLATFORM(DG2)), |
| 24 | XE_RTP_ACTIONS(SET(XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS)) |
| 25 | }, |
| 26 | { XE_RTP_NAME("Tuning: 32B Access Enable" ), |
| 27 | XE_RTP_RULES(PLATFORM(DG2)), |
| 28 | XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS)) |
| 29 | }, |
| 30 | |
| 31 | /* Xe2 */ |
| 32 | |
| 33 | { XE_RTP_NAME("Tuning: L3 cache" ), |
| 34 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), |
| 35 | XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, |
| 36 | REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) |
| 37 | }, |
| 38 | { XE_RTP_NAME("Tuning: L3 cache - media" ), |
| 39 | XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), |
| 40 | XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, |
| 41 | REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) |
| 42 | }, |
| 43 | { XE_RTP_NAME("Tuning: Compression Overfetch" ), |
| 44 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), |
| 45 | FUNC(xe_rtp_match_has_flat_ccs)), |
| 46 | XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), |
| 47 | SET(CCCHKNREG1, L3CMPCTRL)) |
| 48 | }, |
| 49 | { XE_RTP_NAME("Tuning: Compression Overfetch - media" ), |
| 50 | XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), |
| 51 | XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), |
| 52 | SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) |
| 53 | }, |
| 54 | { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3" ), |
| 55 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), |
| 56 | XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) |
| 57 | }, |
| 58 | { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media" ), |
| 59 | XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)), |
| 60 | XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) |
| 61 | }, |
| 62 | { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only" ), |
| 63 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED), |
| 64 | FUNC(xe_rtp_match_has_flat_ccs)), |
| 65 | XE_RTP_ACTIONS(SET(L3SQCREG2, |
| 66 | COMPMEMRD256BOVRFETCHEN)) |
| 67 | }, |
| 68 | { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media" ), |
| 69 | XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), |
| 70 | FUNC(xe_rtp_match_has_flat_ccs)), |
| 71 | XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, |
| 72 | COMPMEMRD256BOVRFETCHEN)) |
| 73 | }, |
| 74 | { XE_RTP_NAME("Tuning: Stateless compression control" ), |
| 75 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), |
| 76 | XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, |
| 77 | REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) |
| 78 | }, |
| 79 | { XE_RTP_NAME("Tuning: Stateless compression control - media" ), |
| 80 | XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, XE_RTP_END_VERSION_UNDEFINED)), |
| 81 | XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, |
| 82 | REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) |
| 83 | }, |
| 84 | { XE_RTP_NAME("Tuning: L3 RW flush all Cache" ), |
| 85 | XE_RTP_RULES(GRAPHICS_VERSION(2004)), |
| 86 | XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) |
| 87 | }, |
| 88 | { XE_RTP_NAME("Tuning: L3 RW flush all cache - media" ), |
| 89 | XE_RTP_RULES(MEDIA_VERSION(2000)), |
| 90 | XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) |
| 91 | }, |
| 92 | }; |
| 93 | |
| 94 | static const struct xe_rtp_entry_sr engine_tunings[] = { |
| 95 | { XE_RTP_NAME("Tuning: L3 Hashing Mask" ), |
| 96 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), |
| 97 | FUNC(xe_rtp_match_first_render_or_compute)), |
| 98 | XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC)) |
| 99 | }, |
| 100 | { XE_RTP_NAME("Tuning: Set Indirect State Override" ), |
| 101 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), |
| 102 | ENGINE_CLASS(RENDER)), |
| 103 | XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) |
| 104 | }, |
| 105 | { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader" ), |
| 106 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), |
| 107 | FUNC(xe_rtp_match_first_render_or_compute)), |
| 108 | XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) |
| 109 | }, |
| 110 | }; |
| 111 | |
| 112 | static const struct xe_rtp_entry_sr lrc_tunings[] = { |
| 113 | /* DG2 */ |
| 114 | |
| 115 | { XE_RTP_NAME("Tuning: L3 cache" ), |
| 116 | XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), |
| 117 | XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, |
| 118 | REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) |
| 119 | }, |
| 120 | { XE_RTP_NAME("Tuning: TDS gang timer" ), |
| 121 | XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), |
| 122 | /* read verification is ignored as in i915 - need to check enabling */ |
| 123 | XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(XEHP_FF_MODE2, |
| 124 | FF_MODE2_TDS_TIMER_MASK, |
| 125 | FF_MODE2_TDS_TIMER_128)) |
| 126 | }, |
| 127 | { XE_RTP_NAME("Tuning: TBIMR fast clip" ), |
| 128 | XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), |
| 129 | XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) |
| 130 | }, |
| 131 | |
| 132 | /* Xe_LPG */ |
| 133 | |
| 134 | { XE_RTP_NAME("Tuning: L3 cache" ), |
| 135 | XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), |
| 136 | XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, |
| 137 | REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) |
| 138 | }, |
| 139 | |
| 140 | /* Xe2_HPG */ |
| 141 | |
| 142 | { XE_RTP_NAME("Tuning: vs hit max value" ), |
| 143 | XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), |
| 144 | XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, |
| 145 | REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) |
| 146 | }, |
| 147 | }; |
| 148 | |
| 149 | /** |
| 150 | * xe_tuning_init - initialize gt with tunings bookkeeping |
| 151 | * @gt: GT instance to initialize |
| 152 | * |
| 153 | * Returns 0 for success, negative error code otherwise. |
| 154 | */ |
| 155 | int xe_tuning_init(struct xe_gt *gt) |
| 156 | { |
| 157 | struct xe_device *xe = gt_to_xe(gt); |
| 158 | size_t n_lrc, n_engine, n_gt, total; |
| 159 | unsigned long *p; |
| 160 | |
| 161 | n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings)); |
| 162 | n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings)); |
| 163 | n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings)); |
| 164 | total = n_gt + n_engine + n_lrc; |
| 165 | |
| 166 | p = drmm_kzalloc(dev: &xe->drm, size: sizeof(*p) * total, GFP_KERNEL); |
| 167 | if (!p) |
| 168 | return -ENOMEM; |
| 169 | |
| 170 | gt->tuning_active.gt = p; |
| 171 | p += n_gt; |
| 172 | gt->tuning_active.engine = p; |
| 173 | p += n_engine; |
| 174 | gt->tuning_active.lrc = p; |
| 175 | |
| 176 | return 0; |
| 177 | } |
| 178 | ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */ |
| 179 | |
| 180 | void xe_tuning_process_gt(struct xe_gt *gt) |
| 181 | { |
| 182 | struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); |
| 183 | |
| 184 | xe_rtp_process_ctx_enable_active_tracking(ctx: &ctx, |
| 185 | active_entries: gt->tuning_active.gt, |
| 186 | ARRAY_SIZE(gt_tunings)); |
| 187 | xe_rtp_process_to_sr(ctx: &ctx, entries: gt_tunings, ARRAY_SIZE(gt_tunings), sr: >->reg_sr); |
| 188 | } |
| 189 | EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); |
| 190 | |
| 191 | void xe_tuning_process_engine(struct xe_hw_engine *hwe) |
| 192 | { |
| 193 | struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); |
| 194 | |
| 195 | xe_rtp_process_ctx_enable_active_tracking(ctx: &ctx, |
| 196 | active_entries: hwe->gt->tuning_active.engine, |
| 197 | ARRAY_SIZE(engine_tunings)); |
| 198 | xe_rtp_process_to_sr(ctx: &ctx, entries: engine_tunings, ARRAY_SIZE(engine_tunings), |
| 199 | sr: &hwe->reg_sr); |
| 200 | } |
| 201 | EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); |
| 202 | |
| 203 | /** |
| 204 | * xe_tuning_process_lrc - process lrc tunings |
| 205 | * @hwe: engine instance to process tunings for |
| 206 | * |
| 207 | * Process LRC table for this platform, saving in @hwe all the tunings that need |
| 208 | * to be applied on context restore. These are tunings touching registers that |
| 209 | * are part of the HW context image. |
| 210 | */ |
| 211 | void xe_tuning_process_lrc(struct xe_hw_engine *hwe) |
| 212 | { |
| 213 | struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); |
| 214 | |
| 215 | xe_rtp_process_ctx_enable_active_tracking(ctx: &ctx, |
| 216 | active_entries: hwe->gt->tuning_active.lrc, |
| 217 | ARRAY_SIZE(lrc_tunings)); |
| 218 | xe_rtp_process_to_sr(ctx: &ctx, entries: lrc_tunings, ARRAY_SIZE(lrc_tunings), sr: &hwe->reg_lrc); |
| 219 | } |
| 220 | |
| 221 | /** |
| 222 | * xe_tuning_dump() - Dump GT tuning info into a drm printer. |
| 223 | * @gt: the &xe_gt |
| 224 | * @p: the &drm_printer |
| 225 | * |
| 226 | * Return: always 0. |
| 227 | */ |
| 228 | int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) |
| 229 | { |
| 230 | size_t idx; |
| 231 | |
| 232 | drm_printf(p, f: "GT Tunings\n" ); |
| 233 | for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) |
| 234 | drm_printf_indent(p, 1, "%s\n" , gt_tunings[idx].name); |
| 235 | |
| 236 | drm_puts(p, str: "\n" ); |
| 237 | drm_printf(p, f: "Engine Tunings\n" ); |
| 238 | for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) |
| 239 | drm_printf_indent(p, 1, "%s\n" , engine_tunings[idx].name); |
| 240 | |
| 241 | drm_puts(p, str: "\n" ); |
| 242 | drm_printf(p, f: "LRC Tunings\n" ); |
| 243 | for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)) |
| 244 | drm_printf_indent(p, 1, "%s\n" , lrc_tunings[idx].name); |
| 245 | |
| 246 | return 0; |
| 247 | } |
| 248 | |