| 1 | // SPDX-License-Identifier: MIT |
| 2 | /* |
| 3 | * Copyright © 2014 Intel Corporation |
| 4 | */ |
| 5 | |
| 6 | #include <drm/drm_print.h> |
| 7 | |
| 8 | #include "gem/i915_gem_internal.h" |
| 9 | |
| 10 | #include "i915_drv.h" |
| 11 | #include "intel_renderstate.h" |
| 12 | #include "intel_context.h" |
| 13 | #include "intel_gpu_commands.h" |
| 14 | #include "intel_ring.h" |
| 15 | |
| 16 | static const struct intel_renderstate_rodata * |
| 17 | render_state_get_rodata(const struct intel_engine_cs *engine) |
| 18 | { |
| 19 | if (engine->class != RENDER_CLASS) |
| 20 | return NULL; |
| 21 | |
| 22 | switch (GRAPHICS_VER(engine->i915)) { |
| 23 | case 6: |
| 24 | return &gen6_null_state; |
| 25 | case 7: |
| 26 | return &gen7_null_state; |
| 27 | case 8: |
| 28 | return &gen8_null_state; |
| 29 | case 9: |
| 30 | return &gen9_null_state; |
| 31 | } |
| 32 | |
| 33 | return NULL; |
| 34 | } |
| 35 | |
| 36 | /* |
| 37 | * Macro to add commands to auxiliary batch. |
| 38 | * This macro only checks for page overflow before inserting the commands, |
| 39 | * this is sufficient as the null state generator makes the final batch |
| 40 | * with two passes to build command and state separately. At this point |
| 41 | * the size of both are known and it compacts them by relocating the state |
 * right after the commands taking care of alignment, so we should have
 * sufficient space below them for adding new commands.
| 44 | */ |
/*
 * NOTE: this macro expands a bare "goto out" on page overflow, so it may
 * only be used inside a function that declares an "out" label (see
 * render_state_setup() below). (i) is incremented on success.
 */
#define OUT_BATCH(batch, i, val) \
	do { \
		if ((i) >= PAGE_SIZE / sizeof(u32)) \
			goto out; \
		(batch)[(i)++] = (val); \
	} while (0)
| 51 | |
| 52 | static int render_state_setup(struct intel_renderstate *so, |
| 53 | struct drm_i915_private *i915) |
| 54 | { |
| 55 | const struct intel_renderstate_rodata *rodata = so->rodata; |
| 56 | unsigned int i = 0, reloc_index = 0; |
| 57 | int ret = -EINVAL; |
| 58 | u32 *d; |
| 59 | |
| 60 | d = i915_gem_object_pin_map(obj: so->vma->obj, type: I915_MAP_WB); |
| 61 | if (IS_ERR(ptr: d)) |
| 62 | return PTR_ERR(ptr: d); |
| 63 | |
| 64 | while (i < rodata->batch_items) { |
| 65 | u32 s = rodata->batch[i]; |
| 66 | |
| 67 | if (i * 4 == rodata->reloc[reloc_index]) { |
| 68 | u64 r = s + i915_vma_offset(vma: so->vma); |
| 69 | |
| 70 | s = lower_32_bits(r); |
| 71 | if (HAS_64BIT_RELOC(i915)) { |
| 72 | if (i + 1 >= rodata->batch_items || |
| 73 | rodata->batch[i + 1] != 0) |
| 74 | goto out; |
| 75 | |
| 76 | d[i++] = s; |
| 77 | s = upper_32_bits(r); |
| 78 | } |
| 79 | |
| 80 | reloc_index++; |
| 81 | } |
| 82 | |
| 83 | d[i++] = s; |
| 84 | } |
| 85 | |
| 86 | if (rodata->reloc[reloc_index] != -1) { |
| 87 | drm_err(&i915->drm, "only %d relocs resolved\n" , reloc_index); |
| 88 | goto out; |
| 89 | } |
| 90 | |
| 91 | so->batch_offset = i915_ggtt_offset(vma: so->vma); |
| 92 | so->batch_size = rodata->batch_items * sizeof(u32); |
| 93 | |
| 94 | while (i % CACHELINE_DWORDS) |
| 95 | OUT_BATCH(d, i, MI_NOOP); |
| 96 | |
| 97 | so->aux_offset = i * sizeof(u32); |
| 98 | |
| 99 | if (HAS_POOLED_EU(i915)) { |
| 100 | /* |
| 101 | * We always program 3x6 pool config but depending upon which |
| 102 | * subslice is disabled HW drops down to appropriate config |
| 103 | * shown below. |
| 104 | * |
| 105 | * In the below table 2x6 config always refers to |
| 106 | * fused-down version, native 2x6 is not available and can |
| 107 | * be ignored |
| 108 | * |
| 109 | * SNo subslices config eu pool configuration |
| 110 | * ----------------------------------------------------------- |
| 111 | * 1 3 subslices enabled (3x6) - 0x00777000 (9+9) |
| 112 | * 2 ss0 disabled (2x6) - 0x00777000 (3+9) |
| 113 | * 3 ss1 disabled (2x6) - 0x00770000 (6+6) |
| 114 | * 4 ss2 disabled (2x6) - 0x00007000 (9+3) |
| 115 | */ |
| 116 | u32 eu_pool_config = 0x00777000; |
| 117 | |
| 118 | OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); |
| 119 | OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); |
| 120 | OUT_BATCH(d, i, eu_pool_config); |
| 121 | OUT_BATCH(d, i, 0); |
| 122 | OUT_BATCH(d, i, 0); |
| 123 | OUT_BATCH(d, i, 0); |
| 124 | } |
| 125 | |
| 126 | OUT_BATCH(d, i, MI_BATCH_BUFFER_END); |
| 127 | so->aux_size = i * sizeof(u32) - so->aux_offset; |
| 128 | so->aux_offset += so->batch_offset; |
| 129 | /* |
| 130 | * Since we are sending length, we need to strictly conform to |
| 131 | * all requirements. For Gen2 this must be a multiple of 8. |
| 132 | */ |
| 133 | so->aux_size = ALIGN(so->aux_size, 8); |
| 134 | |
| 135 | ret = 0; |
| 136 | out: |
| 137 | __i915_gem_object_flush_map(obj: so->vma->obj, offset: 0, size: i * sizeof(u32)); |
| 138 | __i915_gem_object_release_map(obj: so->vma->obj); |
| 139 | return ret; |
| 140 | } |
| 141 | |
| 142 | #undef OUT_BATCH |
| 143 | |
| 144 | int intel_renderstate_init(struct intel_renderstate *so, |
| 145 | struct intel_context *ce) |
| 146 | { |
| 147 | struct intel_engine_cs *engine = ce->engine; |
| 148 | struct drm_i915_gem_object *obj = NULL; |
| 149 | int err; |
| 150 | |
| 151 | memset(so, 0, sizeof(*so)); |
| 152 | |
| 153 | so->rodata = render_state_get_rodata(engine); |
| 154 | if (so->rodata) { |
| 155 | if (so->rodata->batch_items * 4 > PAGE_SIZE) |
| 156 | return -EINVAL; |
| 157 | |
| 158 | obj = i915_gem_object_create_internal(i915: engine->i915, PAGE_SIZE); |
| 159 | if (IS_ERR(ptr: obj)) |
| 160 | return PTR_ERR(ptr: obj); |
| 161 | |
| 162 | so->vma = i915_vma_instance(obj, vm: &engine->gt->ggtt->vm, NULL); |
| 163 | if (IS_ERR(ptr: so->vma)) { |
| 164 | err = PTR_ERR(ptr: so->vma); |
| 165 | goto err_obj; |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | i915_gem_ww_ctx_init(ctx: &so->ww, intr: true); |
| 170 | retry: |
| 171 | err = intel_context_pin_ww(ce, ww: &so->ww); |
| 172 | if (err) |
| 173 | goto err_fini; |
| 174 | |
| 175 | /* return early if there's nothing to setup */ |
| 176 | if (!err && !so->rodata) |
| 177 | return 0; |
| 178 | |
| 179 | err = i915_gem_object_lock(obj: so->vma->obj, ww: &so->ww); |
| 180 | if (err) |
| 181 | goto err_context; |
| 182 | |
| 183 | err = i915_vma_pin_ww(vma: so->vma, ww: &so->ww, size: 0, alignment: 0, PIN_GLOBAL | PIN_HIGH); |
| 184 | if (err) |
| 185 | goto err_context; |
| 186 | |
| 187 | err = render_state_setup(so, i915: engine->i915); |
| 188 | if (err) |
| 189 | goto err_unpin; |
| 190 | |
| 191 | return 0; |
| 192 | |
| 193 | err_unpin: |
| 194 | i915_vma_unpin(vma: so->vma); |
| 195 | err_context: |
| 196 | intel_context_unpin(ce); |
| 197 | err_fini: |
| 198 | if (err == -EDEADLK) { |
| 199 | err = i915_gem_ww_ctx_backoff(ctx: &so->ww); |
| 200 | if (!err) |
| 201 | goto retry; |
| 202 | } |
| 203 | i915_gem_ww_ctx_fini(ctx: &so->ww); |
| 204 | err_obj: |
| 205 | if (obj) |
| 206 | i915_gem_object_put(obj); |
| 207 | so->vma = NULL; |
| 208 | return err; |
| 209 | } |
| 210 | |
| 211 | int intel_renderstate_emit(struct intel_renderstate *so, |
| 212 | struct i915_request *rq) |
| 213 | { |
| 214 | struct intel_engine_cs *engine = rq->engine; |
| 215 | int err; |
| 216 | |
| 217 | if (!so->vma) |
| 218 | return 0; |
| 219 | |
| 220 | err = i915_vma_move_to_active(vma: so->vma, rq, flags: 0); |
| 221 | if (err) |
| 222 | return err; |
| 223 | |
| 224 | err = engine->emit_bb_start(rq, |
| 225 | so->batch_offset, so->batch_size, |
| 226 | I915_DISPATCH_SECURE); |
| 227 | if (err) |
| 228 | return err; |
| 229 | |
| 230 | if (so->aux_size > 8) { |
| 231 | err = engine->emit_bb_start(rq, |
| 232 | so->aux_offset, so->aux_size, |
| 233 | I915_DISPATCH_SECURE); |
| 234 | if (err) |
| 235 | return err; |
| 236 | } |
| 237 | |
| 238 | return 0; |
| 239 | } |
| 240 | |
| 241 | void intel_renderstate_fini(struct intel_renderstate *so, |
| 242 | struct intel_context *ce) |
| 243 | { |
| 244 | if (so->vma) { |
| 245 | i915_vma_unpin(vma: so->vma); |
| 246 | i915_vma_close(vma: so->vma); |
| 247 | } |
| 248 | |
| 249 | intel_context_unpin(ce); |
| 250 | i915_gem_ww_ctx_fini(ctx: &so->ww); |
| 251 | |
| 252 | if (so->vma) |
| 253 | i915_gem_object_put(obj: so->vma->obj); |
| 254 | } |
| 255 | |