| 1 | /* |
| 2 | * Copyright 2016 Advanced Micro Devices, Inc. |
| 3 | * All Rights Reserved. |
| 4 | * |
| 5 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 6 | * copy of this software and associated documentation files (the |
| 7 | * "Software"), to deal in the Software without restriction, including |
| 8 | * without limitation the rights to use, copy, modify, merge, publish, |
| 9 | * distribute, sub license, and/or sell copies of the Software, and to |
| 10 | * permit persons to whom the Software is furnished to do so, subject to |
| 11 | * the following conditions: |
| 12 | * |
| 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| 16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
| 17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| 18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| 19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 20 | * |
| 21 | * The above copyright notice and this permission notice (including the |
| 22 | * next paragraph) shall be included in all copies or substantial portions |
| 23 | * of the Software. |
| 24 | * |
| 25 | */ |
| 26 | |
| 27 | #include <linux/firmware.h> |
| 28 | #include <drm/drm_drv.h> |
| 29 | |
| 30 | #include "amdgpu.h" |
| 31 | #include "amdgpu_vce.h" |
| 32 | #include "soc15.h" |
| 33 | #include "soc15d.h" |
| 34 | #include "soc15_common.h" |
| 35 | #include "mmsch_v1_0.h" |
| 36 | |
| 37 | #include "vce/vce_4_0_offset.h" |
| 38 | #include "vce/vce_4_0_default.h" |
| 39 | #include "vce/vce_4_0_sh_mask.h" |
| 40 | #include "mmhub/mmhub_1_0_offset.h" |
| 41 | #include "mmhub/mmhub_1_0_sh_mask.h" |
| 42 | |
| 43 | #include "ivsrcid/vce/irqsrcs_vce_4_0.h" |
| 44 | |
/* Bit tested in VCE_STATUS while waiting for the firmware to report in */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/*
 * Sizes of the three regions (firmware, stack, data) that are programmed
 * into the VCE_VCPU_CACHE_SIZE0/1/2 registers.
 */
#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

/* Used before their definitions further down in this file */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
| 54 | |
| 55 | /** |
| 56 | * vce_v4_0_ring_get_rptr - get read pointer |
| 57 | * |
| 58 | * @ring: amdgpu_ring pointer |
| 59 | * |
| 60 | * Returns the current hardware read pointer |
| 61 | */ |
| 62 | static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring) |
| 63 | { |
| 64 | struct amdgpu_device *adev = ring->adev; |
| 65 | |
| 66 | if (ring->me == 0) |
| 67 | return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR)); |
| 68 | else if (ring->me == 1) |
| 69 | return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2)); |
| 70 | else |
| 71 | return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3)); |
| 72 | } |
| 73 | |
| 74 | /** |
| 75 | * vce_v4_0_ring_get_wptr - get write pointer |
| 76 | * |
| 77 | * @ring: amdgpu_ring pointer |
| 78 | * |
| 79 | * Returns the current hardware write pointer |
| 80 | */ |
| 81 | static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring) |
| 82 | { |
| 83 | struct amdgpu_device *adev = ring->adev; |
| 84 | |
| 85 | if (ring->use_doorbell) |
| 86 | return *ring->wptr_cpu_addr; |
| 87 | |
| 88 | if (ring->me == 0) |
| 89 | return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR)); |
| 90 | else if (ring->me == 1) |
| 91 | return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2)); |
| 92 | else |
| 93 | return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3)); |
| 94 | } |
| 95 | |
| 96 | /** |
| 97 | * vce_v4_0_ring_set_wptr - set write pointer |
| 98 | * |
| 99 | * @ring: amdgpu_ring pointer |
| 100 | * |
| 101 | * Commits the write pointer to the hardware |
| 102 | */ |
| 103 | static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring) |
| 104 | { |
| 105 | struct amdgpu_device *adev = ring->adev; |
| 106 | |
| 107 | if (ring->use_doorbell) { |
| 108 | /* XXX check if swapping is necessary on BE */ |
| 109 | *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); |
| 110 | WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); |
| 111 | return; |
| 112 | } |
| 113 | |
| 114 | if (ring->me == 0) |
| 115 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), |
| 116 | lower_32_bits(ring->wptr)); |
| 117 | else if (ring->me == 1) |
| 118 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), |
| 119 | lower_32_bits(ring->wptr)); |
| 120 | else |
| 121 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), |
| 122 | lower_32_bits(ring->wptr)); |
| 123 | } |
| 124 | |
| 125 | static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev) |
| 126 | { |
| 127 | int i, j; |
| 128 | |
| 129 | for (i = 0; i < 10; ++i) { |
| 130 | for (j = 0; j < 100; ++j) { |
| 131 | uint32_t status = |
| 132 | RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)); |
| 133 | |
| 134 | if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK) |
| 135 | return 0; |
| 136 | mdelay(10); |
| 137 | } |
| 138 | |
| 139 | DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n" ); |
| 140 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), |
| 141 | VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, |
| 142 | ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); |
| 143 | mdelay(10); |
| 144 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0, |
| 145 | ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); |
| 146 | mdelay(10); |
| 147 | |
| 148 | } |
| 149 | |
| 150 | return -ETIMEDOUT; |
| 151 | } |
| 152 | |
| 153 | static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, |
| 154 | struct amdgpu_mm_table *table) |
| 155 | { |
| 156 | uint32_t data = 0, loop; |
| 157 | uint64_t addr = table->gpu_addr; |
| 158 | struct mmsch_v1_0_init_header * = (struct mmsch_v1_0_init_header *)table->cpu_addr; |
| 159 | uint32_t size; |
| 160 | |
| 161 | size = header->header_size + header->vce_table_size + header->uvd_table_size; |
| 162 | |
| 163 | /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ |
| 164 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); |
| 165 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); |
| 166 | |
| 167 | /* 2, update vmid of descriptor */ |
| 168 | data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); |
| 169 | data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; |
| 170 | data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ |
| 171 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); |
| 172 | |
| 173 | /* 3, notify mmsch about the size of this descriptor */ |
| 174 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); |
| 175 | |
| 176 | /* 4, set resp to zero */ |
| 177 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); |
| 178 | |
| 179 | WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); |
| 180 | *adev->vce.ring[0].wptr_cpu_addr = 0; |
| 181 | adev->vce.ring[0].wptr = 0; |
| 182 | adev->vce.ring[0].wptr_old = 0; |
| 183 | |
| 184 | /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ |
| 185 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); |
| 186 | |
| 187 | data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); |
| 188 | loop = 1000; |
| 189 | while ((data & 0x10000002) != 0x10000002) { |
| 190 | udelay(usec: 10); |
| 191 | data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); |
| 192 | loop--; |
| 193 | if (!loop) |
| 194 | break; |
| 195 | } |
| 196 | |
| 197 | if (!loop) { |
| 198 | dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n" , data); |
| 199 | return -EBUSY; |
| 200 | } |
| 201 | |
| 202 | return 0; |
| 203 | } |
| 204 | |
| 205 | static int vce_v4_0_sriov_start(struct amdgpu_device *adev) |
| 206 | { |
| 207 | struct amdgpu_ring *ring; |
| 208 | uint32_t offset, size; |
| 209 | uint32_t table_size = 0; |
| 210 | struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } }; |
| 211 | struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } }; |
| 212 | struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } }; |
| 213 | struct mmsch_v1_0_cmd_end end = { { 0 } }; |
| 214 | uint32_t *init_table = adev->virt.mm_table.cpu_addr; |
| 215 | struct mmsch_v1_0_init_header * = (struct mmsch_v1_0_init_header *)init_table; |
| 216 | |
| 217 | direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; |
| 218 | direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; |
| 219 | direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING; |
| 220 | end.cmd_header.command_type = MMSCH_COMMAND__END; |
| 221 | |
| 222 | if (header->vce_table_offset == 0 && header->vce_table_size == 0) { |
| 223 | header->version = MMSCH_VERSION; |
| 224 | header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2; |
| 225 | |
| 226 | if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) |
| 227 | header->vce_table_offset = header->header_size; |
| 228 | else |
| 229 | header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset; |
| 230 | |
| 231 | init_table += header->vce_table_offset; |
| 232 | |
| 233 | ring = &adev->vce.ring[0]; |
| 234 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), |
| 235 | lower_32_bits(ring->gpu_addr)); |
| 236 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), |
| 237 | upper_32_bits(ring->gpu_addr)); |
| 238 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), |
| 239 | ring->ring_size / 4); |
| 240 | |
| 241 | /* BEGING OF MC_RESUME */ |
| 242 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); |
| 243 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); |
| 244 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); |
| 245 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); |
| 246 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); |
| 247 | |
| 248 | offset = AMDGPU_VCE_FIRMWARE_OFFSET; |
| 249 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 250 | uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo; |
| 251 | uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi; |
| 252 | uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low; |
| 253 | |
| 254 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
| 255 | mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8); |
| 256 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
| 257 | mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), |
| 258 | (tmr_mc_addr >> 40) & 0xff); |
| 259 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0); |
| 260 | } else { |
| 261 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
| 262 | mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), |
| 263 | adev->vce.gpu_addr >> 8); |
| 264 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
| 265 | mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), |
| 266 | (adev->vce.gpu_addr >> 40) & 0xff); |
| 267 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), |
| 268 | offset & ~0x0f000000); |
| 269 | |
| 270 | } |
| 271 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
| 272 | mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), |
| 273 | adev->vce.gpu_addr >> 8); |
| 274 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
| 275 | mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), |
| 276 | (adev->vce.gpu_addr >> 40) & 0xff); |
| 277 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
| 278 | mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), |
| 279 | adev->vce.gpu_addr >> 8); |
| 280 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, |
| 281 | mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), |
| 282 | (adev->vce.gpu_addr >> 40) & 0xff); |
| 283 | |
| 284 | size = VCE_V4_0_FW_SIZE; |
| 285 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); |
| 286 | |
| 287 | offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0; |
| 288 | size = VCE_V4_0_STACK_SIZE; |
| 289 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), |
| 290 | (offset & ~0x0f000000) | (1 << 24)); |
| 291 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); |
| 292 | |
| 293 | offset += size; |
| 294 | size = VCE_V4_0_DATA_SIZE; |
| 295 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), |
| 296 | (offset & ~0x0f000000) | (2 << 24)); |
| 297 | MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); |
| 298 | |
| 299 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); |
| 300 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), |
| 301 | VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK, |
| 302 | VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); |
| 303 | |
| 304 | /* end of MC_RESUME */ |
| 305 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), |
| 306 | VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK); |
| 307 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), |
| 308 | ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); |
| 309 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), |
| 310 | ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); |
| 311 | |
| 312 | MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), |
| 313 | VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, |
| 314 | VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); |
| 315 | |
| 316 | /* clear BUSY flag */ |
| 317 | MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), |
| 318 | ~VCE_STATUS__JOB_BUSY_MASK, 0); |
| 319 | |
| 320 | /* add end packet */ |
| 321 | memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); |
| 322 | table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; |
| 323 | header->vce_table_size = table_size; |
| 324 | } |
| 325 | |
| 326 | return vce_v4_0_mmsch_start(adev, table: &adev->virt.mm_table); |
| 327 | } |
| 328 | |
| 329 | /** |
| 330 | * vce_v4_0_start - start VCE block |
| 331 | * |
| 332 | * @adev: amdgpu_device pointer |
| 333 | * |
| 334 | * Setup and start the VCE block |
| 335 | */ |
| 336 | static int vce_v4_0_start(struct amdgpu_device *adev) |
| 337 | { |
| 338 | struct amdgpu_ring *ring; |
| 339 | int r; |
| 340 | |
| 341 | ring = &adev->vce.ring[0]; |
| 342 | |
| 343 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr)); |
| 344 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr)); |
| 345 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr); |
| 346 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); |
| 347 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4); |
| 348 | |
| 349 | ring = &adev->vce.ring[1]; |
| 350 | |
| 351 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr)); |
| 352 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr)); |
| 353 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr); |
| 354 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr)); |
| 355 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4); |
| 356 | |
| 357 | ring = &adev->vce.ring[2]; |
| 358 | |
| 359 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr)); |
| 360 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr)); |
| 361 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr); |
| 362 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr)); |
| 363 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4); |
| 364 | |
| 365 | vce_v4_0_mc_resume(adev); |
| 366 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK, |
| 367 | ~VCE_STATUS__JOB_BUSY_MASK); |
| 368 | |
| 369 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001); |
| 370 | |
| 371 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0, |
| 372 | ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); |
| 373 | mdelay(100); |
| 374 | |
| 375 | r = vce_v4_0_firmware_loaded(adev); |
| 376 | |
| 377 | /* clear BUSY flag */ |
| 378 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); |
| 379 | |
| 380 | if (r) { |
| 381 | DRM_ERROR("VCE not responding, giving up!!!\n" ); |
| 382 | return r; |
| 383 | } |
| 384 | |
| 385 | return 0; |
| 386 | } |
| 387 | |
| 388 | static int vce_v4_0_stop(struct amdgpu_device *adev) |
| 389 | { |
| 390 | |
| 391 | /* Disable VCPU */ |
| 392 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001); |
| 393 | |
| 394 | /* hold on ECPU */ |
| 395 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), |
| 396 | VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, |
| 397 | ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); |
| 398 | |
| 399 | /* clear VCE_STATUS */ |
| 400 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0); |
| 401 | |
| 402 | /* Set Clock-Gating off */ |
| 403 | /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) |
| 404 | vce_v4_0_set_vce_sw_clock_gating(adev, false); |
| 405 | */ |
| 406 | |
| 407 | return 0; |
| 408 | } |
| 409 | |
| 410 | static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block) |
| 411 | { |
| 412 | struct amdgpu_device *adev = ip_block->adev; |
| 413 | int r; |
| 414 | |
| 415 | r = amdgpu_vce_early_init(adev); |
| 416 | if (r) |
| 417 | return r; |
| 418 | |
| 419 | if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */ |
| 420 | adev->vce.num_rings = 1; |
| 421 | else |
| 422 | adev->vce.num_rings = 3; |
| 423 | |
| 424 | vce_v4_0_set_ring_funcs(adev); |
| 425 | vce_v4_0_set_irq_funcs(adev); |
| 426 | |
| 427 | return 0; |
| 428 | } |
| 429 | |
| 430 | static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block) |
| 431 | { |
| 432 | struct amdgpu_device *adev = ip_block->adev; |
| 433 | struct amdgpu_ring *ring; |
| 434 | |
| 435 | unsigned size; |
| 436 | int r, i; |
| 437 | |
| 438 | r = amdgpu_irq_add_id(adev, client_id: SOC15_IH_CLIENTID_VCE0, src_id: 167, source: &adev->vce.irq); |
| 439 | if (r) |
| 440 | return r; |
| 441 | |
| 442 | size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE; |
| 443 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) |
| 444 | size += VCE_V4_0_FW_SIZE; |
| 445 | |
| 446 | r = amdgpu_vce_sw_init(adev, size); |
| 447 | if (r) |
| 448 | return r; |
| 449 | |
| 450 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 451 | const struct common_firmware_header *hdr; |
| 452 | unsigned size = amdgpu_bo_size(bo: adev->vce.vcpu_bo); |
| 453 | |
| 454 | adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL); |
| 455 | if (!adev->vce.saved_bo) |
| 456 | return -ENOMEM; |
| 457 | |
| 458 | hdr = (const struct common_firmware_header *)adev->vce.fw->data; |
| 459 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; |
| 460 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; |
| 461 | adev->firmware.fw_size += |
| 462 | ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); |
| 463 | DRM_INFO("PSP loading VCE firmware\n" ); |
| 464 | } else { |
| 465 | r = amdgpu_vce_resume(adev); |
| 466 | if (r) |
| 467 | return r; |
| 468 | } |
| 469 | |
| 470 | for (i = 0; i < adev->vce.num_rings; i++) { |
| 471 | enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(ring: i); |
| 472 | |
| 473 | ring = &adev->vce.ring[i]; |
| 474 | ring->vm_hub = AMDGPU_MMHUB0(0); |
| 475 | sprintf(buf: ring->name, fmt: "vce%d" , i); |
| 476 | if (amdgpu_sriov_vf(adev)) { |
| 477 | /* DOORBELL only works under SRIOV */ |
| 478 | ring->use_doorbell = true; |
| 479 | |
| 480 | /* currently only use the first encoding ring for sriov, |
| 481 | * so set unused location for other unused rings. |
| 482 | */ |
| 483 | if (i == 0) |
| 484 | ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2; |
| 485 | else |
| 486 | ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1; |
| 487 | } |
| 488 | r = amdgpu_ring_init(adev, ring, max_dw: 512, irq_src: &adev->vce.irq, irq_type: 0, |
| 489 | hw_prio, NULL); |
| 490 | if (r) |
| 491 | return r; |
| 492 | } |
| 493 | |
| 494 | r = amdgpu_virt_alloc_mm_table(adev); |
| 495 | if (r) |
| 496 | return r; |
| 497 | |
| 498 | return r; |
| 499 | } |
| 500 | |
| 501 | static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) |
| 502 | { |
| 503 | int r; |
| 504 | struct amdgpu_device *adev = ip_block->adev; |
| 505 | |
| 506 | /* free MM table */ |
| 507 | amdgpu_virt_free_mm_table(adev); |
| 508 | |
| 509 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 510 | kvfree(addr: adev->vce.saved_bo); |
| 511 | adev->vce.saved_bo = NULL; |
| 512 | } |
| 513 | |
| 514 | r = amdgpu_vce_suspend(adev); |
| 515 | if (r) |
| 516 | return r; |
| 517 | |
| 518 | return amdgpu_vce_sw_fini(adev); |
| 519 | } |
| 520 | |
| 521 | static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block) |
| 522 | { |
| 523 | int r, i; |
| 524 | struct amdgpu_device *adev = ip_block->adev; |
| 525 | |
| 526 | if (amdgpu_sriov_vf(adev)) |
| 527 | r = vce_v4_0_sriov_start(adev); |
| 528 | else |
| 529 | r = vce_v4_0_start(adev); |
| 530 | if (r) |
| 531 | return r; |
| 532 | |
| 533 | for (i = 0; i < adev->vce.num_rings; i++) { |
| 534 | r = amdgpu_ring_test_helper(ring: &adev->vce.ring[i]); |
| 535 | if (r) |
| 536 | return r; |
| 537 | } |
| 538 | |
| 539 | DRM_INFO("VCE initialized successfully.\n" ); |
| 540 | |
| 541 | return 0; |
| 542 | } |
| 543 | |
| 544 | static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) |
| 545 | { |
| 546 | struct amdgpu_device *adev = ip_block->adev; |
| 547 | |
| 548 | cancel_delayed_work_sync(dwork: &adev->vce.idle_work); |
| 549 | |
| 550 | if (!amdgpu_sriov_vf(adev)) { |
| 551 | /* vce_v4_0_wait_for_idle(ip_block); */ |
| 552 | vce_v4_0_stop(adev); |
| 553 | } else { |
| 554 | /* full access mode, so don't touch any VCE register */ |
| 555 | DRM_DEBUG("For SRIOV client, shouldn't do anything.\n" ); |
| 556 | } |
| 557 | |
| 558 | return 0; |
| 559 | } |
| 560 | |
| 561 | static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block) |
| 562 | { |
| 563 | struct amdgpu_device *adev = ip_block->adev; |
| 564 | int r, idx; |
| 565 | |
| 566 | if (adev->vce.vcpu_bo == NULL) |
| 567 | return 0; |
| 568 | |
| 569 | if (drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) { |
| 570 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 571 | unsigned size = amdgpu_bo_size(bo: adev->vce.vcpu_bo); |
| 572 | void *ptr = adev->vce.cpu_addr; |
| 573 | |
| 574 | memcpy_fromio(adev->vce.saved_bo, ptr, size); |
| 575 | } |
| 576 | drm_dev_exit(idx); |
| 577 | } |
| 578 | |
| 579 | /* |
| 580 | * Proper cleanups before halting the HW engine: |
| 581 | * - cancel the delayed idle work |
| 582 | * - enable powergating |
| 583 | * - enable clockgating |
| 584 | * - disable dpm |
| 585 | * |
| 586 | * TODO: to align with the VCN implementation, move the |
| 587 | * jobs for clockgating/powergating/dpm setting to |
| 588 | * ->set_powergating_state(). |
| 589 | */ |
| 590 | cancel_delayed_work_sync(dwork: &adev->vce.idle_work); |
| 591 | |
| 592 | if (adev->pm.dpm_enabled) { |
| 593 | amdgpu_dpm_enable_vce(adev, enable: false); |
| 594 | } else { |
| 595 | amdgpu_asic_set_vce_clocks(adev, 0, 0); |
| 596 | amdgpu_device_ip_set_powergating_state(dev: adev, block_type: AMD_IP_BLOCK_TYPE_VCE, |
| 597 | state: AMD_PG_STATE_GATE); |
| 598 | amdgpu_device_ip_set_clockgating_state(dev: adev, block_type: AMD_IP_BLOCK_TYPE_VCE, |
| 599 | state: AMD_CG_STATE_GATE); |
| 600 | } |
| 601 | |
| 602 | r = vce_v4_0_hw_fini(ip_block); |
| 603 | if (r) |
| 604 | return r; |
| 605 | |
| 606 | return amdgpu_vce_suspend(adev); |
| 607 | } |
| 608 | |
| 609 | static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block) |
| 610 | { |
| 611 | struct amdgpu_device *adev = ip_block->adev; |
| 612 | int r, idx; |
| 613 | |
| 614 | if (adev->vce.vcpu_bo == NULL) |
| 615 | return -EINVAL; |
| 616 | |
| 617 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 618 | |
| 619 | if (drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) { |
| 620 | unsigned size = amdgpu_bo_size(bo: adev->vce.vcpu_bo); |
| 621 | void *ptr = adev->vce.cpu_addr; |
| 622 | |
| 623 | memcpy_toio(ptr, adev->vce.saved_bo, size); |
| 624 | drm_dev_exit(idx); |
| 625 | } |
| 626 | } else { |
| 627 | r = amdgpu_vce_resume(adev); |
| 628 | if (r) |
| 629 | return r; |
| 630 | } |
| 631 | |
| 632 | return vce_v4_0_hw_init(ip_block); |
| 633 | } |
| 634 | |
| 635 | static void vce_v4_0_mc_resume(struct amdgpu_device *adev) |
| 636 | { |
| 637 | uint32_t offset, size; |
| 638 | uint64_t tmr_mc_addr; |
| 639 | |
| 640 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16)); |
| 641 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000); |
| 642 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F); |
| 643 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF); |
| 644 | |
| 645 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000); |
| 646 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1); |
| 647 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); |
| 648 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); |
| 649 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); |
| 650 | |
| 651 | offset = AMDGPU_VCE_FIRMWARE_OFFSET; |
| 652 | |
| 653 | if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { |
| 654 | tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 | |
| 655 | adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo; |
| 656 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), |
| 657 | (tmr_mc_addr >> 8)); |
| 658 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), |
| 659 | (tmr_mc_addr >> 40) & 0xff); |
| 660 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0); |
| 661 | } else { |
| 662 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), |
| 663 | (adev->vce.gpu_addr >> 8)); |
| 664 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), |
| 665 | (adev->vce.gpu_addr >> 40) & 0xff); |
| 666 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000); |
| 667 | } |
| 668 | |
| 669 | size = VCE_V4_0_FW_SIZE; |
| 670 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); |
| 671 | |
| 672 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8)); |
| 673 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff); |
| 674 | offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0; |
| 675 | size = VCE_V4_0_STACK_SIZE; |
| 676 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24)); |
| 677 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); |
| 678 | |
| 679 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8)); |
| 680 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff); |
| 681 | offset += size; |
| 682 | size = VCE_V4_0_DATA_SIZE; |
| 683 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24)); |
| 684 | WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); |
| 685 | |
| 686 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100); |
| 687 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), |
| 688 | VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK, |
| 689 | ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); |
| 690 | } |
| 691 | |
| 692 | static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, |
| 693 | enum amd_clockgating_state state) |
| 694 | { |
| 695 | /* needed for driver unload*/ |
| 696 | return 0; |
| 697 | } |
| 698 | |
| 699 | static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, |
| 700 | enum amd_powergating_state state) |
| 701 | { |
| 702 | /* This doesn't actually powergate the VCE block. |
| 703 | * That's done in the dpm code via the SMC. This |
| 704 | * just re-inits the block as necessary. The actual |
| 705 | * gating still happens in the dpm code. We should |
| 706 | * revisit this when there is a cleaner line between |
| 707 | * the smc and the hw blocks |
| 708 | */ |
| 709 | struct amdgpu_device *adev = ip_block->adev; |
| 710 | |
| 711 | if (state == AMD_PG_STATE_GATE) |
| 712 | return vce_v4_0_stop(adev); |
| 713 | else |
| 714 | return vce_v4_0_start(adev); |
| 715 | } |
| 716 | |
| 717 | static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, |
| 718 | struct amdgpu_ib *ib, uint32_t flags) |
| 719 | { |
| 720 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
| 721 | |
| 722 | amdgpu_ring_write(ring, VCE_CMD_IB_VM); |
| 723 | amdgpu_ring_write(ring, v: vmid); |
| 724 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
| 725 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
| 726 | amdgpu_ring_write(ring, v: ib->length_dw); |
| 727 | } |
| 728 | |
| 729 | static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, |
| 730 | u64 seq, unsigned flags) |
| 731 | { |
| 732 | WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); |
| 733 | |
| 734 | amdgpu_ring_write(ring, VCE_CMD_FENCE); |
| 735 | amdgpu_ring_write(ring, v: addr); |
| 736 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
| 737 | amdgpu_ring_write(ring, v: seq); |
| 738 | amdgpu_ring_write(ring, VCE_CMD_TRAP); |
| 739 | } |
| 740 | |
| 741 | static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) |
| 742 | { |
| 743 | amdgpu_ring_write(ring, VCE_CMD_END); |
| 744 | } |
| 745 | |
| 746 | static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, |
| 747 | uint32_t val, uint32_t mask) |
| 748 | { |
| 749 | amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); |
| 750 | amdgpu_ring_write(ring, v: reg << 2); |
| 751 | amdgpu_ring_write(ring, v: mask); |
| 752 | amdgpu_ring_write(ring, v: val); |
| 753 | } |
| 754 | |
| 755 | static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, |
| 756 | unsigned int vmid, uint64_t pd_addr) |
| 757 | { |
| 758 | struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; |
| 759 | |
| 760 | pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); |
| 761 | |
| 762 | /* wait for reg writes */ |
| 763 | vce_v4_0_emit_reg_wait(ring, reg: hub->ctx0_ptb_addr_lo32 + |
| 764 | vmid * hub->ctx_addr_distance, |
| 765 | lower_32_bits(pd_addr), mask: 0xffffffff); |
| 766 | } |
| 767 | |
| 768 | static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring, |
| 769 | uint32_t reg, uint32_t val) |
| 770 | { |
| 771 | amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); |
| 772 | amdgpu_ring_write(ring, v: reg << 2); |
| 773 | amdgpu_ring_write(ring, v: val); |
| 774 | } |
| 775 | |
| 776 | static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, |
| 777 | struct amdgpu_irq_src *source, |
| 778 | unsigned type, |
| 779 | enum amdgpu_interrupt_state state) |
| 780 | { |
| 781 | uint32_t val = 0; |
| 782 | |
| 783 | if (!amdgpu_sriov_vf(adev)) { |
| 784 | if (state == AMDGPU_IRQ_STATE_ENABLE) |
| 785 | val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; |
| 786 | |
| 787 | WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val, |
| 788 | ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); |
| 789 | } |
| 790 | return 0; |
| 791 | } |
| 792 | |
| 793 | static int vce_v4_0_process_interrupt(struct amdgpu_device *adev, |
| 794 | struct amdgpu_irq_src *source, |
| 795 | struct amdgpu_iv_entry *entry) |
| 796 | { |
| 797 | DRM_DEBUG("IH: VCE\n" ); |
| 798 | |
| 799 | switch (entry->src_data[0]) { |
| 800 | case 0: |
| 801 | case 1: |
| 802 | case 2: |
| 803 | amdgpu_fence_process(ring: &adev->vce.ring[entry->src_data[0]]); |
| 804 | break; |
| 805 | default: |
| 806 | DRM_ERROR("Unhandled interrupt: %d %d\n" , |
| 807 | entry->src_id, entry->src_data[0]); |
| 808 | break; |
| 809 | } |
| 810 | |
| 811 | return 0; |
| 812 | } |
| 813 | |
| 814 | const struct amd_ip_funcs vce_v4_0_ip_funcs = { |
| 815 | .name = "vce_v4_0" , |
| 816 | .early_init = vce_v4_0_early_init, |
| 817 | .sw_init = vce_v4_0_sw_init, |
| 818 | .sw_fini = vce_v4_0_sw_fini, |
| 819 | .hw_init = vce_v4_0_hw_init, |
| 820 | .hw_fini = vce_v4_0_hw_fini, |
| 821 | .suspend = vce_v4_0_suspend, |
| 822 | .resume = vce_v4_0_resume, |
| 823 | .set_clockgating_state = vce_v4_0_set_clockgating_state, |
| 824 | .set_powergating_state = vce_v4_0_set_powergating_state, |
| 825 | }; |
| 826 | |
| 827 | static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { |
| 828 | .type = AMDGPU_RING_TYPE_VCE, |
| 829 | .align_mask = 0x3f, |
| 830 | .nop = VCE_CMD_NO_OP, |
| 831 | .support_64bit_ptrs = false, |
| 832 | .no_user_fence = true, |
| 833 | .get_rptr = vce_v4_0_ring_get_rptr, |
| 834 | .get_wptr = vce_v4_0_ring_get_wptr, |
| 835 | .set_wptr = vce_v4_0_ring_set_wptr, |
| 836 | .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, |
| 837 | .emit_frame_size = |
| 838 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + |
| 839 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + |
| 840 | 4 + /* vce_v4_0_emit_vm_flush */ |
| 841 | 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ |
| 842 | 1, /* vce_v4_0_ring_insert_end */ |
| 843 | .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ |
| 844 | .emit_ib = vce_v4_0_ring_emit_ib, |
| 845 | .emit_vm_flush = vce_v4_0_emit_vm_flush, |
| 846 | .emit_fence = vce_v4_0_ring_emit_fence, |
| 847 | .test_ring = amdgpu_vce_ring_test_ring, |
| 848 | .test_ib = amdgpu_vce_ring_test_ib, |
| 849 | .insert_nop = amdgpu_ring_insert_nop, |
| 850 | .insert_end = vce_v4_0_ring_insert_end, |
| 851 | .pad_ib = amdgpu_ring_generic_pad_ib, |
| 852 | .begin_use = amdgpu_vce_ring_begin_use, |
| 853 | .end_use = amdgpu_vce_ring_end_use, |
| 854 | .emit_wreg = vce_v4_0_emit_wreg, |
| 855 | .emit_reg_wait = vce_v4_0_emit_reg_wait, |
| 856 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, |
| 857 | }; |
| 858 | |
| 859 | static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) |
| 860 | { |
| 861 | int i; |
| 862 | |
| 863 | for (i = 0; i < adev->vce.num_rings; i++) { |
| 864 | adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs; |
| 865 | adev->vce.ring[i].me = i; |
| 866 | } |
| 867 | DRM_INFO("VCE enabled in VM mode\n" ); |
| 868 | } |
| 869 | |
| 870 | static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = { |
| 871 | .set = vce_v4_0_set_interrupt_state, |
| 872 | .process = vce_v4_0_process_interrupt, |
| 873 | }; |
| 874 | |
| 875 | static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev) |
| 876 | { |
| 877 | adev->vce.irq.num_types = 1; |
| 878 | adev->vce.irq.funcs = &vce_v4_0_irq_funcs; |
| 879 | }; |
| 880 | |
| 881 | const struct amdgpu_ip_block_version vce_v4_0_ip_block = |
| 882 | { |
| 883 | .type = AMD_IP_BLOCK_TYPE_VCE, |
| 884 | .major = 4, |
| 885 | .minor = 0, |
| 886 | .rev = 0, |
| 887 | .funcs = &vce_v4_0_ip_funcs, |
| 888 | }; |
| 889 | |