| 1 | /* |
| 2 | * Copyright 2019 Advanced Micro Devices, Inc. |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice shall be included in |
| 12 | * all copies or substantial portions of the Software. |
| 13 | * |
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * |
| 22 | */ |
| 23 | |
| 24 | #include "amdgpu.h" |
| 25 | #include "amdgpu_jpeg.h" |
| 26 | #include "amdgpu_cs.h" |
| 27 | #include "soc15.h" |
| 28 | #include "soc15d.h" |
| 29 | #include "vcn_v1_0.h" |
| 30 | #include "jpeg_v1_0.h" |
| 31 | |
| 32 | #include "vcn/vcn_1_0_offset.h" |
| 33 | #include "vcn/vcn_1_0_sh_mask.h" |
| 34 | |
/*
 * Forward declarations of file-local functions that are referenced
 * (by early init and by the ring callback table) before their
 * definitions appear further down in this file.
 */
static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
				     struct amdgpu_job *job,
				     struct amdgpu_ib *ib);
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
| 41 | |
| 42 | static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) |
| 43 | { |
| 44 | struct amdgpu_device *adev = ring->adev; |
| 45 | ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); |
| 46 | if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || |
| 47 | ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { |
| 48 | ring->ring[(*ptr)++] = 0; |
| 49 | ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); |
| 50 | } else { |
| 51 | ring->ring[(*ptr)++] = reg_offset; |
| 52 | ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); |
| 53 | } |
| 54 | ring->ring[(*ptr)++] = val; |
| 55 | } |
| 56 | |
| 57 | static void jpeg_v1_0_decode_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) |
| 58 | { |
| 59 | struct amdgpu_device *adev = ring->adev; |
| 60 | |
| 61 | uint32_t reg, reg_offset, val, mask, i; |
| 62 | |
| 63 | // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW |
| 64 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); |
| 65 | reg_offset = (reg << 2); |
| 66 | val = lower_32_bits(ring->gpu_addr); |
| 67 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
| 68 | |
| 69 | // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH |
| 70 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); |
| 71 | reg_offset = (reg << 2); |
| 72 | val = upper_32_bits(ring->gpu_addr); |
| 73 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
| 74 | |
| 75 | // 3rd to 5th: issue MEM_READ commands |
| 76 | for (i = 0; i <= 2; i++) { |
| 77 | ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); |
| 78 | ring->ring[ptr++] = 0; |
| 79 | } |
| 80 | |
| 81 | // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability |
| 82 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); |
| 83 | reg_offset = (reg << 2); |
| 84 | val = 0x13; |
| 85 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
| 86 | |
| 87 | // 7th: program mmUVD_JRBC_RB_REF_DATA |
| 88 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA); |
| 89 | reg_offset = (reg << 2); |
| 90 | val = 0x1; |
| 91 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
| 92 | |
| 93 | // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL |
| 94 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); |
| 95 | reg_offset = (reg << 2); |
| 96 | val = 0x1; |
| 97 | mask = 0x1; |
| 98 | |
| 99 | ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); |
| 100 | ring->ring[ptr++] = 0x01400200; |
| 101 | ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); |
| 102 | ring->ring[ptr++] = val; |
| 103 | ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); |
| 104 | if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || |
| 105 | ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { |
| 106 | ring->ring[ptr++] = 0; |
| 107 | ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); |
| 108 | } else { |
| 109 | ring->ring[ptr++] = reg_offset; |
| 110 | ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); |
| 111 | } |
| 112 | ring->ring[ptr++] = mask; |
| 113 | |
| 114 | //9th to 21st: insert no-op |
| 115 | for (i = 0; i <= 12; i++) { |
| 116 | ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); |
| 117 | ring->ring[ptr++] = 0; |
| 118 | } |
| 119 | |
| 120 | //22nd: reset mmUVD_JRBC_RB_RPTR |
| 121 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_RPTR); |
| 122 | reg_offset = (reg << 2); |
| 123 | val = 0; |
| 124 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
| 125 | |
| 126 | //23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch |
| 127 | reg = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_CNTL); |
| 128 | reg_offset = (reg << 2); |
| 129 | val = 0x12; |
| 130 | jpeg_v1_0_decode_ring_patch_wreg(ring, ptr: &ptr, reg_offset, val); |
| 131 | } |
| 132 | |
| 133 | /** |
| 134 | * jpeg_v1_0_decode_ring_get_rptr - get read pointer |
| 135 | * |
| 136 | * @ring: amdgpu_ring pointer |
| 137 | * |
| 138 | * Returns the current hardware read pointer |
| 139 | */ |
| 140 | static uint64_t jpeg_v1_0_decode_ring_get_rptr(struct amdgpu_ring *ring) |
| 141 | { |
| 142 | struct amdgpu_device *adev = ring->adev; |
| 143 | |
| 144 | return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR); |
| 145 | } |
| 146 | |
| 147 | /** |
| 148 | * jpeg_v1_0_decode_ring_get_wptr - get write pointer |
| 149 | * |
| 150 | * @ring: amdgpu_ring pointer |
| 151 | * |
| 152 | * Returns the current hardware write pointer |
| 153 | */ |
| 154 | static uint64_t jpeg_v1_0_decode_ring_get_wptr(struct amdgpu_ring *ring) |
| 155 | { |
| 156 | struct amdgpu_device *adev = ring->adev; |
| 157 | |
| 158 | return RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); |
| 159 | } |
| 160 | |
| 161 | /** |
| 162 | * jpeg_v1_0_decode_ring_set_wptr - set write pointer |
| 163 | * |
| 164 | * @ring: amdgpu_ring pointer |
| 165 | * |
| 166 | * Commits the write pointer to the hardware |
| 167 | */ |
| 168 | static void jpeg_v1_0_decode_ring_set_wptr(struct amdgpu_ring *ring) |
| 169 | { |
| 170 | struct amdgpu_device *adev = ring->adev; |
| 171 | |
| 172 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); |
| 173 | } |
| 174 | |
| 175 | /** |
| 176 | * jpeg_v1_0_decode_ring_insert_start - insert a start command |
| 177 | * |
| 178 | * @ring: amdgpu_ring pointer |
| 179 | * |
| 180 | * Write a start command to the ring. |
| 181 | */ |
| 182 | static void jpeg_v1_0_decode_ring_insert_start(struct amdgpu_ring *ring) |
| 183 | { |
| 184 | struct amdgpu_device *adev = ring->adev; |
| 185 | |
| 186 | amdgpu_ring_write(ring, |
| 187 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
| 188 | amdgpu_ring_write(ring, v: 0x68e04); |
| 189 | |
| 190 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); |
| 191 | amdgpu_ring_write(ring, v: 0x80010000); |
| 192 | } |
| 193 | |
| 194 | /** |
| 195 | * jpeg_v1_0_decode_ring_insert_end - insert a end command |
| 196 | * |
| 197 | * @ring: amdgpu_ring pointer |
| 198 | * |
| 199 | * Write a end command to the ring. |
| 200 | */ |
| 201 | static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring) |
| 202 | { |
| 203 | struct amdgpu_device *adev = ring->adev; |
| 204 | |
| 205 | amdgpu_ring_write(ring, |
| 206 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
| 207 | amdgpu_ring_write(ring, v: 0x68e04); |
| 208 | |
| 209 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); |
| 210 | amdgpu_ring_write(ring, v: 0x00010000); |
| 211 | } |
| 212 | |
| 213 | /** |
| 214 | * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command |
| 215 | * |
| 216 | * @ring: amdgpu_ring pointer |
| 217 | * @addr: address |
| 218 | * @seq: sequence number |
| 219 | * @flags: fence related flags |
| 220 | * |
| 221 | * Write a fence and a trap command to the ring. |
| 222 | */ |
| 223 | static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, |
| 224 | unsigned flags) |
| 225 | { |
| 226 | struct amdgpu_device *adev = ring->adev; |
| 227 | |
| 228 | WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); |
| 229 | |
| 230 | amdgpu_ring_write(ring, |
| 231 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); |
| 232 | amdgpu_ring_write(ring, v: seq); |
| 233 | |
| 234 | amdgpu_ring_write(ring, |
| 235 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); |
| 236 | amdgpu_ring_write(ring, v: seq); |
| 237 | |
| 238 | amdgpu_ring_write(ring, |
| 239 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); |
| 240 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
| 241 | |
| 242 | amdgpu_ring_write(ring, |
| 243 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); |
| 244 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
| 245 | |
| 246 | amdgpu_ring_write(ring, |
| 247 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); |
| 248 | amdgpu_ring_write(ring, v: 0x8); |
| 249 | |
| 250 | amdgpu_ring_write(ring, |
| 251 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); |
| 252 | amdgpu_ring_write(ring, v: 0); |
| 253 | |
| 254 | amdgpu_ring_write(ring, |
| 255 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); |
| 256 | amdgpu_ring_write(ring, v: 0x01400200); |
| 257 | |
| 258 | amdgpu_ring_write(ring, |
| 259 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); |
| 260 | amdgpu_ring_write(ring, v: seq); |
| 261 | |
| 262 | amdgpu_ring_write(ring, |
| 263 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); |
| 264 | amdgpu_ring_write(ring, lower_32_bits(addr)); |
| 265 | |
| 266 | amdgpu_ring_write(ring, |
| 267 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); |
| 268 | amdgpu_ring_write(ring, upper_32_bits(addr)); |
| 269 | |
| 270 | amdgpu_ring_write(ring, |
| 271 | PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); |
| 272 | amdgpu_ring_write(ring, v: 0xffffffff); |
| 273 | |
| 274 | amdgpu_ring_write(ring, |
| 275 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
| 276 | amdgpu_ring_write(ring, v: 0x3fbc); |
| 277 | |
| 278 | amdgpu_ring_write(ring, |
| 279 | PACKETJ(0, 0, 0, PACKETJ_TYPE0)); |
| 280 | amdgpu_ring_write(ring, v: 0x1); |
| 281 | |
| 282 | /* emit trap */ |
| 283 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); |
| 284 | amdgpu_ring_write(ring, v: 0); |
| 285 | } |
| 286 | |
| 287 | /** |
| 288 | * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer |
| 289 | * |
| 290 | * @ring: amdgpu_ring pointer |
| 291 | * @job: job to retrieve vmid from |
| 292 | * @ib: indirect buffer to execute |
| 293 | * @flags: unused |
| 294 | * |
| 295 | * Write ring commands to execute the indirect buffer. |
| 296 | */ |
| 297 | static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring, |
| 298 | struct amdgpu_job *job, |
| 299 | struct amdgpu_ib *ib, |
| 300 | uint32_t flags) |
| 301 | { |
| 302 | struct amdgpu_device *adev = ring->adev; |
| 303 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
| 304 | |
| 305 | amdgpu_ring_write(ring, |
| 306 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); |
| 307 | if (ring->funcs->parse_cs) |
| 308 | amdgpu_ring_write(ring, v: 0); |
| 309 | else |
| 310 | amdgpu_ring_write(ring, v: (vmid | (vmid << 4))); |
| 311 | |
| 312 | amdgpu_ring_write(ring, |
| 313 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); |
| 314 | amdgpu_ring_write(ring, v: (vmid | (vmid << 4))); |
| 315 | |
| 316 | amdgpu_ring_write(ring, |
| 317 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); |
| 318 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
| 319 | |
| 320 | amdgpu_ring_write(ring, |
| 321 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); |
| 322 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
| 323 | |
| 324 | amdgpu_ring_write(ring, |
| 325 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); |
| 326 | amdgpu_ring_write(ring, v: ib->length_dw); |
| 327 | |
| 328 | amdgpu_ring_write(ring, |
| 329 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); |
| 330 | amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); |
| 331 | |
| 332 | amdgpu_ring_write(ring, |
| 333 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); |
| 334 | amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); |
| 335 | |
| 336 | amdgpu_ring_write(ring, |
| 337 | PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); |
| 338 | amdgpu_ring_write(ring, v: 0); |
| 339 | |
| 340 | amdgpu_ring_write(ring, |
| 341 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); |
| 342 | amdgpu_ring_write(ring, v: 0x01400200); |
| 343 | |
| 344 | amdgpu_ring_write(ring, |
| 345 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); |
| 346 | amdgpu_ring_write(ring, v: 0x2); |
| 347 | |
| 348 | amdgpu_ring_write(ring, |
| 349 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); |
| 350 | amdgpu_ring_write(ring, v: 0x2); |
| 351 | } |
| 352 | |
| 353 | static void jpeg_v1_0_decode_ring_emit_reg_wait(struct amdgpu_ring *ring, |
| 354 | uint32_t reg, uint32_t val, |
| 355 | uint32_t mask) |
| 356 | { |
| 357 | struct amdgpu_device *adev = ring->adev; |
| 358 | uint32_t reg_offset = (reg << 2); |
| 359 | |
| 360 | amdgpu_ring_write(ring, |
| 361 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); |
| 362 | amdgpu_ring_write(ring, v: 0x01400200); |
| 363 | |
| 364 | amdgpu_ring_write(ring, |
| 365 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); |
| 366 | amdgpu_ring_write(ring, v: val); |
| 367 | |
| 368 | amdgpu_ring_write(ring, |
| 369 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
| 370 | if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || |
| 371 | ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { |
| 372 | amdgpu_ring_write(ring, v: 0); |
| 373 | amdgpu_ring_write(ring, |
| 374 | PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); |
| 375 | } else { |
| 376 | amdgpu_ring_write(ring, v: reg_offset); |
| 377 | amdgpu_ring_write(ring, |
| 378 | PACKETJ(0, 0, 0, PACKETJ_TYPE3)); |
| 379 | } |
| 380 | amdgpu_ring_write(ring, v: mask); |
| 381 | } |
| 382 | |
| 383 | static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, |
| 384 | unsigned vmid, uint64_t pd_addr) |
| 385 | { |
| 386 | struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; |
| 387 | uint32_t data0, data1, mask; |
| 388 | |
| 389 | pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); |
| 390 | |
| 391 | /* wait for register write */ |
| 392 | data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; |
| 393 | data1 = lower_32_bits(pd_addr); |
| 394 | mask = 0xffffffff; |
| 395 | jpeg_v1_0_decode_ring_emit_reg_wait(ring, reg: data0, val: data1, mask); |
| 396 | } |
| 397 | |
| 398 | static void jpeg_v1_0_decode_ring_emit_wreg(struct amdgpu_ring *ring, |
| 399 | uint32_t reg, uint32_t val) |
| 400 | { |
| 401 | struct amdgpu_device *adev = ring->adev; |
| 402 | uint32_t reg_offset = (reg << 2); |
| 403 | |
| 404 | amdgpu_ring_write(ring, |
| 405 | PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); |
| 406 | if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || |
| 407 | ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { |
| 408 | amdgpu_ring_write(ring, v: 0); |
| 409 | amdgpu_ring_write(ring, |
| 410 | PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); |
| 411 | } else { |
| 412 | amdgpu_ring_write(ring, v: reg_offset); |
| 413 | amdgpu_ring_write(ring, |
| 414 | PACKETJ(0, 0, 0, PACKETJ_TYPE0)); |
| 415 | } |
| 416 | amdgpu_ring_write(ring, v: val); |
| 417 | } |
| 418 | |
| 419 | static void jpeg_v1_0_decode_ring_nop(struct amdgpu_ring *ring, uint32_t count) |
| 420 | { |
| 421 | int i; |
| 422 | |
| 423 | WARN_ON(ring->wptr % 2 || count % 2); |
| 424 | |
| 425 | for (i = 0; i < count / 2; i++) { |
| 426 | amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); |
| 427 | amdgpu_ring_write(ring, v: 0); |
| 428 | } |
| 429 | } |
| 430 | |
| 431 | static int jpeg_v1_0_set_interrupt_state(struct amdgpu_device *adev, |
| 432 | struct amdgpu_irq_src *source, |
| 433 | unsigned type, |
| 434 | enum amdgpu_interrupt_state state) |
| 435 | { |
| 436 | return 0; |
| 437 | } |
| 438 | |
| 439 | static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev, |
| 440 | struct amdgpu_irq_src *source, |
| 441 | struct amdgpu_iv_entry *entry) |
| 442 | { |
| 443 | DRM_DEBUG("IH: JPEG decode TRAP\n" ); |
| 444 | |
| 445 | switch (entry->src_id) { |
| 446 | case 126: |
| 447 | amdgpu_fence_process(ring: adev->jpeg.inst->ring_dec); |
| 448 | break; |
| 449 | default: |
| 450 | DRM_ERROR("Unhandled interrupt: %d %d\n" , |
| 451 | entry->src_id, entry->src_data[0]); |
| 452 | break; |
| 453 | } |
| 454 | |
| 455 | return 0; |
| 456 | } |
| 457 | |
| 458 | /** |
| 459 | * jpeg_v1_0_early_init - set function pointers |
| 460 | * |
| 461 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
| 462 | * |
| 463 | * Set ring and irq function pointers |
| 464 | */ |
| 465 | int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block) |
| 466 | { |
| 467 | struct amdgpu_device *adev = ip_block->adev; |
| 468 | |
| 469 | adev->jpeg.num_jpeg_inst = 1; |
| 470 | adev->jpeg.num_jpeg_rings = 1; |
| 471 | |
| 472 | jpeg_v1_0_set_dec_ring_funcs(adev); |
| 473 | jpeg_v1_0_set_irq_funcs(adev); |
| 474 | |
| 475 | return 0; |
| 476 | } |
| 477 | |
| 478 | /** |
| 479 | * jpeg_v1_0_sw_init - sw init for JPEG block |
| 480 | * |
| 481 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
| 482 | * |
| 483 | */ |
| 484 | int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block) |
| 485 | { |
| 486 | struct amdgpu_device *adev = ip_block->adev; |
| 487 | struct amdgpu_ring *ring; |
| 488 | int r; |
| 489 | |
| 490 | /* JPEG TRAP */ |
| 491 | r = amdgpu_irq_add_id(adev, client_id: SOC15_IH_CLIENTID_VCN, src_id: 126, source: &adev->jpeg.inst->irq); |
| 492 | if (r) |
| 493 | return r; |
| 494 | |
| 495 | ring = adev->jpeg.inst->ring_dec; |
| 496 | ring->vm_hub = AMDGPU_MMHUB0(0); |
| 497 | sprintf(buf: ring->name, fmt: "jpeg_dec" ); |
| 498 | r = amdgpu_ring_init(adev, ring, max_dw: 512, irq_src: &adev->jpeg.inst->irq, |
| 499 | irq_type: 0, hw_prio: AMDGPU_RING_PRIO_DEFAULT, NULL); |
| 500 | if (r) |
| 501 | return r; |
| 502 | |
| 503 | adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] = |
| 504 | SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); |
| 505 | |
| 506 | return 0; |
| 507 | } |
| 508 | |
| 509 | /** |
| 510 | * jpeg_v1_0_sw_fini - sw fini for JPEG block |
| 511 | * |
| 512 | * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. |
| 513 | * |
| 514 | * JPEG free up sw allocation |
| 515 | */ |
| 516 | void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block) |
| 517 | { |
| 518 | struct amdgpu_device *adev = ip_block->adev; |
| 519 | |
| 520 | amdgpu_ring_fini(ring: adev->jpeg.inst->ring_dec); |
| 521 | } |
| 522 | |
| 523 | /** |
| 524 | * jpeg_v1_0_start - start JPEG block |
| 525 | * |
| 526 | * @adev: amdgpu_device pointer |
| 527 | * @mode: SPG or DPG mode |
| 528 | * |
| 529 | * Setup and start the JPEG block |
| 530 | */ |
| 531 | void jpeg_v1_0_start(struct amdgpu_device *adev, int mode) |
| 532 | { |
| 533 | struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; |
| 534 | |
| 535 | if (mode == 0) { |
| 536 | WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); |
| 537 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | |
| 538 | UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); |
| 539 | WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); |
| 540 | WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); |
| 541 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_RPTR, 0); |
| 542 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR, 0); |
| 543 | WREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); |
| 544 | } |
| 545 | |
| 546 | /* initialize wptr */ |
| 547 | ring->wptr = RREG32_SOC15(JPEG, 0, mmUVD_JRBC_RB_WPTR); |
| 548 | |
| 549 | /* copy patch commands to the jpeg ring */ |
| 550 | jpeg_v1_0_decode_ring_set_patch_ring(ring, |
| 551 | ptr: (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); |
| 552 | } |
| 553 | |
| 554 | static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { |
| 555 | .type = AMDGPU_RING_TYPE_VCN_JPEG, |
| 556 | .align_mask = 0xf, |
| 557 | .nop = PACKET0(0x81ff, 0), |
| 558 | .support_64bit_ptrs = false, |
| 559 | .no_user_fence = true, |
| 560 | .extra_bytes = 256, |
| 561 | .get_rptr = jpeg_v1_0_decode_ring_get_rptr, |
| 562 | .get_wptr = jpeg_v1_0_decode_ring_get_wptr, |
| 563 | .set_wptr = jpeg_v1_0_decode_ring_set_wptr, |
| 564 | .parse_cs = jpeg_v1_dec_ring_parse_cs, |
| 565 | .emit_frame_size = |
| 566 | 6 + 6 + /* hdp invalidate / flush */ |
| 567 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + |
| 568 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + |
| 569 | 8 + /* jpeg_v1_0_decode_ring_emit_vm_flush */ |
| 570 | 26 + 26 + /* jpeg_v1_0_decode_ring_emit_fence x2 vm fence */ |
| 571 | 6, |
| 572 | .emit_ib_size = 22, /* jpeg_v1_0_decode_ring_emit_ib */ |
| 573 | .emit_ib = jpeg_v1_0_decode_ring_emit_ib, |
| 574 | .emit_fence = jpeg_v1_0_decode_ring_emit_fence, |
| 575 | .emit_vm_flush = jpeg_v1_0_decode_ring_emit_vm_flush, |
| 576 | .test_ring = amdgpu_jpeg_dec_ring_test_ring, |
| 577 | .test_ib = amdgpu_jpeg_dec_ring_test_ib, |
| 578 | .insert_nop = jpeg_v1_0_decode_ring_nop, |
| 579 | .insert_start = jpeg_v1_0_decode_ring_insert_start, |
| 580 | .insert_end = jpeg_v1_0_decode_ring_insert_end, |
| 581 | .pad_ib = amdgpu_ring_generic_pad_ib, |
| 582 | .begin_use = jpeg_v1_0_ring_begin_use, |
| 583 | .end_use = vcn_v1_0_ring_end_use, |
| 584 | .emit_wreg = jpeg_v1_0_decode_ring_emit_wreg, |
| 585 | .emit_reg_wait = jpeg_v1_0_decode_ring_emit_reg_wait, |
| 586 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, |
| 587 | }; |
| 588 | |
| 589 | static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) |
| 590 | { |
| 591 | adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs; |
| 592 | } |
| 593 | |
| 594 | static const struct amdgpu_irq_src_funcs jpeg_v1_0_irq_funcs = { |
| 595 | .set = jpeg_v1_0_set_interrupt_state, |
| 596 | .process = jpeg_v1_0_process_interrupt, |
| 597 | }; |
| 598 | |
| 599 | static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev) |
| 600 | { |
| 601 | adev->jpeg.inst->irq.funcs = &jpeg_v1_0_irq_funcs; |
| 602 | } |
| 603 | |
| 604 | static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) |
| 605 | { |
| 606 | struct amdgpu_device *adev = ring->adev; |
| 607 | bool set_clocks = !cancel_delayed_work_sync(dwork: &adev->vcn.inst[0].idle_work); |
| 608 | int cnt = 0; |
| 609 | |
| 610 | mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround); |
| 611 | |
| 612 | if (amdgpu_fence_wait_empty(ring: &adev->vcn.inst->ring_dec)) |
| 613 | DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n" ); |
| 614 | |
| 615 | for (cnt = 0; cnt < adev->vcn.inst[0].num_enc_rings; cnt++) { |
| 616 | if (amdgpu_fence_wait_empty(ring: &adev->vcn.inst->ring_enc[cnt])) |
| 617 | DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n" , cnt); |
| 618 | } |
| 619 | |
| 620 | vcn_v1_0_set_pg_for_begin_use(ring, set_clocks); |
| 621 | } |
| 622 | |
| 623 | /** |
| 624 | * jpeg_v1_dec_ring_parse_cs - command submission parser |
| 625 | * |
| 626 | * @parser: Command submission parser context |
| 627 | * @job: the job to parse |
| 628 | * @ib: the IB to parse |
| 629 | * |
| 630 | * Parse the command stream, return -EINVAL for invalid packet, |
| 631 | * 0 otherwise |
| 632 | */ |
| 633 | static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, |
| 634 | struct amdgpu_job *job, |
| 635 | struct amdgpu_ib *ib) |
| 636 | { |
| 637 | u32 i, reg, res, cond, type; |
| 638 | int ret = 0; |
| 639 | struct amdgpu_device *adev = parser->adev; |
| 640 | |
| 641 | for (i = 0; i < ib->length_dw ; i += 2) { |
| 642 | reg = CP_PACKETJ_GET_REG(ib->ptr[i]); |
| 643 | res = CP_PACKETJ_GET_RES(ib->ptr[i]); |
| 644 | cond = CP_PACKETJ_GET_COND(ib->ptr[i]); |
| 645 | type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); |
| 646 | |
| 647 | if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */ |
| 648 | return -EINVAL; |
| 649 | |
| 650 | if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END) |
| 651 | continue; |
| 652 | |
| 653 | switch (type) { |
| 654 | case PACKETJ_TYPE0: |
| 655 | if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH && |
| 656 | reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW && |
| 657 | reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH && |
| 658 | reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW && |
| 659 | reg != JPEG_V1_REG_CTX_INDEX && |
| 660 | reg != JPEG_V1_REG_CTX_DATA) { |
| 661 | ret = -EINVAL; |
| 662 | } |
| 663 | break; |
| 664 | case PACKETJ_TYPE1: |
| 665 | if (reg != JPEG_V1_REG_CTX_DATA) |
| 666 | ret = -EINVAL; |
| 667 | break; |
| 668 | case PACKETJ_TYPE3: |
| 669 | if (reg != JPEG_V1_REG_SOFT_RESET) |
| 670 | ret = -EINVAL; |
| 671 | break; |
| 672 | case PACKETJ_TYPE6: |
| 673 | if (ib->ptr[i] != CP_PACKETJ_NOP) |
| 674 | ret = -EINVAL; |
| 675 | break; |
| 676 | default: |
| 677 | ret = -EINVAL; |
| 678 | } |
| 679 | |
| 680 | if (ret) { |
| 681 | dev_err(adev->dev, "Invalid packet [0x%08x]!\n" , ib->ptr[i]); |
| 682 | break; |
| 683 | } |
| 684 | } |
| 685 | |
| 686 | return ret; |
| 687 | } |
| 688 | |