| 1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
| 2 | /* |
| 3 | * Copyright 2016-2022 Advanced Micro Devices, Inc. |
| 4 | * |
| 5 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 6 | * copy of this software and associated documentation files (the "Software"), |
| 7 | * to deal in the Software without restriction, including without limitation |
| 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 9 | * and/or sell copies of the Software, and to permit persons to whom the |
| 10 | * Software is furnished to do so, subject to the following conditions: |
| 11 | * |
| 12 | * The above copyright notice and this permission notice shall be included in |
| 13 | * all copies or substantial portions of the Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| 19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| 20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 21 | * OTHER DEALINGS IN THE SOFTWARE. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #include "kfd_kernel_queue.h" |
| 26 | #include "kfd_device_queue_manager.h" |
| 27 | #include "kfd_pm4_headers_ai.h" |
| 28 | #include "kfd_pm4_headers_aldebaran.h" |
| 29 | #include "kfd_pm4_opcodes.h" |
| 30 | #include "gc/gc_10_1_0_sh_mask.h" |
| 31 | |
| 32 | static int pm_map_process_v9(struct packet_manager *pm, |
| 33 | uint32_t *buffer, struct qcm_process_device *qpd) |
| 34 | { |
| 35 | struct pm4_mes_map_process *packet; |
| 36 | uint64_t vm_page_table_base_addr = qpd->page_table_base; |
| 37 | struct kfd_node *kfd = pm->dqm->dev; |
| 38 | struct kfd_process_device *pdd = |
| 39 | container_of(qpd, struct kfd_process_device, qpd); |
| 40 | struct amdgpu_device *adev = kfd->adev; |
| 41 | |
| 42 | packet = (struct pm4_mes_map_process *)buffer; |
| 43 | memset(buffer, 0, sizeof(struct pm4_mes_map_process)); |
| 44 | packet->header.u32All = pm_build_pm4_header(opcode: IT_MAP_PROCESS, |
| 45 | packet_size: sizeof(struct pm4_mes_map_process)); |
| 46 | if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE) |
| 47 | packet->bitfields2.exec_cleaner_shader = 1; |
| 48 | packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; |
| 49 | packet->bitfields2.process_quantum = 10; |
| 50 | packet->bitfields2.pasid = pdd->pasid; |
| 51 | packet->bitfields14.gds_size = qpd->gds_size & 0x3F; |
| 52 | packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF; |
| 53 | packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0; |
| 54 | packet->bitfields14.num_oac = qpd->num_oac; |
| 55 | packet->bitfields14.sdma_enable = 1; |
| 56 | packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; |
| 57 | |
| 58 | if (kfd->dqm->trap_debug_vmid && pdd->process->debug_trap_enabled && |
| 59 | pdd->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) { |
| 60 | packet->bitfields2.debug_vmid = kfd->dqm->trap_debug_vmid; |
| 61 | packet->bitfields2.new_debug = 1; |
| 62 | } |
| 63 | |
| 64 | packet->sh_mem_config = qpd->sh_mem_config; |
| 65 | packet->sh_mem_bases = qpd->sh_mem_bases; |
| 66 | if (qpd->tba_addr) { |
| 67 | packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); |
| 68 | /* On GFX9, unlike GFX10, bit TRAP_EN of SQ_SHADER_TBA_HI is |
| 69 | * not defined, so setting it won't do any harm. |
| 70 | */ |
| 71 | packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8) |
| 72 | | 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT; |
| 73 | |
| 74 | packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); |
| 75 | packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); |
| 76 | } |
| 77 | |
| 78 | packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); |
| 79 | packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); |
| 80 | |
| 81 | packet->vm_context_page_table_base_addr_lo32 = |
| 82 | lower_32_bits(vm_page_table_base_addr); |
| 83 | packet->vm_context_page_table_base_addr_hi32 = |
| 84 | upper_32_bits(vm_page_table_base_addr); |
| 85 | |
| 86 | return 0; |
| 87 | } |
| 88 | |
| 89 | static int pm_map_process_aldebaran(struct packet_manager *pm, |
| 90 | uint32_t *buffer, struct qcm_process_device *qpd) |
| 91 | { |
| 92 | struct pm4_mes_map_process_aldebaran *packet; |
| 93 | uint64_t vm_page_table_base_addr = qpd->page_table_base; |
| 94 | struct kfd_dev *kfd = pm->dqm->dev->kfd; |
| 95 | struct kfd_node *knode = pm->dqm->dev; |
| 96 | struct kfd_process_device *pdd = |
| 97 | container_of(qpd, struct kfd_process_device, qpd); |
| 98 | int i; |
| 99 | struct amdgpu_device *adev = kfd->adev; |
| 100 | |
| 101 | packet = (struct pm4_mes_map_process_aldebaran *)buffer; |
| 102 | memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); |
| 103 | packet->header.u32All = pm_build_pm4_header(opcode: IT_MAP_PROCESS, |
| 104 | packet_size: sizeof(struct pm4_mes_map_process_aldebaran)); |
| 105 | if (adev->enforce_isolation[knode->node_id] == |
| 106 | AMDGPU_ENFORCE_ISOLATION_ENABLE) |
| 107 | packet->bitfields2.exec_cleaner_shader = 1; |
| 108 | packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; |
| 109 | packet->bitfields2.process_quantum = 10; |
| 110 | packet->bitfields2.pasid = pdd->pasid; |
| 111 | packet->bitfields14.gds_size = qpd->gds_size & 0x3F; |
| 112 | packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF; |
| 113 | packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0; |
| 114 | packet->bitfields14.num_oac = qpd->num_oac; |
| 115 | packet->bitfields14.sdma_enable = 1; |
| 116 | packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; |
| 117 | packet->spi_gdbg_per_vmid_cntl = pdd->spi_dbg_override | |
| 118 | pdd->spi_dbg_launch_mode; |
| 119 | |
| 120 | if (pdd->process->debug_trap_enabled) { |
| 121 | for (i = 0; i < kfd->device_info.num_of_watch_points; i++) |
| 122 | packet->tcp_watch_cntl[i] = pdd->watch_points[i]; |
| 123 | |
| 124 | packet->bitfields2.single_memops = |
| 125 | !!(pdd->process->dbg_flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP); |
| 126 | } |
| 127 | |
| 128 | packet->sh_mem_config = qpd->sh_mem_config; |
| 129 | packet->sh_mem_bases = qpd->sh_mem_bases; |
| 130 | if (qpd->tba_addr) { |
| 131 | packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); |
| 132 | packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8); |
| 133 | packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); |
| 134 | packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); |
| 135 | } |
| 136 | |
| 137 | packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); |
| 138 | packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); |
| 139 | |
| 140 | packet->vm_context_page_table_base_addr_lo32 = |
| 141 | lower_32_bits(vm_page_table_base_addr); |
| 142 | packet->vm_context_page_table_base_addr_hi32 = |
| 143 | upper_32_bits(vm_page_table_base_addr); |
| 144 | |
| 145 | return 0; |
| 146 | } |
| 147 | |
| 148 | static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, |
| 149 | uint64_t ib, size_t ib_size_in_dwords, bool chain) |
| 150 | { |
| 151 | struct pm4_mes_runlist *packet; |
| 152 | |
| 153 | int concurrent_proc_cnt = 0; |
| 154 | struct kfd_node *kfd = pm->dqm->dev; |
| 155 | struct amdgpu_device *adev = kfd->adev; |
| 156 | |
| 157 | /* Determine the number of processes to map together to HW: |
| 158 | * it can not exceed the number of VMIDs available to the |
| 159 | * scheduler, and it is determined by the smaller of the number |
| 160 | * of processes in the runlist and kfd module parameter |
| 161 | * hws_max_conc_proc. |
| 162 | * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs |
| 163 | * cleaner between process switch), enable single-process mode |
| 164 | * in HWS. |
| 165 | * Note: the arbitration between the number of VMIDs and |
| 166 | * hws_max_conc_proc has been done in |
| 167 | * kgd2kfd_device_init(). |
| 168 | */ |
| 169 | concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] == |
| 170 | AMDGPU_ENFORCE_ISOLATION_ENABLE) ? |
| 171 | 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); |
| 172 | |
| 173 | packet = (struct pm4_mes_runlist *)buffer; |
| 174 | |
| 175 | memset(buffer, 0, sizeof(struct pm4_mes_runlist)); |
| 176 | packet->header.u32All = pm_build_pm4_header(opcode: IT_RUN_LIST, |
| 177 | packet_size: sizeof(struct pm4_mes_runlist)); |
| 178 | |
| 179 | packet->bitfields4.ib_size = ib_size_in_dwords; |
| 180 | packet->bitfields4.chain = chain ? 1 : 0; |
| 181 | packet->bitfields4.offload_polling = 0; |
| 182 | packet->bitfields4.chained_runlist_idle_disable = chain ? 1 : 0; |
| 183 | packet->bitfields4.valid = 1; |
| 184 | packet->bitfields4.process_cnt = concurrent_proc_cnt; |
| 185 | packet->ordinal2 = lower_32_bits(ib); |
| 186 | packet->ib_base_hi = upper_32_bits(ib); |
| 187 | |
| 188 | return 0; |
| 189 | } |
| 190 | |
| 191 | static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer, |
| 192 | struct scheduling_resources *res) |
| 193 | { |
| 194 | struct pm4_mes_set_resources *packet; |
| 195 | |
| 196 | packet = (struct pm4_mes_set_resources *)buffer; |
| 197 | memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); |
| 198 | |
| 199 | packet->header.u32All = pm_build_pm4_header(opcode: IT_SET_RESOURCES, |
| 200 | packet_size: sizeof(struct pm4_mes_set_resources)); |
| 201 | |
| 202 | packet->bitfields2.queue_type = |
| 203 | queue_type__mes_set_resources__hsa_interface_queue_hiq; |
| 204 | packet->bitfields2.vmid_mask = res->vmid_mask; |
| 205 | packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; |
| 206 | if (pm->dqm->dev->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN) |
| 207 | packet->bitfields2.enb_xnack_retry_disable_check = 1; |
| 208 | packet->bitfields7.oac_mask = res->oac_mask; |
| 209 | packet->bitfields8.gds_heap_base = res->gds_heap_base; |
| 210 | packet->bitfields8.gds_heap_size = res->gds_heap_size; |
| 211 | |
| 212 | packet->gws_mask_lo = lower_32_bits(res->gws_mask); |
| 213 | packet->gws_mask_hi = upper_32_bits(res->gws_mask); |
| 214 | |
| 215 | packet->queue_mask_lo = lower_32_bits(res->queue_mask); |
| 216 | packet->queue_mask_hi = upper_32_bits(res->queue_mask); |
| 217 | |
| 218 | return 0; |
| 219 | } |
| 220 | |
| 221 | static inline bool pm_use_ext_eng(struct kfd_dev *dev) |
| 222 | { |
| 223 | return amdgpu_ip_version(adev: dev->adev, ip: SDMA0_HWIP, inst: 0) >= |
| 224 | IP_VERSION(5, 2, 0); |
| 225 | } |
| 226 | |
| 227 | static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, |
| 228 | struct queue *q, bool is_static) |
| 229 | { |
| 230 | struct pm4_mes_map_queues *packet; |
| 231 | |
| 232 | packet = (struct pm4_mes_map_queues *)buffer; |
| 233 | memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); |
| 234 | |
| 235 | packet->header.u32All = pm_build_pm4_header(opcode: IT_MAP_QUEUES, |
| 236 | packet_size: sizeof(struct pm4_mes_map_queues)); |
| 237 | packet->bitfields2.num_queues = 1; |
| 238 | packet->bitfields2.queue_sel = |
| 239 | queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; |
| 240 | |
| 241 | packet->bitfields2.engine_sel = |
| 242 | engine_sel__mes_map_queues__compute_vi; |
| 243 | packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0; |
| 244 | packet->bitfields2.extended_engine_sel = |
| 245 | extended_engine_sel__mes_map_queues__legacy_engine_sel; |
| 246 | packet->bitfields2.queue_type = |
| 247 | queue_type__mes_map_queues__normal_compute_vi; |
| 248 | |
| 249 | switch (q->properties.type) { |
| 250 | case KFD_QUEUE_TYPE_COMPUTE: |
| 251 | if (is_static) |
| 252 | packet->bitfields2.queue_type = |
| 253 | queue_type__mes_map_queues__normal_latency_static_queue_vi; |
| 254 | break; |
| 255 | case KFD_QUEUE_TYPE_DIQ: |
| 256 | packet->bitfields2.queue_type = |
| 257 | queue_type__mes_map_queues__debug_interface_queue_vi; |
| 258 | break; |
| 259 | case KFD_QUEUE_TYPE_SDMA: |
| 260 | case KFD_QUEUE_TYPE_SDMA_XGMI: |
| 261 | if (q->properties.sdma_engine_id < 2 && |
| 262 | !pm_use_ext_eng(dev: q->device->kfd)) |
| 263 | packet->bitfields2.engine_sel = q->properties.sdma_engine_id + |
| 264 | engine_sel__mes_map_queues__sdma0_vi; |
| 265 | else { |
| 266 | /* |
| 267 | * For GFX9.4.3, SDMA engine id can be greater than 8. |
| 268 | * For such cases, set extended_engine_sel to 2 and |
| 269 | * ensure engine_sel lies between 0-7. |
| 270 | */ |
| 271 | if (q->properties.sdma_engine_id >= 8) |
| 272 | packet->bitfields2.extended_engine_sel = |
| 273 | extended_engine_sel__mes_map_queues__sdma8_to_15_sel; |
| 274 | else |
| 275 | packet->bitfields2.extended_engine_sel = |
| 276 | extended_engine_sel__mes_map_queues__sdma0_to_7_sel; |
| 277 | |
| 278 | packet->bitfields2.engine_sel = q->properties.sdma_engine_id % 8; |
| 279 | } |
| 280 | break; |
| 281 | default: |
| 282 | WARN(1, "queue type %d" , q->properties.type); |
| 283 | return -EINVAL; |
| 284 | } |
| 285 | packet->bitfields3.doorbell_offset = |
| 286 | q->properties.doorbell_off; |
| 287 | |
| 288 | packet->mqd_addr_lo = |
| 289 | lower_32_bits(q->gart_mqd_addr); |
| 290 | |
| 291 | packet->mqd_addr_hi = |
| 292 | upper_32_bits(q->gart_mqd_addr); |
| 293 | |
| 294 | packet->wptr_addr_lo = |
| 295 | lower_32_bits((uint64_t)q->properties.write_ptr); |
| 296 | |
| 297 | packet->wptr_addr_hi = |
| 298 | upper_32_bits((uint64_t)q->properties.write_ptr); |
| 299 | |
| 300 | return 0; |
| 301 | } |
| 302 | |
| 303 | static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm, |
| 304 | uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset, |
| 305 | uint32_t *reg_data) |
| 306 | { |
| 307 | pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info( |
| 308 | pm->dqm->dev->adev, |
| 309 | pm->dqm->wait_times, |
| 310 | sch_value, |
| 311 | que_sleep, |
| 312 | reg_offset, |
| 313 | reg_data); |
| 314 | } |
| 315 | |
| 316 | /* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with |
| 317 | * register/value for configuring dequeue wait counts |
| 318 | * |
| 319 | * @return: -ve for failure and 0 for success and buffer is |
| 320 | * filled in with packet |
| 321 | * |
| 322 | **/ |
| 323 | static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, |
| 324 | uint32_t *buffer, |
| 325 | enum kfd_config_dequeue_wait_counts_cmd cmd, |
| 326 | uint32_t value) |
| 327 | { |
| 328 | struct pm4_mec_write_data_mmio *packet; |
| 329 | uint32_t reg_offset = 0; |
| 330 | uint32_t reg_data = 0; |
| 331 | |
| 332 | switch (cmd) { |
| 333 | case KFD_DEQUEUE_WAIT_INIT: { |
| 334 | uint32_t sch_wave = 0, que_sleep = 1; |
| 335 | |
| 336 | /* For all gfx9 ASICs > gfx941, |
| 337 | * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. |
| 338 | * On a 1GHz machine this is roughly 1 microsecond, which is |
| 339 | * about how long it takes to load data out of memory during |
| 340 | * queue connect |
| 341 | * QUE_SLEEP: Wait Count for Dequeue Retry. |
| 342 | * |
| 343 | * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU |
| 344 | */ |
| 345 | if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) || |
| 346 | KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)) |
| 347 | return -EPERM; |
| 348 | |
| 349 | if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && |
| 350 | (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) |
| 351 | sch_wave = 1; |
| 352 | |
| 353 | pm_build_dequeue_wait_counts_packet_info(pm, sch_value: sch_wave, que_sleep, |
| 354 | reg_offset: ®_offset, reg_data: ®_data); |
| 355 | |
| 356 | break; |
| 357 | } |
| 358 | case KFD_DEQUEUE_WAIT_RESET: |
| 359 | /* reg_data would be set to dqm->wait_times */ |
| 360 | pm_build_dequeue_wait_counts_packet_info(pm, sch_value: 0, que_sleep: 0, reg_offset: ®_offset, reg_data: ®_data); |
| 361 | break; |
| 362 | |
| 363 | case KFD_DEQUEUE_WAIT_SET_SCH_WAVE: |
| 364 | /* The CP cannot handle value 0 and it will result in |
| 365 | * an infinite grace period being set so set to 1 to prevent this. Also |
| 366 | * avoid debugger API breakage as it sets 0 and expects a low value. |
| 367 | */ |
| 368 | if (!value) |
| 369 | value = 1; |
| 370 | pm_build_dequeue_wait_counts_packet_info(pm, sch_value: value, que_sleep: 0, reg_offset: ®_offset, reg_data: ®_data); |
| 371 | break; |
| 372 | default: |
| 373 | pr_err("Invalid dequeue wait cmd\n" ); |
| 374 | return -EINVAL; |
| 375 | } |
| 376 | |
| 377 | packet = (struct pm4_mec_write_data_mmio *)buffer; |
| 378 | memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); |
| 379 | |
| 380 | packet->header.u32All = pm_build_pm4_header(opcode: IT_WRITE_DATA, |
| 381 | packet_size: sizeof(struct pm4_mec_write_data_mmio)); |
| 382 | |
| 383 | packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register; |
| 384 | packet->bitfields2.addr_incr = |
| 385 | addr_incr___write_data__do_not_increment_address; |
| 386 | |
| 387 | packet->bitfields3.dst_mmreg_addr = reg_offset; |
| 388 | |
| 389 | packet->data = reg_data; |
| 390 | |
| 391 | return 0; |
| 392 | } |
| 393 | |
| 394 | static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, |
| 395 | enum kfd_unmap_queues_filter filter, |
| 396 | uint32_t filter_param, bool reset) |
| 397 | { |
| 398 | struct pm4_mes_unmap_queues *packet; |
| 399 | |
| 400 | packet = (struct pm4_mes_unmap_queues *)buffer; |
| 401 | memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); |
| 402 | |
| 403 | packet->header.u32All = pm_build_pm4_header(opcode: IT_UNMAP_QUEUES, |
| 404 | packet_size: sizeof(struct pm4_mes_unmap_queues)); |
| 405 | |
| 406 | packet->bitfields2.extended_engine_sel = |
| 407 | pm_use_ext_eng(dev: pm->dqm->dev->kfd) ? |
| 408 | extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel : |
| 409 | extended_engine_sel__mes_unmap_queues__legacy_engine_sel; |
| 410 | |
| 411 | packet->bitfields2.engine_sel = |
| 412 | engine_sel__mes_unmap_queues__compute; |
| 413 | |
| 414 | if (reset) |
| 415 | packet->bitfields2.action = |
| 416 | action__mes_unmap_queues__reset_queues; |
| 417 | else |
| 418 | packet->bitfields2.action = |
| 419 | action__mes_unmap_queues__preempt_queues; |
| 420 | |
| 421 | switch (filter) { |
| 422 | case KFD_UNMAP_QUEUES_FILTER_BY_PASID: |
| 423 | packet->bitfields2.queue_sel = |
| 424 | queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; |
| 425 | packet->bitfields3a.pasid = filter_param; |
| 426 | break; |
| 427 | case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: |
| 428 | packet->bitfields2.queue_sel = |
| 429 | queue_sel__mes_unmap_queues__unmap_all_queues; |
| 430 | break; |
| 431 | case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: |
| 432 | /* in this case, we do not preempt static queues */ |
| 433 | packet->bitfields2.queue_sel = |
| 434 | queue_sel__mes_unmap_queues__unmap_all_non_static_queues; |
| 435 | break; |
| 436 | default: |
| 437 | WARN(1, "filter %d" , filter); |
| 438 | return -EINVAL; |
| 439 | } |
| 440 | |
| 441 | return 0; |
| 442 | |
| 443 | } |
| 444 | |
| 445 | static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer, |
| 446 | uint64_t fence_address, uint64_t fence_value) |
| 447 | { |
| 448 | struct pm4_mes_query_status *packet; |
| 449 | |
| 450 | packet = (struct pm4_mes_query_status *)buffer; |
| 451 | memset(buffer, 0, sizeof(struct pm4_mes_query_status)); |
| 452 | |
| 453 | |
| 454 | packet->header.u32All = pm_build_pm4_header(opcode: IT_QUERY_STATUS, |
| 455 | packet_size: sizeof(struct pm4_mes_query_status)); |
| 456 | |
| 457 | packet->bitfields2.context_id = 0; |
| 458 | packet->bitfields2.interrupt_sel = |
| 459 | interrupt_sel__mes_query_status__completion_status; |
| 460 | packet->bitfields2.command = |
| 461 | command__mes_query_status__fence_only_after_write_ack; |
| 462 | |
| 463 | packet->addr_hi = upper_32_bits((uint64_t)fence_address); |
| 464 | packet->addr_lo = lower_32_bits((uint64_t)fence_address); |
| 465 | packet->data_hi = upper_32_bits((uint64_t)fence_value); |
| 466 | packet->data_lo = lower_32_bits((uint64_t)fence_value); |
| 467 | |
| 468 | return 0; |
| 469 | } |
| 470 | |
| 471 | const struct packet_manager_funcs kfd_v9_pm_funcs = { |
| 472 | .map_process = pm_map_process_v9, |
| 473 | .runlist = pm_runlist_v9, |
| 474 | .set_resources = pm_set_resources_v9, |
| 475 | .map_queues = pm_map_queues_v9, |
| 476 | .unmap_queues = pm_unmap_queues_v9, |
| 477 | .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, |
| 478 | .query_status = pm_query_status_v9, |
| 479 | .release_mem = NULL, |
| 480 | .map_process_size = sizeof(struct pm4_mes_map_process), |
| 481 | .runlist_size = sizeof(struct pm4_mes_runlist), |
| 482 | .set_resources_size = sizeof(struct pm4_mes_set_resources), |
| 483 | .map_queues_size = sizeof(struct pm4_mes_map_queues), |
| 484 | .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), |
| 485 | .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio), |
| 486 | .query_status_size = sizeof(struct pm4_mes_query_status), |
| 487 | .release_mem_size = 0, |
| 488 | }; |
| 489 | |
| 490 | const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { |
| 491 | .map_process = pm_map_process_aldebaran, |
| 492 | .runlist = pm_runlist_v9, |
| 493 | .set_resources = pm_set_resources_v9, |
| 494 | .map_queues = pm_map_queues_v9, |
| 495 | .unmap_queues = pm_unmap_queues_v9, |
| 496 | .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, |
| 497 | .query_status = pm_query_status_v9, |
| 498 | .release_mem = NULL, |
| 499 | .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), |
| 500 | .runlist_size = sizeof(struct pm4_mes_runlist), |
| 501 | .set_resources_size = sizeof(struct pm4_mes_set_resources), |
| 502 | .map_queues_size = sizeof(struct pm4_mes_map_queues), |
| 503 | .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), |
| 504 | .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio), |
| 505 | .query_status_size = sizeof(struct pm4_mes_query_status), |
| 506 | .release_mem_size = 0, |
| 507 | }; |
| 508 | |