/* SPDX-License-Identifier: MIT */
/*
 * Copyright (c) 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef __AMDGPU_RAS_MGR_H__
#define __AMDGPU_RAS_MGR_H__
#include "ras.h"
#include "amdgpu_ras_process.h"

/*
 * enum ras_ih_type - tag naming where a RAS interrupt came from.
 *
 * The enumerators have no explicit initializers, so they take the values
 * 0, 1, 2, 3 in declaration order.
 *
 * NOTE(review): "ih" presumably abbreviates "interrupt handler". The three
 * non-NONE values mirror the amdgpu_ras_mgr_handle_{controller,consumer,
 * fatal}_interrupt() prototypes declared in this header, but the code that
 * consumes this enum is not visible here - confirm the mapping there.
 */
enum ras_ih_type {
	RAS_IH_NONE,			/* 0 */
	RAS_IH_FROM_BLOCK_CONTROLLER,	/* 1 */
	RAS_IH_FROM_CONSUMER_CLIENT,	/* 2 */
	RAS_IH_FROM_FATAL_ERROR,	/* 3 */
};

| 35 | struct ras_ih_info { |
| 36 | uint32_t block; |
| 37 | union { |
| 38 | struct amdgpu_iv_entry iv_entry; |
| 39 | struct { |
| 40 | uint16_t pasid; |
| 41 | uint32_t reset; |
| 42 | pasid_notify pasid_fn; |
| 43 | void *data; |
| 44 | }; |
| 45 | }; |
| 46 | }; |
| 47 | |
| 48 | struct amdgpu_ras_mgr { |
| 49 | struct amdgpu_device *adev; |
| 50 | struct ras_core_context *ras_core; |
| 51 | struct delayed_work retire_page_dwork; |
| 52 | struct ras_event_manager ras_event_mgr; |
| 53 | uint64_t last_poison_consumption_seqno; |
| 54 | bool ras_is_ready; |
| 55 | |
| 56 | bool is_paused; |
| 57 | struct completion ras_event_done; |
| 58 | }; |
| 59 | |
/*
 * IP block version object for RAS v1.0 (per its name and type). Declared
 * here only; the definition lives in another translation unit.
 */
extern const struct amdgpu_ip_block_version ras_v1_0_ip_block;

/*
 * Manager lookup and the "uniras" switch.
 *
 * NOTE(review): the definitions are not part of this header; the summaries
 * below are read off the names and signatures and must be confirmed there.
 *
 * amdgpu_ras_mgr_get_context() - returns a struct amdgpu_ras_mgr pointer for
 *	@adev. Whether NULL is a possible result is not visible here.
 * amdgpu_enable_uniras() - takes @enable as an on/off flag and returns int,
 *	presumably 0 on success and a negative error code otherwise.
 * amdgpu_uniras_enabled() - boolean query on @adev.
 */
struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context(struct amdgpu_device *adev);
int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable);
bool amdgpu_uniras_enabled(struct amdgpu_device *adev);
/*
 * RAS interrupt entry points, one per interrupt source (fatal error, block
 * controller, consumer).
 *
 * All three share the same shape: the device plus an untyped @data pointer,
 * returning int.
 *
 * NOTE(review): the concrete type behind @data is not visible in this header
 * (possibly struct ras_ih_info - TODO confirm), nor is the return-value
 * convention (presumably 0 on success, negative error code on failure).
 */
int amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data);
int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data);
int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data);
/*
 * RAS manager services.
 *
 * NOTE(review): only the prototypes are visible here. The notes below are
 * inferred from names and signatures; confirm against the definitions.
 * Functions returning int presumably use 0 for success and a negative error
 * code for failure.
 */

/* Presumably refreshes ECC error information for @adev. */
int amdgpu_ras_mgr_update_ras_ecc(struct amdgpu_device *adev);

/* Requests a GPU reset; the meaning of the @flags bits is not visible here. */
int amdgpu_ras_mgr_reset_gpu(struct amdgpu_device *adev, uint32_t flags);

/* Returns a 64-bit RAS event sequence number for the given @seqno_type. */
uint64_t amdgpu_ras_mgr_gen_ras_event_seqno(struct amdgpu_device *adev,
					     enum ras_seqno_type seqno_type);

/*
 * Boolean EEPROM safety-watermark check.
 * NOTE(review): which outcome "true" denotes is not visible here - confirm.
 */
bool amdgpu_ras_mgr_check_eeprom_safety_watermark(struct amdgpu_device *adev);

/* Reports the current NPS mode through the @nps_mode out-parameter. */
int amdgpu_ras_mgr_get_curr_nps_mode(struct amdgpu_device *adev,
				     uint32_t *nps_mode);

/*
 * Boolean check of @addr against retired addresses.
 * NOTE(review): the address space of @addr is not visible here - confirm.
 */
bool amdgpu_ras_mgr_check_retired_addr(struct amdgpu_device *adev,
				       uint64_t addr);

/* Boolean RMA-state query for @adev. */
bool amdgpu_ras_mgr_is_rma(struct amdgpu_device *adev);

/*
 * Dispatches RAS command @cmd_id. (@input, @input_size) and
 * (@output, @out_size) are buffer/size pairs by naming.
 * NOTE(review): sizes are presumably in bytes, and whether NULL buffers are
 * accepted is not visible here - confirm.
 */
int amdgpu_ras_mgr_handle_ras_cmd(struct amdgpu_device *adev,
				  uint32_t cmd_id, void *input, uint32_t input_size,
				  void *output, uint32_t out_size);
/*
 * Reset hooks, named as a pre/post pair around a reset of @adev.
 *
 * NOTE(review): what each hook does is not visible in this header. They
 * presumably relate to the is_paused / ras_event_done members of
 * struct amdgpu_ras_mgr, and presumably return 0 on success and a negative
 * error code on failure - confirm against the definitions.
 */
int amdgpu_ras_mgr_pre_reset(struct amdgpu_device *adev);
int amdgpu_ras_mgr_post_reset(struct amdgpu_device *adev);
#endif