| 1 | /* SPDX-License-Identifier: MIT */ |
| 2 | /* |
| 3 | * Copyright 2025 Advanced Micro Devices, Inc. |
| 4 | * |
| 5 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 6 | * copy of this software and associated documentation files (the "Software"), |
| 7 | * to deal in the Software without restriction, including without limitation |
| 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 9 | * and/or sell copies of the Software, and to permit persons to whom the |
| 10 | * Software is furnished to do so, subject to the following conditions: |
| 11 | * |
| 12 | * The above copyright notice and this permission notice shall be included in |
| 13 | * all copies or substantial portions of the Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| 19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| 20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 21 | * OTHER DEALINGS IN THE SOFTWARE. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef __RAS_UMC_H__ |
| 26 | #define __RAS_UMC_H__ |
| 27 | #include "ras.h" |
| 28 | #include "ras_eeprom.h" |
| 29 | #include "ras_cmd.h" |
| 30 | |
/* VRAM type codes (UMC_VRAM_TYPE_*), numbered consecutively from 0 to 13. */
#define UMC_VRAM_TYPE_UNKNOWN   0
#define UMC_VRAM_TYPE_GDDR1     1
#define UMC_VRAM_TYPE_DDR2      2
#define UMC_VRAM_TYPE_GDDR3     3
#define UMC_VRAM_TYPE_GDDR4     4
#define UMC_VRAM_TYPE_GDDR5     5
#define UMC_VRAM_TYPE_HBM       6
#define UMC_VRAM_TYPE_DDR3      7
#define UMC_VRAM_TYPE_DDR4      8
#define UMC_VRAM_TYPE_GDDR6     9
#define UMC_VRAM_TYPE_DDR5      10
#define UMC_VRAM_TYPE_LPDDR4    11
#define UMC_VRAM_TYPE_LPDDR5    12
#define UMC_VRAM_TYPE_HBM3E     13
| 45 | |
/*
 * Tag value 0x1.
 * NOTE(review): presumably marks a newly detected ECC error - confirm
 * against the code that sets and tests it.
 */
#define UMC_ECC_NEW_DETECTED_TAG 0x1

/*
 * All-ones 64-bit value used as the "invalid memory PFN" marker.
 * The ULL suffix pins the constant to unsigned long long on every ABI;
 * an unsuffixed hex literal of this size is unsigned long on LP64
 * targets and unsigned long long elsewhere.
 */
#define UMC_INV_MEM_PFN (0xFFFFFFFFFFFFFFFFULL)
| 48 | |
/*
 * Number of MCA address bits that are flipped during bad page
 * retirement: three column bits plus one row bit.
 */
#define UMC_PA_FLIP_BITS_NUM 4
| 53 | |
/*
 * Memory partition (NPS) modes.
 *
 * Each NPSn enumerator has the numeric value n; NONE is 0.  UNKNOWN
 * follows NPS8 and therefore has the value 9 (written out explicitly
 * here, identical to the implicit "previous + 1" value).
 */
enum umc_memory_partition_mode {
	UMC_MEMORY_PARTITION_MODE_NONE    = 0,
	UMC_MEMORY_PARTITION_MODE_NPS1    = 1,
	UMC_MEMORY_PARTITION_MODE_NPS2    = 2,
	UMC_MEMORY_PARTITION_MODE_NPS3    = 3,
	UMC_MEMORY_PARTITION_MODE_NPS4    = 4,
	UMC_MEMORY_PARTITION_MODE_NPS6    = 6,
	UMC_MEMORY_PARTITION_MODE_NPS8    = 8,
	UMC_MEMORY_PARTITION_MODE_UNKNOWN = 9
};
| 64 | |
/* Forward declarations; this header refers to both types through pointers only. */
struct ras_bank_ecc;
struct ras_core_context;
| 67 | |
| 68 | struct umc_flip_bits { |
| 69 | uint32_t flip_bits_in_pa[UMC_PA_FLIP_BITS_NUM]; |
| 70 | uint32_t flip_row_bit; |
| 71 | uint32_t r13_in_pa; |
| 72 | uint32_t bit_num; |
| 73 | }; |
| 74 | |
/*
 * MCA-side address: a 64-bit error address followed by four 32-bit
 * instance/ID fields.  Used as the input of
 * ras_umc_psp_convert_ma_to_pa().
 */
struct umc_mca_addr {
	uint64_t	err_addr;	/* 64-bit error address */
	uint32_t	ch_inst;	/* channel instance */
	uint32_t	umc_inst;	/* UMC instance */
	uint32_t	node_inst;	/* node instance */
	uint32_t	socket_id;	/* socket ID */
};
| 82 | |
/*
 * Physical-address-side result: a 64-bit address plus bank and channel
 * index.  Used as the output of ras_umc_psp_convert_ma_to_pa().
 */
struct umc_phy_addr {
	uint64_t	pa;		/* 64-bit physical address */
	uint32_t	bank;		/* bank number */
	uint32_t	channel_idx;	/* channel index */
};
| 88 | |
/*
 * Bank-level address made of seven 32-bit coordinates.  Exchanged with
 * a SoC physical address through the bank_to_soc_pa / soc_pa_to_bank
 * callbacks of struct ras_umc_ip_func.
 */
struct umc_bank_addr {
	uint32_t	stack_id;	/* SID */
	uint32_t	bank_group;
	uint32_t	bank;
	uint32_t	row;
	uint32_t	column;
	uint32_t	channel;
	uint32_t	subchannel;	/* Also called Pseudochannel (PC) */
};
| 98 | |
/*
 * Callback table reached through struct ras_umc::ip_func.
 *
 * Every callback receives the RAS core context as its first argument
 * and returns int; the return-value convention is not visible in this
 * header.
 * NOTE(review): the conversion direction of each callback is taken
 * from its name - confirm against the implementations.
 */
struct ras_umc_ip_func {
	/* bank ECC entry in, EEPROM record out (per name) */
	int (*bank_to_eeprom_record)(struct ras_core_context *ras_core,
				     struct ras_bank_ecc *bank,
				     struct eeprom_umc_record *record);

	/* operates on a single EEPROM record for the given NPS value */
	int (*eeprom_record_to_nps_record)(struct ras_core_context *ras_core,
					   struct eeprom_umc_record *record,
					   uint32_t nps);

	/* like the above, with a caller-supplied pfns array of num entries */
	int (*eeprom_record_to_nps_pages)(struct ras_core_context *ras_core,
					  struct eeprom_umc_record *record,
					  uint32_t nps,
					  uint64_t *pfns, uint32_t num);

	/* bank address (passed by value) in, SoC physical address out */
	int (*bank_to_soc_pa)(struct ras_core_context *ras_core,
			      struct umc_bank_addr bank_addr,
			      uint64_t *soc_pa);

	/* SoC physical address in, bank address out */
	int (*soc_pa_to_bank)(struct ras_core_context *ras_core,
			      uint64_t soc_pa,
			      struct umc_bank_addr *bank_addr);
};
| 112 | |
/* A record array together with its fill level. */
struct eeprom_store_record {
	/* points to the array of data records */
	struct eeprom_umc_record	*bps;
	/* number of entries currently held */
	int				count;
	/* remaining room for new entries */
	int				space_left;
};
| 121 | |
| 122 | struct ras_umc_err_data { |
| 123 | struct eeprom_store_record rom_data; |
| 124 | struct eeprom_store_record ram_data; |
| 125 | enum umc_memory_partition_mode umc_nps_mode; |
| 126 | uint64_t last_retired_pfn; |
| 127 | }; |
| 128 | |
| 129 | struct ras_umc { |
| 130 | u32 umc_ip_version; |
| 131 | u32 umc_vram_type; |
| 132 | const struct ras_umc_ip_func *ip_func; |
| 133 | struct radix_tree_root root; |
| 134 | struct mutex tree_lock; |
| 135 | struct mutex umc_lock; |
| 136 | struct mutex bank_log_lock; |
| 137 | struct mutex pending_ecc_lock; |
| 138 | struct ras_umc_err_data umc_err_data; |
| 139 | struct list_head pending_ecc_list; |
| 140 | }; |
| 141 | |
/*
 * Software/hardware init and fini entry points.  Each takes the RAS
 * core context and returns int; the return-value convention is not
 * defined in this header.
 */
int ras_umc_sw_init(struct ras_core_context *ras);
int ras_umc_sw_fini(struct ras_core_context *ras);

int ras_umc_hw_init(struct ras_core_context *ras);
int ras_umc_hw_fini(struct ras_core_context *ras);
/*
 * MCA-to-physical address conversion: "in" is the MCA address, "out"
 * receives the physical address, "nps" is the partition mode value.
 */
int ras_umc_psp_convert_ma_to_pa(struct ras_core_context *ras_core,
				 struct umc_mca_addr *in,
				 struct umc_phy_addr *out,
				 uint32_t nps);

/*
 * Bad page / bad bank handling.  All functions below return int; the
 * return-value convention is not defined in this header.
 * NOTE(review): the type expected behind the untyped "data" pointer is
 * not visible here - check the implementation before calling.
 */
int ras_umc_handle_bad_pages(struct ras_core_context *ras_core, void *data);
int ras_umc_log_bad_bank(struct ras_core_context *ras,
			 struct ras_bank_ecc *bank);
int ras_umc_log_bad_bank_pending(struct ras_core_context *ras_core,
				 struct ras_bank_ecc *bank);
int ras_umc_log_pending_bad_bank(struct ras_core_context *ras_core);
int ras_umc_clear_logged_ecc(struct ras_core_context *ras_core);
int ras_umc_load_bad_pages(struct ras_core_context *ras_core);
int ras_umc_get_saved_eeprom_count(struct ras_core_context *ras_core);
int ras_umc_clean_badpage_data(struct ras_core_context *ras_core);

/* Fills *record from an error address, UMC instance and current-NPS address. */
int ras_umc_fill_eeprom_record(struct ras_core_context *ras_core,
			       uint64_t err_addr, uint32_t umc_inst,
			       struct umc_phy_addr *cur_nps_addr,
			       enum umc_memory_partition_mode cur_nps,
			       struct eeprom_umc_record *record);
| 160 | |
/*
 * Bad page queries.
 * NOTE(review): the type expected behind the untyped "record" pointer
 * is not visible in this header - check the implementation.
 */
int ras_umc_get_badpage_count(struct ras_core_context *ras_core);
int ras_umc_get_badpage_record(struct ras_core_context *ras_core,
			       uint32_t index, void *record);

/* Boolean query on a 64-bit address. */
bool ras_umc_check_retired_addr(struct ras_core_context *ras_core,
				uint64_t addr);

/*
 * Converts between *soc_pa and *bank_addr; bank_to_pa selects the
 * direction.
 * NOTE(review): presumably true means bank address -> SoC PA - confirm.
 */
int ras_umc_translate_soc_pa_and_bank(struct ras_core_context *ras_core,
				      uint64_t *soc_pa,
				      struct umc_bank_addr *bank_addr,
				      bool bank_to_pa);
| 166 | #endif |
| 167 | |