| 1 | // SPDX-License-Identifier: MIT |
| 2 | /* |
| 3 | * Copyright 2025 Advanced Micro Devices, Inc. |
| 4 | * |
| 5 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 6 | * copy of this software and associated documentation files (the "Software"), |
| 7 | * to deal in the Software without restriction, including without limitation |
| 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 9 | * and/or sell copies of the Software, and to permit persons to whom the |
| 10 | * Software is furnished to do so, subject to the following conditions: |
| 11 | * |
| 12 | * The above copyright notice and this permission notice shall be included in |
| 13 | * all copies or substantial portions of the Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| 19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| 20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 21 | * OTHER DEALINGS IN THE SOFTWARE. |
| 22 | * |
| 23 | */ |
| 24 | #include "ras.h" |
| 25 | #include "ras_core_status.h" |
| 26 | #include "ras_log_ring.h" |
| 27 | |
/* Maximum number of log entries a single query may return. */
#define RAS_LOG_MAX_QUERY_SIZE 0xC000
/* Extra headroom entries reclaimed/allocated when the pool runs full. */
#define RAS_LOG_MEM_TEMP_SIZE 0x200
/* Total preallocated mempool capacity (query space + temp headroom). */
#define RAS_LOG_MEMPOOL_SIZE \
	(RAS_LOG_MAX_QUERY_SIZE + RAS_LOG_MEM_TEMP_SIZE)

/*
 * Radix-tree key layout: high bits hold the batch id, low 8 bits hold the
 * per-batch sub sequence number (so MAX_RECORD_PER_BATCH must be <= 256 —
 * TODO confirm against its definition, which is outside this file).
 */
#define BATCH_IDX_TO_TREE_IDX(batch_idx, sn) (((batch_idx) << 8) | (sn))
| 34 | |
/*
 * Template ACA register snapshot logged for RAS_LOG_EVENT_RMA events.
 * The IPID field is further patched with the socket id at log time
 * (see ras_log_ring_add_log_event).
 */
static const uint64_t ras_rma_aca_reg[ACA_REG_MAX_COUNT] = {
	[ACA_REG_IDX__CTL] = 0x1,
	[ACA_REG_IDX__STATUS] = 0xB000000000000137,
	[ACA_REG_IDX__ADDR] = 0x0,
	[ACA_REG_IDX__MISC0] = 0x0,
	[ACA_REG_IDX__CONFG] = 0x1ff00000002,
	[ACA_REG_IDX__IPID] = 0x9600000000,
	[ACA_REG_IDX__SYND] = 0x0,
};
| 44 | |
| 45 | static uint64_t ras_log_ring_get_logged_ecc_count(struct ras_core_context *ras_core) |
| 46 | { |
| 47 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 48 | uint64_t count = 0; |
| 49 | |
| 50 | if (log_ring->logged_ecc_count < 0) { |
| 51 | RAS_DEV_WARN(ras_core->dev, |
| 52 | "Error: the logged ras count should not less than 0!\n" ); |
| 53 | count = 0; |
| 54 | } else { |
| 55 | count = log_ring->logged_ecc_count; |
| 56 | } |
| 57 | |
| 58 | if (count > RAS_LOG_MEMPOOL_SIZE) |
| 59 | RAS_DEV_WARN(ras_core->dev, |
| 60 | "Error: the logged ras count is out of range!\n" ); |
| 61 | |
| 62 | return count; |
| 63 | } |
| 64 | |
| 65 | static int ras_log_ring_add_data(struct ras_core_context *ras_core, |
| 66 | struct ras_log_info *log, struct ras_log_batch_tag *batch_tag) |
| 67 | { |
| 68 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 69 | unsigned long flags = 0; |
| 70 | int ret = 0; |
| 71 | |
| 72 | if (batch_tag && (batch_tag->sub_seqno >= MAX_RECORD_PER_BATCH)) { |
| 73 | RAS_DEV_ERR(ras_core->dev, |
| 74 | "Invalid batch sub seqno:%d, batch:0x%llx\n" , |
| 75 | batch_tag->sub_seqno, batch_tag->batch_id); |
| 76 | return -EINVAL; |
| 77 | } |
| 78 | |
| 79 | spin_lock_irqsave(&log_ring->spin_lock, flags); |
| 80 | if (batch_tag) { |
| 81 | log->seqno = |
| 82 | BATCH_IDX_TO_TREE_IDX(batch_tag->batch_id, batch_tag->sub_seqno); |
| 83 | batch_tag->sub_seqno++; |
| 84 | } else { |
| 85 | log->seqno = BATCH_IDX_TO_TREE_IDX(log_ring->mono_upward_batch_id, 0); |
| 86 | log_ring->mono_upward_batch_id++; |
| 87 | } |
| 88 | ret = radix_tree_insert(&log_ring->ras_log_root, index: log->seqno, log); |
| 89 | if (!ret) |
| 90 | log_ring->logged_ecc_count++; |
| 91 | spin_unlock_irqrestore(lock: &log_ring->spin_lock, flags); |
| 92 | |
| 93 | if (ret) { |
| 94 | RAS_DEV_ERR(ras_core->dev, |
| 95 | "Failed to add ras log! seqno:0x%llx, ret:%d\n" , |
| 96 | log->seqno, ret); |
| 97 | mempool_free(element: log, pool: log_ring->ras_log_mempool); |
| 98 | } |
| 99 | |
| 100 | return ret; |
| 101 | } |
| 102 | |
| 103 | static int ras_log_ring_delete_data(struct ras_core_context *ras_core, uint32_t count) |
| 104 | { |
| 105 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 106 | unsigned long flags = 0; |
| 107 | uint32_t i = 0, j = 0; |
| 108 | uint64_t batch_id, idx; |
| 109 | void *data; |
| 110 | int ret = -ENODATA; |
| 111 | |
| 112 | if (count > ras_log_ring_get_logged_ecc_count(ras_core)) |
| 113 | return -EINVAL; |
| 114 | |
| 115 | spin_lock_irqsave(&log_ring->spin_lock, flags); |
| 116 | batch_id = log_ring->last_del_batch_id; |
| 117 | while (batch_id < log_ring->mono_upward_batch_id) { |
| 118 | for (j = 0; j < MAX_RECORD_PER_BATCH; j++) { |
| 119 | idx = BATCH_IDX_TO_TREE_IDX(batch_id, j); |
| 120 | data = radix_tree_delete(&log_ring->ras_log_root, idx); |
| 121 | if (data) { |
| 122 | mempool_free(element: data, pool: log_ring->ras_log_mempool); |
| 123 | log_ring->logged_ecc_count--; |
| 124 | i++; |
| 125 | } |
| 126 | } |
| 127 | batch_id = ++log_ring->last_del_batch_id; |
| 128 | if (i >= count) { |
| 129 | ret = 0; |
| 130 | break; |
| 131 | } |
| 132 | } |
| 133 | spin_unlock_irqrestore(lock: &log_ring->spin_lock, flags); |
| 134 | |
| 135 | return ret; |
| 136 | } |
| 137 | |
| 138 | static void ras_log_ring_clear_log_tree(struct ras_core_context *ras_core) |
| 139 | { |
| 140 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 141 | uint64_t batch_id, idx; |
| 142 | unsigned long flags = 0; |
| 143 | void *data; |
| 144 | int j; |
| 145 | |
| 146 | if ((log_ring->mono_upward_batch_id <= log_ring->last_del_batch_id) && |
| 147 | !log_ring->logged_ecc_count) |
| 148 | return; |
| 149 | |
| 150 | spin_lock_irqsave(&log_ring->spin_lock, flags); |
| 151 | batch_id = log_ring->last_del_batch_id; |
| 152 | while (batch_id < log_ring->mono_upward_batch_id) { |
| 153 | for (j = 0; j < MAX_RECORD_PER_BATCH; j++) { |
| 154 | idx = BATCH_IDX_TO_TREE_IDX(batch_id, j); |
| 155 | data = radix_tree_delete(&log_ring->ras_log_root, idx); |
| 156 | if (data) { |
| 157 | mempool_free(element: data, pool: log_ring->ras_log_mempool); |
| 158 | log_ring->logged_ecc_count--; |
| 159 | } |
| 160 | } |
| 161 | batch_id++; |
| 162 | } |
| 163 | spin_unlock_irqrestore(lock: &log_ring->spin_lock, flags); |
| 164 | |
| 165 | } |
| 166 | |
| 167 | int ras_log_ring_sw_init(struct ras_core_context *ras_core) |
| 168 | { |
| 169 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 170 | |
| 171 | memset(log_ring, 0, sizeof(*log_ring)); |
| 172 | |
| 173 | log_ring->ras_log_mempool = mempool_create_kmalloc_pool( |
| 174 | RAS_LOG_MEMPOOL_SIZE, sizeof(struct ras_log_info)); |
| 175 | if (!log_ring->ras_log_mempool) |
| 176 | return -ENOMEM; |
| 177 | |
| 178 | INIT_RADIX_TREE(&log_ring->ras_log_root, GFP_KERNEL); |
| 179 | |
| 180 | spin_lock_init(&log_ring->spin_lock); |
| 181 | |
| 182 | return 0; |
| 183 | } |
| 184 | |
| 185 | int ras_log_ring_sw_fini(struct ras_core_context *ras_core) |
| 186 | { |
| 187 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 188 | |
| 189 | ras_log_ring_clear_log_tree(ras_core); |
| 190 | log_ring->logged_ecc_count = 0; |
| 191 | log_ring->last_del_batch_id = 0; |
| 192 | log_ring->mono_upward_batch_id = 0; |
| 193 | |
| 194 | mempool_destroy(pool: log_ring->ras_log_mempool); |
| 195 | |
| 196 | return 0; |
| 197 | } |
| 198 | |
| 199 | struct ras_log_batch_tag *ras_log_ring_create_batch_tag(struct ras_core_context *ras_core) |
| 200 | { |
| 201 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 202 | struct ras_log_batch_tag *batch_tag; |
| 203 | unsigned long flags = 0; |
| 204 | |
| 205 | batch_tag = kzalloc(sizeof(*batch_tag), GFP_KERNEL); |
| 206 | if (!batch_tag) |
| 207 | return NULL; |
| 208 | |
| 209 | spin_lock_irqsave(&log_ring->spin_lock, flags); |
| 210 | batch_tag->batch_id = log_ring->mono_upward_batch_id; |
| 211 | log_ring->mono_upward_batch_id++; |
| 212 | spin_unlock_irqrestore(lock: &log_ring->spin_lock, flags); |
| 213 | |
| 214 | batch_tag->sub_seqno = 0; |
| 215 | batch_tag->timestamp = ras_core_get_utc_second_timestamp(ras_core); |
| 216 | return batch_tag; |
| 217 | } |
| 218 | |
/* Release a batch tag created by ras_log_ring_create_batch_tag(). */
void ras_log_ring_destroy_batch_tag(struct ras_core_context *ras_core,
	struct ras_log_batch_tag *batch_tag)
{
	/* Removed invalid IDE inlay-hint label; kfree(NULL) is a no-op. */
	kfree(batch_tag);
}
| 224 | |
| 225 | void ras_log_ring_add_log_event(struct ras_core_context *ras_core, |
| 226 | enum ras_log_event event, void *data, struct ras_log_batch_tag *batch_tag) |
| 227 | { |
| 228 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 229 | struct device_system_info dev_info = {0}; |
| 230 | struct ras_log_info *log; |
| 231 | uint64_t socket_id; |
| 232 | void *obj; |
| 233 | |
| 234 | obj = mempool_alloc_preallocated(pool: log_ring->ras_log_mempool); |
| 235 | if (!obj || |
| 236 | (ras_log_ring_get_logged_ecc_count(ras_core) >= RAS_LOG_MEMPOOL_SIZE)) { |
| 237 | ras_log_ring_delete_data(ras_core, RAS_LOG_MEM_TEMP_SIZE); |
| 238 | if (!obj) |
| 239 | obj = mempool_alloc_preallocated(pool: log_ring->ras_log_mempool); |
| 240 | } |
| 241 | |
| 242 | if (!obj) { |
| 243 | RAS_DEV_ERR(ras_core->dev, "ERROR: Failed to alloc ras log buffer!\n" ); |
| 244 | return; |
| 245 | } |
| 246 | |
| 247 | log = (struct ras_log_info *)obj; |
| 248 | |
| 249 | memset(log, 0, sizeof(*log)); |
| 250 | log->timestamp = |
| 251 | batch_tag ? batch_tag->timestamp : ras_core_get_utc_second_timestamp(ras_core); |
| 252 | log->event = event; |
| 253 | |
| 254 | if (data) |
| 255 | memcpy(&log->aca_reg, data, sizeof(log->aca_reg)); |
| 256 | |
| 257 | if (event == RAS_LOG_EVENT_RMA) { |
| 258 | memcpy(&log->aca_reg, ras_rma_aca_reg, sizeof(log->aca_reg)); |
| 259 | ras_core_get_device_system_info(ras_core, dev_info: &dev_info); |
| 260 | socket_id = dev_info.socket_id; |
| 261 | log->aca_reg.regs[ACA_REG_IDX__IPID] |= ((socket_id / 4) & 0x01); |
| 262 | log->aca_reg.regs[ACA_REG_IDX__IPID] |= (((socket_id % 4) & 0x3) << 44); |
| 263 | } |
| 264 | |
| 265 | ras_log_ring_add_data(ras_core, log, batch_tag); |
| 266 | } |
| 267 | |
| 268 | static struct ras_log_info *ras_log_ring_lookup_data(struct ras_core_context *ras_core, |
| 269 | uint64_t idx) |
| 270 | { |
| 271 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 272 | unsigned long flags = 0; |
| 273 | void *data; |
| 274 | |
| 275 | spin_lock_irqsave(&log_ring->spin_lock, flags); |
| 276 | data = radix_tree_lookup(&log_ring->ras_log_root, idx); |
| 277 | spin_unlock_irqrestore(lock: &log_ring->spin_lock, flags); |
| 278 | |
| 279 | return (struct ras_log_info *)data; |
| 280 | } |
| 281 | |
| 282 | int ras_log_ring_get_batch_records(struct ras_core_context *ras_core, uint64_t batch_id, |
| 283 | struct ras_log_info **log_arr, uint32_t arr_num) |
| 284 | { |
| 285 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 286 | uint32_t i, idx, count = 0; |
| 287 | void *data; |
| 288 | |
| 289 | if ((batch_id >= log_ring->mono_upward_batch_id) || |
| 290 | (batch_id < log_ring->last_del_batch_id)) |
| 291 | return -EINVAL; |
| 292 | |
| 293 | for (i = 0; i < MAX_RECORD_PER_BATCH; i++) { |
| 294 | idx = BATCH_IDX_TO_TREE_IDX(batch_id, i); |
| 295 | data = ras_log_ring_lookup_data(ras_core, idx); |
| 296 | if (data) { |
| 297 | log_arr[count++] = data; |
| 298 | if (count >= arr_num) |
| 299 | break; |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | return count; |
| 304 | } |
| 305 | |
| 306 | int ras_log_ring_get_batch_overview(struct ras_core_context *ras_core, |
| 307 | struct ras_log_batch_overview *overview) |
| 308 | { |
| 309 | struct ras_log_ring *log_ring = &ras_core->ras_log_ring; |
| 310 | |
| 311 | overview->logged_batch_count = |
| 312 | log_ring->mono_upward_batch_id - log_ring->last_del_batch_id; |
| 313 | overview->last_batch_id = log_ring->mono_upward_batch_id; |
| 314 | overview->first_batch_id = log_ring->last_del_batch_id; |
| 315 | |
| 316 | return 0; |
| 317 | } |
| 318 | |