1// SPDX-License-Identifier: MIT
2/*
3 * Copyright 2025 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 */
24#include "ras.h"
25#include "ras_aca.h"
26#include "ras_core_status.h"
27#include "ras_aca_v1_0.h"
28
29struct ras_aca_hwip {
30 int hwid;
31 int mcatype;
32};
33
34static struct ras_aca_hwip aca_hwid_mcatypes[ACA_ECC_HWIP_COUNT] = {
35 [ACA_ECC_HWIP__SMU] = {0x01, 0x01},
36 [ACA_ECC_HWIP__PCS_XGMI] = {0x50, 0x00},
37 [ACA_ECC_HWIP__UMC] = {0x96, 0x00},
38};
39
40static int aca_decode_bank_info(struct aca_block *aca_blk,
41 struct aca_bank_reg *bank, struct aca_ecc_info *info)
42{
43 u64 ipid;
44 u32 instidhi, instidlo;
45
46 ipid = bank->regs[ACA_REG_IDX__IPID];
47 info->hwid = ACA_REG_IPID_HARDWAREID(ipid);
48 info->mcatype = ACA_REG_IPID_MCATYPE(ipid);
49 /*
50 * Unified DieID Format: SAASS. A:AID, S:Socket.
51 * Unified DieID[4:4] = InstanceId[0:0]
52 * Unified DieID[0:3] = InstanceIdHi[0:3]
53 */
54 instidhi = ACA_REG_IPID_INSTANCEIDHI(ipid);
55 instidlo = ACA_REG_IPID_INSTANCEIDLO(ipid);
56 info->die_id = ((instidhi >> 2) & 0x03);
57 info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03);
58
59 if ((aca_blk->blk_info->hwip == ACA_ECC_HWIP__SMU) &&
60 (aca_blk->blk_info->ras_block_id == RAS_BLOCK_ID__GFX))
61 info->xcd_id =
62 ((instidlo & GENMASK_ULL(31, 1)) == mmSMNAID_XCD0_MCA_SMU) ? 0 : 1;
63
64 return 0;
65}
66
67static bool aca_check_bank_hwip(struct aca_bank_reg *bank, enum aca_ecc_hwip type)
68{
69 struct ras_aca_hwip *hwip;
70 int hwid, mcatype;
71 u64 ipid;
72
73 if (!bank || (type == ACA_ECC_HWIP__UNKNOWN))
74 return false;
75
76 hwip = &aca_hwid_mcatypes[type];
77 if (!hwip->hwid)
78 return false;
79
80 ipid = bank->regs[ACA_REG_IDX__IPID];
81 hwid = ACA_REG_IPID_HARDWAREID(ipid);
82 mcatype = ACA_REG_IPID_MCATYPE(ipid);
83
84 return hwip->hwid == hwid && hwip->mcatype == mcatype;
85}
86
87static bool aca_match_bank_default(struct aca_block *aca_blk, void *data)
88{
89 return aca_check_bank_hwip(bank: (struct aca_bank_reg *)data, type: aca_blk->blk_info->hwip);
90}
91
92static bool aca_match_gfx_bank(struct aca_block *aca_blk, void *data)
93{
94 struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
95 u32 instlo;
96
97 if (!aca_check_bank_hwip(bank, type: aca_blk->blk_info->hwip))
98 return false;
99
100 instlo = ACA_REG_IPID_INSTANCEIDLO(bank->regs[ACA_REG_IDX__IPID]);
101 instlo &= GENMASK_ULL(31, 1);
102 switch (instlo) {
103 case mmSMNAID_XCD0_MCA_SMU:
104 case mmSMNAID_XCD1_MCA_SMU:
105 case mmSMNXCD_XCD0_MCA_SMU:
106 return true;
107 default:
108 break;
109 }
110
111 return false;
112}
113
114static bool aca_match_sdma_bank(struct aca_block *aca_blk, void *data)
115{
116 struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
117 /* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */
118 static int sdma_err_codes[] = { 33, 34, 35, 36 };
119 u32 instlo;
120 int errcode, i;
121
122 if (!aca_check_bank_hwip(bank, type: aca_blk->blk_info->hwip))
123 return false;
124
125 instlo = ACA_REG_IPID_INSTANCEIDLO(bank->regs[ACA_REG_IDX__IPID]);
126 instlo &= GENMASK_ULL(31, 1);
127 if (instlo != mmSMNAID_AID0_MCA_SMU)
128 return false;
129
130 errcode = ACA_REG_SYND_ERRORINFORMATION(bank->regs[ACA_REG_IDX__SYND]);
131 errcode &= 0xff;
132
133 /* Check SDMA error codes */
134 for (i = 0; i < ARRAY_SIZE(sdma_err_codes); i++) {
135 if (errcode == sdma_err_codes[i])
136 return true;
137 }
138
139 return false;
140}
141
142static bool aca_match_mmhub_bank(struct aca_block *aca_blk, void *data)
143{
144 struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
145 /* reference to smu driver if header file */
146 const int mmhub_err_codes[] = {
147 0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */
148 5, 6, 7, 8, 9, /* CODE_EA0 - 4 */
149 10, /* CODE_UTCL2_ROUTER */
150 11, /* CODE_VML2 */
151 12, /* CODE_VML2_WALKER */
152 13, /* CODE_MMCANE */
153 };
154 u32 instlo;
155 int errcode, i;
156
157 if (!aca_check_bank_hwip(bank, type: aca_blk->blk_info->hwip))
158 return false;
159
160 instlo = ACA_REG_IPID_INSTANCEIDLO(bank->regs[ACA_REG_IDX__IPID]);
161 instlo &= GENMASK_ULL(31, 1);
162 if (instlo != mmSMNAID_AID0_MCA_SMU)
163 return false;
164
165 errcode = ACA_REG_SYND_ERRORINFORMATION(bank->regs[ACA_REG_IDX__SYND]);
166 errcode &= 0xff;
167
168 /* Check MMHUB error codes */
169 for (i = 0; i < ARRAY_SIZE(mmhub_err_codes); i++) {
170 if (errcode == mmhub_err_codes[i])
171 return true;
172 }
173
174 return false;
175}
176
177static bool aca_check_umc_de(struct ras_core_context *ras_core, uint64_t mc_umc_status)
178{
179 return (ras_core->poison_supported &&
180 ACA_REG_STATUS_VAL(mc_umc_status) &&
181 ACA_REG_STATUS_DEFERRED(mc_umc_status));
182}
183
184static bool aca_check_umc_ue(struct ras_core_context *ras_core, uint64_t mc_umc_status)
185{
186 if (aca_check_umc_de(ras_core, mc_umc_status))
187 return false;
188
189 return (ACA_REG_STATUS_VAL(mc_umc_status) &&
190 (ACA_REG_STATUS_PCC(mc_umc_status) ||
191 ACA_REG_STATUS_UC(mc_umc_status) ||
192 ACA_REG_STATUS_TCC(mc_umc_status)));
193}
194
195static bool aca_check_umc_ce(struct ras_core_context *ras_core, uint64_t mc_umc_status)
196{
197 if (aca_check_umc_de(ras_core, mc_umc_status))
198 return false;
199
200 return (ACA_REG_STATUS_VAL(mc_umc_status) &&
201 (ACA_REG_STATUS_CECC(mc_umc_status) ||
202 (ACA_REG_STATUS_UECC(mc_umc_status) &&
203 ACA_REG_STATUS_UC(mc_umc_status) == 0) ||
204 /* Identify data parity error in replay mode */
205 ((ACA_REG_STATUS_ERRORCODEEXT(mc_umc_status) == 0x5 ||
206 ACA_REG_STATUS_ERRORCODEEXT(mc_umc_status) == 0xb) &&
207 !(aca_check_umc_ue(ras_core, mc_umc_status)))));
208}
209
210static int aca_parse_umc_bank(struct ras_core_context *ras_core,
211 struct aca_block *ras_blk, void *data, void *buf)
212{
213 struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
214 struct aca_bank_ecc *ecc = (struct aca_bank_ecc *)buf;
215 struct aca_ecc_info bank_info;
216 uint32_t ext_error_code;
217 uint64_t status0;
218
219 status0 = bank->regs[ACA_REG_IDX__STATUS];
220 if (!ACA_REG_STATUS_VAL(status0))
221 return 0;
222
223 memset(&bank_info, 0, sizeof(bank_info));
224 aca_decode_bank_info(aca_blk: ras_blk, bank, info: &bank_info);
225 memcpy(&ecc->bank_info, &bank_info, sizeof(bank_info));
226 ecc->bank_info.status = bank->regs[ACA_REG_IDX__STATUS];
227 ecc->bank_info.ipid = bank->regs[ACA_REG_IDX__IPID];
228 ecc->bank_info.addr = bank->regs[ACA_REG_IDX__ADDR];
229
230 ext_error_code = ACA_REG_STATUS_ERRORCODEEXT(status0);
231
232 if (aca_check_umc_de(ras_core, mc_umc_status: status0))
233 ecc->de_count = 1;
234 else if (aca_check_umc_ue(ras_core, mc_umc_status: status0))
235 ecc->ue_count = ext_error_code ?
236 1 : ACA_REG_MISC0_ERRCNT(bank->regs[ACA_REG_IDX__MISC0]);
237 else if (aca_check_umc_ce(ras_core, mc_umc_status: status0))
238 ecc->ce_count = ext_error_code ?
239 1 : ACA_REG_MISC0_ERRCNT(bank->regs[ACA_REG_IDX__MISC0]);
240
241 return 0;
242}
243
244static bool aca_check_bank_is_de(struct ras_core_context *ras_core,
245 uint64_t status)
246{
247 return (ACA_REG_STATUS_POISON(status) ||
248 ACA_REG_STATUS_DEFERRED(status));
249}
250
251static int aca_parse_bank_default(struct ras_core_context *ras_core,
252 struct aca_block *ras_blk,
253 void *data, void *buf)
254{
255 struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
256 struct aca_bank_ecc *ecc = (struct aca_bank_ecc *)buf;
257 struct aca_ecc_info bank_info;
258 u64 misc0 = bank->regs[ACA_REG_IDX__MISC0];
259 u64 status = bank->regs[ACA_REG_IDX__STATUS];
260
261 memset(&bank_info, 0, sizeof(bank_info));
262 aca_decode_bank_info(aca_blk: ras_blk, bank, info: &bank_info);
263 memcpy(&ecc->bank_info, &bank_info, sizeof(bank_info));
264 ecc->bank_info.status = status;
265 ecc->bank_info.ipid = bank->regs[ACA_REG_IDX__IPID];
266 ecc->bank_info.addr = bank->regs[ACA_REG_IDX__ADDR];
267
268 if (aca_check_bank_is_de(ras_core, status)) {
269 ecc->de_count = 1;
270 } else {
271 if (bank->ecc_type == RAS_ERR_TYPE__UE)
272 ecc->ue_count = 1;
273 else if (bank->ecc_type == RAS_ERR_TYPE__CE)
274 ecc->ce_count = ACA_REG_MISC0_ERRCNT(misc0);
275 }
276
277 return 0;
278}
279
280static int aca_parse_xgmi_bank(struct ras_core_context *ras_core,
281 struct aca_block *ras_blk,
282 void *data, void *buf)
283{
284 struct aca_bank_reg *bank = (struct aca_bank_reg *)data;
285 struct aca_bank_ecc *ecc = (struct aca_bank_ecc *)buf;
286 struct aca_ecc_info bank_info;
287 u64 status, count;
288 int ext_error_code;
289
290 memset(&bank_info, 0, sizeof(bank_info));
291 aca_decode_bank_info(aca_blk: ras_blk, bank, info: &bank_info);
292 memcpy(&ecc->bank_info, &bank_info, sizeof(bank_info));
293 ecc->bank_info.status = bank->regs[ACA_REG_IDX__STATUS];
294 ecc->bank_info.ipid = bank->regs[ACA_REG_IDX__IPID];
295 ecc->bank_info.addr = bank->regs[ACA_REG_IDX__ADDR];
296
297 status = bank->regs[ACA_REG_IDX__STATUS];
298 ext_error_code = ACA_REG_STATUS_ERRORCODEEXT(status);
299
300 count = ACA_REG_MISC0_ERRCNT(bank->regs[ACA_REG_IDX__MISC0]);
301 if (bank->ecc_type == RAS_ERR_TYPE__UE) {
302 if (ext_error_code != 0 && ext_error_code != 9)
303 count = 0ULL;
304 ecc->ue_count = count;
305 } else if (bank->ecc_type == RAS_ERR_TYPE__CE) {
306 count = ext_error_code == 6 ? count : 0ULL;
307 ecc->ce_count = count;
308 }
309
310 return 0;
311}
312
313static const struct aca_block_info aca_v1_0_umc = {
314 .name = "umc",
315 .ras_block_id = RAS_BLOCK_ID__UMC,
316 .hwip = ACA_ECC_HWIP__UMC,
317 .mask = ACA_ERROR__UE_MASK | ACA_ERROR__CE_MASK | ACA_ERROR__DE_MASK,
318 .bank_ops = {
319 .bank_match = aca_match_bank_default,
320 .bank_parse = aca_parse_umc_bank,
321 },
322};
323
324static const struct aca_block_info aca_v1_0_gfx = {
325 .name = "gfx",
326 .ras_block_id = RAS_BLOCK_ID__GFX,
327 .hwip = ACA_ECC_HWIP__SMU,
328 .mask = ACA_ERROR__UE_MASK | ACA_ERROR__CE_MASK,
329 .bank_ops = {
330 .bank_match = aca_match_gfx_bank,
331 .bank_parse = aca_parse_bank_default,
332 },
333};
334
335static const struct aca_block_info aca_v1_0_sdma = {
336 .name = "sdma",
337 .ras_block_id = RAS_BLOCK_ID__SDMA,
338 .hwip = ACA_ECC_HWIP__SMU,
339 .mask = ACA_ERROR__UE_MASK,
340 .bank_ops = {
341 .bank_match = aca_match_sdma_bank,
342 .bank_parse = aca_parse_bank_default,
343 },
344};
345
346static const struct aca_block_info aca_v1_0_mmhub = {
347 .name = "mmhub",
348 .ras_block_id = RAS_BLOCK_ID__MMHUB,
349 .hwip = ACA_ECC_HWIP__SMU,
350 .mask = ACA_ERROR__UE_MASK,
351 .bank_ops = {
352 .bank_match = aca_match_mmhub_bank,
353 .bank_parse = aca_parse_bank_default,
354 },
355};
356
357static const struct aca_block_info aca_v1_0_xgmi = {
358 .name = "xgmi",
359 .ras_block_id = RAS_BLOCK_ID__XGMI_WAFL,
360 .hwip = ACA_ECC_HWIP__PCS_XGMI,
361 .mask = ACA_ERROR__UE_MASK | ACA_ERROR__CE_MASK,
362 .bank_ops = {
363 .bank_match = aca_match_bank_default,
364 .bank_parse = aca_parse_xgmi_bank,
365 },
366};
367
368static const struct aca_block_info *aca_block_info_v1_0[] = {
369 &aca_v1_0_umc,
370 &aca_v1_0_gfx,
371 &aca_v1_0_sdma,
372 &aca_v1_0_mmhub,
373 &aca_v1_0_xgmi,
374};
375
376const struct ras_aca_ip_func ras_aca_func_v1_0 = {
377 .block_num = ARRAY_SIZE(aca_block_info_v1_0),
378 .block_info = aca_block_info_v1_0,
379};
380

/* Source: linux/drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c */