/* SPDX-License-Identifier: MIT */
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
24
25#ifndef __RAS_ACA_H__
26#define __RAS_ACA_H__
27#include "ras.h"
28
/*
 * Upper bounds used to size the ECC bookkeeping arrays declared in this
 * header (socket[] / aid[] / xcd[] and the per-device aca_blk[] table).
 */
#define MAX_SOCKET_NUM_PER_HIVE		8
#define MAX_AID_NUM_PER_SOCKET		4
#define MAX_XCD_NUM_PER_AID		2
#define MAX_ACA_RAS_BLOCK		20
33
/*
 * One bit per error type, positioned by the matching RAS_ERR_TYPE__* value
 * (those constants are not defined in this header).
 */
#define ACA_ERROR__UE_MASK	(1 << RAS_ERR_TYPE__UE)
#define ACA_ERROR__CE_MASK	(1 << RAS_ERR_TYPE__CE)
#define ACA_ERROR__DE_MASK	(1 << RAS_ERR_TYPE__DE)
37
/**
 * enum ras_aca_reg_idx - slot numbers of the registers that make up one bank
 *
 * The numbering is sparse: 7 and 11..15 have no name here.
 * ACA_REG_MAX_COUNT is the number of slots, not a register; it sizes
 * aca_bank_reg.regs[].
 *
 * NOTE(review): the values look like a fixed hardware/firmware layout, so
 * they are kept explicit - confirm before renumbering anything.
 */
enum ras_aca_reg_idx {
	ACA_REG_IDX__CTL	= 0,
	ACA_REG_IDX__STATUS	= 1,
	ACA_REG_IDX__ADDR	= 2,
	ACA_REG_IDX__MISC0	= 3,
	ACA_REG_IDX__CONFG	= 4,
	ACA_REG_IDX__IPID	= 5,
	ACA_REG_IDX__SYND	= 6,
	/* 7 is not named in this header */
	ACA_REG_IDX__DESTAT	= 8,
	ACA_REG_IDX__DEADDR	= 9,
	ACA_REG_IDX__CTL_MASK	= 10,
	ACA_REG_MAX_COUNT	= 16,
};
51
/*
 * Forward declarations: this header's callback types and prototypes only
 * pass these by pointer, so the full definitions are not needed yet.
 */
struct ras_core_context;
struct aca_block;
54
55struct aca_bank_reg {
56 u32 ecc_type;
57 u64 seq_no;
58 u64 regs[ACA_REG_MAX_COUNT];
59};
60
/**
 * enum aca_ecc_hwip - hardware IP that an ACA block is associated with
 *
 * Valid IPs count up from 0, so ACA_ECC_HWIP_COUNT equals the number of
 * valid entries. ACA_ECC_HWIP__UNKNOWN is the only negative value.
 */
enum aca_ecc_hwip {
	ACA_ECC_HWIP__UNKNOWN = -1,

	ACA_ECC_HWIP__PSP = 0,
	ACA_ECC_HWIP__UMC,
	ACA_ECC_HWIP__SMU,
	ACA_ECC_HWIP__PCS_XGMI,

	/* keep last: number of valid entries above */
	ACA_ECC_HWIP_COUNT,
};
69
70struct aca_ecc_info {
71 int die_id;
72 int socket_id;
73 int xcd_id;
74 int hwid;
75 int mcatype;
76 uint64_t status;
77 uint64_t ipid;
78 uint64_t addr;
79};
80
81struct aca_bank_ecc {
82 struct aca_ecc_info bank_info;
83 u32 ce_count;
84 u32 ue_count;
85 u32 de_count;
86};
87
88struct aca_ecc_count {
89 u32 new_ce_count;
90 u32 total_ce_count;
91 u32 new_ue_count;
92 u32 total_ue_count;
93 u32 new_de_count;
94 u32 total_de_count;
95};
96
97struct aca_xcd_ecc {
98 struct aca_ecc_count ecc_err;
99};
100
101struct aca_aid_ecc {
102 union {
103 struct aca_xcd {
104 struct aca_xcd_ecc xcd[MAX_XCD_NUM_PER_AID];
105 u32 xcd_num;
106 } xcd;
107 struct aca_ecc_count ecc_err;
108 };
109};
110
111struct aca_socket_ecc {
112 struct aca_aid_ecc aid[MAX_AID_NUM_PER_SOCKET];
113 u32 aid_num;
114};
115
116struct aca_block_ecc {
117 struct aca_socket_ecc socket[MAX_SOCKET_NUM_PER_HIVE];
118 u32 socket_num_per_hive;
119};
120
/**
 * struct aca_bank_hw_ops - per-block callbacks for handling a bank
 * @bank_match: takes the block and an opaque @data pointer, returns bool
 * @bank_parse: takes the core context, the block, an opaque @data pointer
 *              and an opaque @buf pointer, returns int
 *
 * NOTE(review): the concrete types behind @data / @buf, and the meaning of
 * the return values, are not visible from this header. Presumably
 * @bank_match decides whether a bank belongs to the block and @bank_parse
 * decodes it - confirm against an implementation.
 */
struct aca_bank_hw_ops {
	bool	(*bank_match)(struct aca_block *ras_blk, void *data);
	int	(*bank_parse)(struct ras_core_context *ras_core,
			      struct aca_block *aca_blk,
			      void *data, void *buf);
};
126
127struct aca_block_info {
128 char name[32];
129 u32 ras_block_id;
130 enum aca_ecc_hwip hwip;
131 struct aca_bank_hw_ops bank_ops;
132 u32 mask;
133};
134
135struct aca_block {
136 const struct aca_block_info *blk_info;
137 struct aca_block_ecc ecc;
138};
139
140struct ras_aca_ip_func {
141 uint32_t block_num;
142 const struct aca_block_info **block_info;
143};
144
145struct ras_aca {
146 uint32_t aca_ip_version;
147 const struct ras_aca_ip_func *ip_func;
148 struct mutex aca_lock;
149 struct mutex bank_op_lock;
150 struct aca_block aca_blk[MAX_ACA_RAS_BLOCK];
151 uint32_t ue_updated_mark;
152};
153
/*
 * Software / hardware setup and teardown entry points. Each takes the RAS
 * core context and returns an int status.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure, with sw_* and hw_* called as init/fini pairs - confirm against
 * the implementation.
 */
int ras_aca_sw_init(struct ras_core_context *ras_core);
int ras_aca_sw_fini(struct ras_core_context *ras_core);

int ras_aca_hw_init(struct ras_core_context *ras_core);
int ras_aca_hw_fini(struct ras_core_context *ras_core);
158int ras_aca_get_block_ecc_count(struct ras_core_context *ras_core, u32 blk, void *data);
159int ras_aca_clear_block_new_ecc_count(struct ras_core_context *ras_core, u32 blk);
160int ras_aca_clear_all_blocks_ecc_count(struct ras_core_context *ras_core);
161int ras_aca_update_ecc(struct ras_core_context *ras_core, u32 ecc_type, void *data);
/*
 * Set / clear the fatal flag for the given RAS core context. No status is
 * returned.
 * NOTE(review): where the flag is stored is not visible from this header -
 * confirm against the implementation.
 */
void ras_aca_mark_fatal_flag(struct ras_core_context *ras_core);
void ras_aca_clear_fatal_flag(struct ras_core_context *ras_core);
164#endif
165

/* Source: linux/drivers/gpu/drm/amd/ras/rascore/ras_aca.h */