1/* SPDX-License-Identifier: MIT */
2/*
3 * Copyright 2025 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 */
24#ifndef __RAS_CPER_H__
25#define __RAS_CPER_H__
26
/* Number of bytes occupied by one GUID. */
#define CPER_UUID_MAX_SIZE 16

/* Raw GUID storage: CPER_UUID_MAX_SIZE bytes with no further structure. */
struct ras_cper_guid {
	uint8_t b[CPER_UUID_MAX_SIZE];
};

/*
 * CPER_GUID__INIT - build a struct ras_cper_guid compound literal.
 * @data1: 32-bit group, stored as 4 bytes, least-significant byte first
 * @data2: 16-bit group, stored as 2 bytes, least-significant byte first
 * @data3: 16-bit group, stored as 2 bytes, least-significant byte first
 * @b0 .. @b7: the last eight bytes, stored in the order given
 *
 * @data1, @data2 and @data3 are each expanded more than once, so the
 * arguments must not have side effects.
 */
#define CPER_GUID__INIT(data1, data2, data3, b0, b1, b2, b3, b4, b5, b6, b7) \
	((struct ras_cper_guid) { .b = { \
		(data1) & 0xff, \
		((data1) >> 8) & 0xff, \
		((data1) >> 16) & 0xff, \
		((data1) >> 24) & 0xff, \
		(data2) & 0xff, \
		((data2) >> 8) & 0xff, \
		(data3) & 0xff, \
		((data3) >> 8) & 0xff, \
		(b0), (b1), (b2), (b3), \
		(b4), (b5), (b6), (b7) } })
38
/*
 * Revision numbers.
 * NOTE(review): CPER_HDR__REV_1 is presumably the value written to
 * cper_section_hdr.revision; the two CPER_SEC__* values are the ones
 * expected in cper_section_descriptor.revision_minor / revision_major.
 */
#define CPER_HDR__REV_1			(0x100)
#define CPER_SEC__MINOR_REV_1		(0x01)
#define CPER_SEC__MAJOR_REV_22		(0x22)

/* Number of 64-bit message slots in struct crashdump_boot. */
#define CPER_OAM_MAX_COUNT		(8)

/*
 * Register context type codes.
 * NOTE(review): presumably stored in the reg_ctx_type fields - confirm.
 */
#define CPER_CTX_TYPE__CRASH		(1)
#define CPER_CTX_TYPE__BOOT		(9)

/* Creator identification string for records produced by amdgpu. */
#define CPER_CREATOR_ID__AMDGPU		"amdgpu"
48
/*
 * Well-known GUID values, each expanding to a struct ras_cper_guid
 * compound literal via CPER_GUID__INIT().  The comment above each macro
 * gives the same value in the usual 8-4-4-4-12 text form.
 *
 * NOTE(review): the first three look like notification types and the
 * last three like section types; confirm against the users of
 * cper_section_hdr.notify_type and cper_section_descriptor.sec_type.
 */

/* E8F56FFE-919C-4CC5-BA88-65ABE14913BB */
#define CPER_NOTIFY__MCE \
	CPER_GUID__INIT(0xE8F56FFE, 0x919C, 0x4CC5, \
			0xBA, 0x88, 0x65, 0xAB, 0xE1, 0x49, 0x13, 0xBB)

/* 2DCE8BB1-BDD7-450E-B9AD-9CF4EBD4F890 */
#define CPER_NOTIFY__CMC \
	CPER_GUID__INIT(0x2DCE8BB1, 0xBDD7, 0x450E, \
			0xB9, 0xAD, 0x9C, 0xF4, 0xEB, 0xD4, 0xF8, 0x90)

/* 3D61A466-AB40-409A-A698-F362D464B38F */
#define BOOT__TYPE \
	CPER_GUID__INIT(0x3D61A466, 0xAB40, 0x409A, \
			0xA6, 0x98, 0xF3, 0x62, 0xD4, 0x64, 0xB3, 0x8F)

/* 32AC0C78-2623-48F6-B0D0-7365725FD6AE */
#define GPU__CRASHDUMP \
	CPER_GUID__INIT(0x32AC0C78, 0x2623, 0x48F6, \
			0xB0, 0xD0, 0x73, 0x65, 0x72, 0x5F, 0xD6, 0xAE)

/* 32AC0C78-2623-48F6-81A2-AC691780551D */
#define GPU__NONSTANDARD_ERROR \
	CPER_GUID__INIT(0x32AC0C78, 0x2623, 0x48F6, \
			0x81, 0xA2, 0xAC, 0x69, 0x17, 0x80, 0x55, 0x1D)

/* DC3EA0B0-A144-4797-B95B-53FA242B6E1D */
#define PROC_ERR__SECTION_TYPE \
	CPER_GUID__INIT(0xDC3EA0B0, 0xA144, 0x4797, \
			0xB9, 0x5B, 0x53, 0xFA, 0x24, 0x2B, 0x6E, 0x1D)
68
/* Kinds of CPER record; values are consecutive starting at zero. */
enum ras_cper_type {
	RAS_CPER_TYPE_RUNTIME	= 0,
	RAS_CPER_TYPE_FATAL	= 1,
	RAS_CPER_TYPE_BOOT	= 2,
	RAS_CPER_TYPE_RMA	= 3,
};
75
/*
 * Severity codes.  This enum is the type of the error_severity field of
 * struct cper_section_hdr and the severity field of
 * struct cper_section_descriptor, so the numeric values must not change.
 */
enum ras_cper_severity {
	RAS_CPER_SEV_NON_FATAL_UE	= 0,
	RAS_CPER_SEV_FATAL_UE		= 1,
	RAS_CPER_SEV_NON_FATAL_CE	= 2,
	RAS_CPER_SEV_RMA		= 3,

	/* Values 4..9 are not assigned. */
	RAS_CPER_SEV_UNUSED		= 10,
};
84
/*
 * ACA register identifiers.
 * NOTE(review): presumably used as indices into
 * runtime_error_reg.reg_dump[], whose length is RAS_CPER_ACA_REG_COUNT.
 * Values 7 and 11..15 have no named register.
 */
enum ras_cper_aca_reg {
	RAS_CPER_ACA_REG_CTL	= 0,
	RAS_CPER_ACA_REG_STATUS	= 1,
	RAS_CPER_ACA_REG_ADDR	= 2,
	RAS_CPER_ACA_REG_MISC0	= 3,
	RAS_CPER_ACA_REG_CONFIG	= 4,
	RAS_CPER_ACA_REG_IPID	= 5,
	RAS_CPER_ACA_REG_SYND	= 6,
	/* 7 is skipped */
	RAS_CPER_ACA_REG_DESTAT	= 8,
	RAS_CPER_ACA_REG_DEADDR	= 9,
	RAS_CPER_ACA_REG_MASK	= 10,

	/* Array length, not a register. */
	RAS_CPER_ACA_REG_COUNT	= 16,
};
99
/* Every structure from here to the matching pack(pop) is byte-packed. */
#pragma pack(push, 1)

/*
 * Eight single-byte time fields, in the order they are laid out in the
 * record.
 */
struct ras_cper_timestamp {
	uint8_t	seconds;
	uint8_t	minutes;
	uint8_t	hours;
	uint8_t	flag;
	uint8_t	day;
	uint8_t	month;
	uint8_t	year;
	uint8_t	century;
};
112
113struct cper_section_hdr {
114 char signature[4]; /* "CPER" */
115 uint16_t revision;
116 uint32_t signature_end; /* 0xFFFFFFFF */
117 uint16_t sec_cnt;
118 enum ras_cper_severity error_severity;
119 union {
120 struct {
121 uint32_t platform_id : 1;
122 uint32_t timestamp : 1;
123 uint32_t partition_id : 1;
124 uint32_t reserved : 29;
125 } valid_bits;
126 uint32_t valid_mask;
127 };
128 uint32_t record_length; /* Total size of CPER Entry */
129 struct ras_cper_timestamp timestamp;
130 char platform_id[16];
131 struct ras_cper_guid partition_id; /* Reserved */
132 char creator_id[16];
133 struct ras_cper_guid notify_type; /* CMC, MCE */
134 char record_id[8]; /* Unique CPER Entry ID */
135 uint32_t flags; /* Reserved */
136 uint64_t persistence_info; /* Reserved */
137 uint8_t reserved[12]; /* Reserved */
138};
139
140struct cper_section_descriptor {
141 uint32_t sec_offset; /* Offset from the start of CPER entry */
142 uint32_t sec_length;
143 uint8_t revision_minor; /* CPER_SEC_MINOR_REV_1 */
144 uint8_t revision_major; /* CPER_SEC_MAJOR_REV_22 */
145 union {
146 struct {
147 uint8_t fru_id : 1;
148 uint8_t fru_text : 1;
149 uint8_t reserved : 6;
150 } valid_bits;
151 uint8_t valid_mask;
152 };
153 uint8_t reserved;
154 union {
155 struct {
156 uint32_t primary : 1;
157 uint32_t reserved1 : 2;
158 uint32_t exceed_err_threshold : 1;
159 uint32_t latent_err : 1;
160 uint32_t reserved2 : 27;
161 } flag_bits;
162 uint32_t flag_mask;
163 };
164 struct ras_cper_guid sec_type;
165 char fru_id[16];
166 enum ras_cper_severity severity;
167 char fru_text[20];
168};
169
/*
 * Leading part of a runtime section (first member of
 * struct cper_section_runtime).
 *
 * valid_bits names 14 of the 64 bits in valid_mask: two one-bit flags
 * followed by two 6-bit counts.  The remaining bits are unnamed.
 */
struct runtime_hdr {
	union {
		struct {
			uint64_t	apic_id		: 1;
			uint64_t	fw_id		: 1;
			uint64_t	err_info_cnt	: 6;
			uint64_t	err_context_cnt	: 6;
		} valid_bits;
		uint64_t		valid_mask;
	};
	uint64_t			apic_id;
	char				fw_id[48];
};
183
184struct runtime_descriptor {
185 struct ras_cper_guid error_type;
186 union {
187 struct {
188 uint64_t ms_chk : 1;
189 uint64_t target_addr_id : 1;
190 uint64_t req_id : 1;
191 uint64_t resp_id : 1;
192 uint64_t instr_ptr : 1;
193 uint64_t reserved : 59;
194 } valid_bits;
195 uint64_t valid_mask;
196 };
197 union {
198 struct {
199 uint64_t err_type_valid : 1;
200 uint64_t pcc_valid : 1;
201 uint64_t uncorr_valid : 1;
202 uint64_t precise_ip_valid : 1;
203 uint64_t restartable_ip_valid : 1;
204 uint64_t overflow_valid : 1;
205 uint64_t reserved1 : 10;
206 uint64_t err_type : 2;
207 uint64_t pcc : 1;
208 uint64_t uncorr : 1;
209 uint64_t precised_ip : 1;
210 uint64_t restartable_ip : 1;
211 uint64_t overflow : 1;
212 uint64_t reserved2 : 41;
213 } ms_chk_bits;
214 uint64_t ms_chk_mask;
215 };
216 uint64_t target_addr_id;
217 uint64_t req_id;
218 uint64_t resp_id;
219 uint64_t instr_ptr;
220};
221
222struct runtime_error_reg {
223 uint16_t reg_ctx_type;
224 uint16_t reg_arr_size;
225 uint32_t msr_addr;
226 uint64_t mm_reg_addr;
227 uint64_t reg_dump[RAS_CPER_ACA_REG_COUNT];
228};
229
230struct cper_section_runtime {
231 struct runtime_hdr hdr;
232 struct runtime_descriptor descriptor;
233 struct runtime_error_reg reg;
234};
235
/*
 * Header shared by the fatal and boot section bodies.  Only fw_id is
 * named for use; every other field is reserved.
 */
struct crashdump_hdr {
	uint64_t	reserved1;
	uint64_t	reserved2;
	char		fw_id[48];
	uint64_t	reserved3[8];
};
242
/* Four 64-bit register values carried in a fatal section. */
struct fatal_reg_info {
	uint64_t	status;
	uint64_t	addr;
	uint64_t	ipid;
	uint64_t	synd;
};
249
250struct crashdump_fatal {
251 uint16_t reg_ctx_type;
252 uint16_t reg_arr_size;
253 uint32_t reserved1;
254 uint64_t reserved2;
255 struct fatal_reg_info reg;
256};
257
258struct crashdump_boot {
259 uint16_t reg_ctx_type;
260 uint16_t reg_arr_size;
261 uint32_t reserved1;
262 uint64_t reserved2;
263 uint64_t msg[CPER_OAM_MAX_COUNT];
264};
265
266struct cper_section_fatal {
267 struct crashdump_hdr hdr;
268 struct crashdump_fatal data;
269};
270
271struct cper_section_boot {
272 struct crashdump_hdr hdr;
273 struct crashdump_boot data;
274};
275
276struct ras_cper_fatal_record {
277 struct cper_section_hdr hdr;
278 struct cper_section_descriptor descriptor;
279 struct cper_section_fatal fatal;
280};
281#pragma pack(pop)
282
/*
 * Sizes, in bytes, of the pieces that make up a CPER entry.  Each one is
 * the sizeof of a structure defined in this header.
 */
#define RAS_HDR_LEN		(sizeof(struct cper_section_hdr))
#define RAS_SEC_DESC_LEN	(sizeof(struct cper_section_descriptor))

#define RAS_BOOT_SEC_LEN	(sizeof(struct cper_section_boot))
#define RAS_FATAL_SEC_LEN	(sizeof(struct cper_section_fatal))
/*
 * NOTE(review): struct cper_section_runtime is assumed to be the body of
 * the "non-standard error" section; it is the only remaining section body
 * defined in this header - confirm against the record generator.
 */
#define RAS_NONSTD_SEC_LEN	(sizeof(struct cper_section_runtime))

/*
 * Byte offset, from the start of the entry, of section descriptor @idx:
 * the header followed by @idx descriptors.
 */
#define RAS_SEC_DESC_OFFSET(idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (idx)))

/*
 * Byte offset, from the start of the entry, of section body @idx in an
 * entry with @count descriptors: the header, then @count descriptors,
 * then @idx bodies of the given kind.
 *
 * The arguments are parenthesized so that expressions such as "i + 1"
 * are multiplied as a whole.
 */
#define RAS_BOOT_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_BOOT_SEC_LEN * (idx)))
#define RAS_FATAL_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_FATAL_SEC_LEN * (idx)))
#define RAS_NONSTD_SEC_OFFSET(count, idx) \
	(RAS_HDR_LEN + (RAS_SEC_DESC_LEN * (count)) + (RAS_NONSTD_SEC_LEN * (idx)))
298
/* Only pointers to these types appear below, so forward declarations suffice. */
struct ras_core_context;
struct ras_log_info;

/*
 * ras_cper_generate_cper - declared here, defined elsewhere.
 *
 * NOTE(review): the contract below is read off the signature only and
 * must be confirmed against the implementation:
 * @ras_core:      RAS core context
 * @trace_list:    array of log-info pointers, presumably @count entries
 * @count:         presumably the number of entries in @trace_list
 * @buf:           presumably the output buffer for the generated data
 * @buf_len:       presumably the capacity of @buf in bytes
 * @real_data_len: presumably receives the number of bytes produced
 *
 * Returns an int status code.
 */
int ras_cper_generate_cper(struct ras_core_context *ras_core,
			   struct ras_log_info **trace_list,
			   uint32_t count,
			   uint8_t *buf,
			   uint32_t buf_len,
			   uint32_t *real_data_len);
304#endif
305

/* Source: linux/drivers/gpu/drm/amd/ras/rascore/ras_cper.h */