| 1 | /* SPDX-License-Identifier: MIT */ |
| 2 | /* |
| 3 | * Copyright © 2023 Intel Corporation |
| 4 | */ |
| 5 | |
| 6 | #ifndef _XE_DEVCOREDUMP_TYPES_H_ |
| 7 | #define _XE_DEVCOREDUMP_TYPES_H_ |
| 8 | |
| 9 | #include <linux/ktime.h> |
| 10 | #include <linux/mutex.h> |
| 11 | |
| 12 | #include "xe_hw_engine_types.h" |
| 13 | |
| 14 | struct xe_device; |
| 15 | struct xe_gt; |
| 16 | |
| 17 | /** |
| 18 | * struct xe_devcoredump_snapshot - Crash snapshot |
| 19 | * |
| 20 | * This struct contains all the useful information quickly captured at the time |
| 21 | * of the crash. So, any subsequent reads of the coredump points to a data that |
| 22 | * shows the state of the GPU of when the issue has happened. |
| 23 | */ |
| 24 | struct xe_devcoredump_snapshot { |
| 25 | /** @snapshot_time: Time of this capture. */ |
| 26 | ktime_t snapshot_time; |
| 27 | /** @boot_time: Relative boot time so the uptime can be calculated. */ |
| 28 | ktime_t boot_time; |
| 29 | /** @process_name: Name of process that triggered this gpu hang */ |
| 30 | char process_name[TASK_COMM_LEN]; |
| 31 | /** @pid: Process id of process that triggered this gpu hang */ |
| 32 | pid_t pid; |
| 33 | /** @reason: The reason the coredump was triggered */ |
| 34 | char *reason; |
| 35 | |
| 36 | /** @gt: Affected GT, used by forcewake for delayed capture */ |
| 37 | struct xe_gt *gt; |
| 38 | /** @work: Workqueue for deferred capture outside of signaling context */ |
| 39 | struct work_struct work; |
| 40 | |
| 41 | /** @guc: GuC snapshots */ |
| 42 | struct { |
| 43 | /** @guc.ct: GuC CT snapshot */ |
| 44 | struct xe_guc_ct_snapshot *ct; |
| 45 | /** @guc.log: GuC log snapshot */ |
| 46 | struct xe_guc_log_snapshot *log; |
| 47 | } guc; |
| 48 | |
| 49 | /** @ge: GuC Submission Engine snapshot */ |
| 50 | struct xe_guc_submit_exec_queue_snapshot *ge; |
| 51 | |
| 52 | /** @hwe: HW Engine snapshot array */ |
| 53 | struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES]; |
| 54 | /** @job: Snapshot of job state */ |
| 55 | struct xe_sched_job_snapshot *job; |
| 56 | /** |
| 57 | * @matched_node: The matched capture node for timedout job |
| 58 | * this single-node tracker works because devcoredump will always only |
| 59 | * produce one hw-engine capture per devcoredump event |
| 60 | */ |
| 61 | struct __guc_capture_parsed_output *matched_node; |
| 62 | /** @vm: Snapshot of VM state */ |
| 63 | struct xe_vm_snapshot *vm; |
| 64 | |
| 65 | /** @read: devcoredump in human readable format */ |
| 66 | struct { |
| 67 | /** @read.size: size of devcoredump in human readable format */ |
| 68 | ssize_t size; |
| 69 | /** @read.chunk_position: position of devcoredump chunk */ |
| 70 | ssize_t chunk_position; |
| 71 | /** @read.buffer: buffer of devcoredump in human readable format */ |
| 72 | char *buffer; |
| 73 | } read; |
| 74 | }; |
| 75 | |
| 76 | /** |
| 77 | * struct xe_devcoredump - Xe devcoredump main structure |
| 78 | * |
| 79 | * This struct represents the live and active dev_coredump node. |
| 80 | * It is created/populated at the time of a crash/error. Then it |
| 81 | * is read later when user access the device coredump data file |
| 82 | * for reading the information. |
| 83 | */ |
| 84 | struct xe_devcoredump { |
| 85 | /** @lock: protects access to entire structure */ |
| 86 | struct mutex lock; |
| 87 | /** @captured: The snapshot of the first hang has already been taken */ |
| 88 | bool captured; |
| 89 | /** @snapshot: Snapshot is captured at time of the first crash */ |
| 90 | struct xe_devcoredump_snapshot snapshot; |
| 91 | }; |
| 92 | |
| 93 | #endif |
| 94 | |