amdgpu_device.c source code [linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c]

1	/*
2	* Copyright 2008 Advanced Micro Devices, Inc.
3	* Copyright 2008 Red Hat Inc.
4	* Copyright 2009 Jerome Glisse.
5	*
6	* Permission is hereby granted, free of charge, to any person obtaining a
7	* copy of this software and associated documentation files (the "Software"),
8	* to deal in the Software without restriction, including without limitation
9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
10	* and/or sell copies of the Software, and to permit persons to whom the
11	* Software is furnished to do so, subject to the following conditions:
12	*
13	* The above copyright notice and this permission notice shall be included in
14	* all copies or substantial portions of the Software.
15	*
16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22	* OTHER DEALINGS IN THE SOFTWARE.
23	*
24	* Authors: Dave Airlie
25	* Alex Deucher
26	* Jerome Glisse
27	*/
28
29	#include <linux/aperture.h>
30	#include <linux/power_supply.h>
31	#include <linux/kthread.h>
32	#include <linux/module.h>
33	#include <linux/console.h>
34	#include <linux/slab.h>
35	#include <linux/iommu.h>
36	#include <linux/pci.h>
37	#include <linux/pci-p2pdma.h>
38	#include <linux/apple-gmux.h>
39
40	#include <drm/drm_atomic_helper.h>
41	#include <drm/drm_client_event.h>
42	#include <drm/drm_crtc_helper.h>
43	#include <drm/drm_probe_helper.h>
44	#include <drm/amdgpu_drm.h>
45	#include <linux/device.h>
46	#include <linux/vgaarb.h>
47	#include <linux/vga_switcheroo.h>
48	#include <linux/efi.h>
49	#include "amdgpu.h"
50	#include "amdgpu_trace.h"
51	#include "amdgpu_i2c.h"
52	#include "atom.h"
53	#include "amdgpu_atombios.h"
54	#include "amdgpu_atomfirmware.h"
55	#include "amd_pcie.h"
56	#ifdef CONFIG_DRM_AMDGPU_SI
57	#include "si.h"
58	#endif
59	#ifdef CONFIG_DRM_AMDGPU_CIK
60	#include "cik.h"
61	#endif
62	#include "vi.h"
63	#include "soc15.h"
64	#include "nv.h"
65	#include "bif/bif_4_1_d.h"
66	#include <linux/firmware.h>
67	#include "amdgpu_vf_error.h"
68
69	#include "amdgpu_amdkfd.h"
70	#include "amdgpu_pm.h"
71
72	#include "amdgpu_xgmi.h"
73	#include "amdgpu_ras.h"
74	#include "amdgpu_ras_mgr.h"
75	#include "amdgpu_pmu.h"
76	#include "amdgpu_fru_eeprom.h"
77	#include "amdgpu_reset.h"
78	#include "amdgpu_virt.h"
79	#include "amdgpu_dev_coredump.h"
80
81	#include <linux/suspend.h>
82	#include <drm/task_barrier.h>
83	#include <linux/pm_runtime.h>
84
85	#include <drm/drm_drv.h>
86
87	#if IS_ENABLED(CONFIG_X86)
88	#include <asm/intel-family.h>
89	#include <asm/cpu_device_id.h>
90	#endif
91
92	MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
93	MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
94	MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
95	MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
96	MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
97	MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
98	MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
99	MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
100
#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
/* Evaluates to true when @r is one of -EBUSY, -ETIMEDOUT or -EINVAL. */
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
/*
 * Fallback PCIe index/data register offsets. The byte offsets are shifted
 * right by two, i.e. the values are dword indices.
 */
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

/* Single-bit VBIOS flags. */
#define AMDGPU_VBIOS_SKIP (1U << 0)
#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
110
111	static const struct drm_driver amdgpu_kms_driver;
112
/*
 * Printable ASIC names. The position of each string is significant;
 * presumably the table is indexed by the ASIC type enum - TODO confirm
 * against that enum before reordering or inserting entries.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI", "PITCAIRN", "VERDE", "OLAND", "HAINAN",
	"BONAIRE", "KAVERI", "KABINI", "HAWAII", "MULLINS",
	"TOPAZ", "TONGA", "FIJI", "CARRIZO", "STONEY",
	"POLARIS10", "POLARIS11", "POLARIS12", "VEGAM",
	"VEGA10", "VEGA12", "VEGA20", "RAVEN", "ARCTURUS",
	"RENOIR", "ALDEBARAN",
	"NAVI10", "CYAN_SKILLFISH", "NAVI14", "NAVI12",
	"SIENNA_CICHLID", "NAVY_FLOUNDER", "VANGOGH",
	"DIMGREY_CAVEFISH", "BEIGE_GOBY", "YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
153
154	#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
155	/*
156	* Default init level where all blocks are expected to be initialized. This is
157	* the level of initialization expected by default and also after a full reset
158	* of the device.
159	*/
160	struct amdgpu_init_level amdgpu_init_default = {
161	.level = AMDGPU_INIT_LEVEL_DEFAULT,
162	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
163	};
164
165	struct amdgpu_init_level amdgpu_init_recovery = {
166	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
167	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
168	};
169
170	/*
171	* Minimal blocks needed to be initialized before a XGMI hive can be reset. This
172	* is used for cases like reset on initialization where the entire hive needs to
173	* be reset before first use.
174	*/
175	struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
176	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
177	.hwini_ip_block_mask =
178	BIT(AMD_IP_BLOCK_TYPE_GMC) \| BIT(AMD_IP_BLOCK_TYPE_SMC) \|
179	BIT(AMD_IP_BLOCK_TYPE_COMMON) \| BIT(AMD_IP_BLOCK_TYPE_IH) \|
180	BIT(AMD_IP_BLOCK_TYPE_PSP)
181	};
182
183	static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
184	static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
185	static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
186
187	static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
188
189	static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
190	enum amd_ip_block_type block)
191	{
192	return (adev->init_lvl->hwini_ip_block_mask & (`1U` << block)) != `0`;
193	}
194
195	void amdgpu_set_init_level(struct amdgpu_device *adev,
196	enum amdgpu_init_lvl_id lvl)
197	{
198	switch (lvl) {
199	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
200	adev->init_lvl = &amdgpu_init_minimal_xgmi;
201	break;
202	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
203	adev->init_lvl = &amdgpu_init_recovery;
204	break;
205	case AMDGPU_INIT_LEVEL_DEFAULT:
206	fallthrough;
207	default:
208	adev->init_lvl = &amdgpu_init_default;
209	break;
210	}
211	}
212
/* Forward declarations; the definitions are not part of this section. */
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
				     void *data);
216
217	/**
218	* DOC: pcie_replay_count
219	*
220	* The amdgpu driver provides a sysfs API for reporting the total number
221	* of PCIe replays (NAKs).
222	* The file pcie_replay_count is used for this and returns the total
223	* number of replays as a sum of the NAKs generated and NAKs received.
224	*/
225
226	static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
227	struct device_attribute attr, char* *buf)
228	{
229	struct drm_device *ddev = dev_get_drvdata(dev);
230	struct amdgpu_device *adev = drm_to_adev(ddev);
231	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
232
233	return sysfs_emit(buf, fmt: "%llu\n", cnt);
234	}
235
236	static DEVICE_ATTR(pcie_replay_count, `0444`,
237	amdgpu_device_get_pcie_replay_count, NULL);
238
239	static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
240	{
241	int ret = `0`;
242
243	if (amdgpu_nbio_is_replay_cnt_supported(adev))
244	ret = sysfs_create_file(kobj: &adev->dev->kobj,
245	attr: &dev_attr_pcie_replay_count.attr);
246
247	return ret;
248	}
249
250	static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
251	{
252	if (amdgpu_nbio_is_replay_cnt_supported(adev))
253	sysfs_remove_file(kobj: &adev->dev->kobj,
254	attr: &dev_attr_pcie_replay_count.attr);
255	}
256
257	static ssize_t amdgpu_sysfs_reg_state_get(struct file f, struct* kobject *kobj,
258	const struct bin_attribute attr, char* *buf,
259	loff_t ppos, size_t count)
260	{
261	struct device *dev = kobj_to_dev(kobj);
262	struct drm_device *ddev = dev_get_drvdata(dev);
263	struct amdgpu_device *adev = drm_to_adev(ddev);
264	ssize_t bytes_read;
265
266	switch (ppos) {
267	case AMDGPU_SYS_REG_STATE_XGMI:
268	bytes_read = amdgpu_asic_get_reg_state(
269	adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
270	break;
271	case AMDGPU_SYS_REG_STATE_WAFL:
272	bytes_read = amdgpu_asic_get_reg_state(
273	adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
274	break;
275	case AMDGPU_SYS_REG_STATE_PCIE:
276	bytes_read = amdgpu_asic_get_reg_state(
277	adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
278	break;
279	case AMDGPU_SYS_REG_STATE_USR:
280	bytes_read = amdgpu_asic_get_reg_state(
281	adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
282	break;
283	case AMDGPU_SYS_REG_STATE_USR_1:
284	bytes_read = amdgpu_asic_get_reg_state(
285	adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
286	break;
287	default:
288	return -EINVAL;
289	}
290
291	return bytes_read;
292	}
293
294	static const BIN_ATTR(reg_state, `0444`, amdgpu_sysfs_reg_state_get, NULL,
295	AMDGPU_SYS_REG_STATE_END);
296
297	int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
298	{
299	int ret;
300
301	if (!amdgpu_asic_get_reg_state_supported(adev))
302	return `0`;
303
304	ret = sysfs_create_bin_file(kobj: &adev->dev->kobj, attr: &bin_attr_reg_state);
305
306	return ret;
307	}
308
309	void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
310	{
311	if (!amdgpu_asic_get_reg_state_supported(adev))
312	return;
313	sysfs_remove_bin_file(kobj: &adev->dev->kobj, attr: &bin_attr_reg_state);
314	}
315
316	int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
317	{
318	int r;
319
320	if (ip_block->version->funcs->suspend) {
321	r = ip_block->version->funcs->suspend(ip_block);
322	if (r) {
323	dev_err(ip_block->adev->dev,
324	"suspend of IP block <%s> failed %d\n",
325	ip_block->version->funcs->name, r);
326	return r;
327	}
328	}
329
330	ip_block->status.hw = false;
331	return `0`;
332	}
333
334	int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
335	{
336	int r;
337
338	if (ip_block->version->funcs->resume) {
339	r = ip_block->version->funcs->resume(ip_block);
340	if (r) {
341	dev_err(ip_block->adev->dev,
342	"resume of IP block <%s> failed %d\n",
343	ip_block->version->funcs->name, r);
344	return r;
345	}
346	}
347
348	ip_block->status.hw = true;
349	return `0`;
350	}
351
352	/**
353	* DOC: board_info
354	*
355	* The amdgpu driver provides a sysfs API for giving board related information.
356	* It provides the form factor information in the format
357	*
358	* type : form factor
359	*
360	* Possible form factor values
361	*
362	* - "cem" - PCIE CEM card
363	* - "oam" - Open Compute Accelerator Module
364	* - "unknown" - Not known
365	*
366	*/
367
368	static ssize_t amdgpu_device_get_board_info(struct device *dev,
369	struct device_attribute *attr,
370	char *buf)
371	{
372	struct drm_device *ddev = dev_get_drvdata(dev);
373	struct amdgpu_device *adev = drm_to_adev(ddev);
374	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
375	const char *pkg;
376
377	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
378	pkg_type = adev->smuio.funcs->get_pkg_type(adev);
379
380	switch (pkg_type) {
381	case AMDGPU_PKG_TYPE_CEM:
382	pkg = "cem";
383	break;
384	case AMDGPU_PKG_TYPE_OAM:
385	pkg = "oam";
386	break;
387	default:
388	pkg = "unknown";
389	break;
390	}
391
392	return sysfs_emit(buf, fmt: "%s : %s\n", "type", pkg);
393	}
394
395	static DEVICE_ATTR(board_info, `0444`, amdgpu_device_get_board_info, NULL);
396
397	static struct attribute *amdgpu_board_attrs[] = {
398	&dev_attr_board_info.attr,
399	NULL,
400	};
401
402	static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
403	struct attribute attr, int* n)
404	{
405	struct device *dev = kobj_to_dev(kobj);
406	struct drm_device *ddev = dev_get_drvdata(dev);
407	struct amdgpu_device *adev = drm_to_adev(ddev);
408
409	if (adev->flags & AMD_IS_APU)
410	return `0`;
411
412	return attr->mode;
413	}
414
415	static const struct attribute_group amdgpu_board_attrs_group = {
416	.attrs = amdgpu_board_attrs,
417	.is_visible = amdgpu_board_attrs_is_visible
418	};
419
420	static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
421
422	/**
423	* amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
424	*
425	* @adev: amdgpu device pointer
426	*
427	* Returns true if the device is a dGPU with ATPX power control,
428	* otherwise return false.
429	*/
430	bool amdgpu_device_supports_px(struct amdgpu_device *adev)
431	{
432	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
433	return true;
434	return false;
435	}
436
437	/**
438	* amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
439	*
440	* @adev: amdgpu device pointer
441	*
442	* Returns true if the device is a dGPU with ACPI power control,
443	* otherwise return false.
444	*/
445	bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
446	{
447	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
448	return false;
449
450	if (adev->has_pr3 \|\|
451	((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
452	return true;
453	return false;
454	}
455
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @adev: amdgpu device pointer
 *
 * Thin wrapper that forwards the answer of amdgpu_asic_supports_baco().
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported)
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct amdgpu_device *adev)
{
	int baco_caps = amdgpu_asic_supports_baco(adev);

	return baco_caps;
}
470
471	void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
472	{
473	int bamaco_support;
474
475	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
476	bamaco_support = amdgpu_device_supports_baco(adev);
477
478	switch (amdgpu_runtime_pm) {
479	case `2`:
480	if (bamaco_support & MACO_SUPPORT) {
481	adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
482	dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
483	} else if (bamaco_support == BACO_SUPPORT) {
484	adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
485	dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
486	}
487	break;
488	case `1`:
489	if (bamaco_support & BACO_SUPPORT) {
490	adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
491	dev_info(adev->dev, "Forcing BACO for runtime pm\n");
492	}
493	break;
494	case -`1`:
495	case -`2`:
496	if (amdgpu_device_supports_px(adev)) {
497	/ enable PX as runtime mode /
498	adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
499	dev_info(adev->dev, "Using ATPX for runtime pm\n");
500	} else if (amdgpu_device_supports_boco(adev)) {
501	/ enable boco as runtime mode /
502	adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
503	dev_info(adev->dev, "Using BOCO for runtime pm\n");
504	} else {
505	if (!bamaco_support)
506	goto no_runtime_pm;
507
508	switch (adev->asic_type) {
509	case CHIP_VEGA20:
510	case CHIP_ARCTURUS:
511	/ BACO are not supported on vega20 and arctrus /
512	break;
513	case CHIP_VEGA10:
514	/ enable BACO as runpm mode if noretry=0 /
515	if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
516	adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
517	break;
518	default:
519	/ enable BACO as runpm mode on CI+ /
520	if (!amdgpu_passthrough(adev))
521	adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
522	break;
523	}
524
525	if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
526	if (bamaco_support & MACO_SUPPORT) {
527	adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
528	dev_info(adev->dev, "Using BAMACO for runtime pm\n");
529	} else {
530	dev_info(adev->dev, "Using BACO for runtime pm\n");
531	}
532	}
533	}
534	break;
535	case `0`:
536	dev_info(adev->dev, "runtime pm is manually disabled\n");
537	break;
538	default:
539	break;
540	}
541
542	no_runtime_pm:
543	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
544	dev_info(adev->dev, "Runtime PM not available\n");
545	}
546	/**
547	* amdgpu_device_supports_smart_shift - Is the device dGPU with
548	* smart shift support
549	*
550	* @adev: amdgpu device pointer
551	*
552	* Returns true if the device is a dGPU with Smart Shift support,
553	* otherwise returns false.
554	*/
555	bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
556	{
557	return (amdgpu_device_supports_boco(adev) &&
558	amdgpu_acpi_is_power_shift_control_supported());
559	}
560
561	/*
562	* VRAM access helper functions
563	*/
564
565	/**
566	* amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
567	*
568	* @adev: amdgpu_device pointer
569	* @pos: offset of the buffer in vram
570	* @buf: virtual address of the buffer in system memory
571	* @size: read/write size, sizeof(@buf) must > @size
572	* @write: true - write to vram, otherwise - read from vram
573	*/
574	void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
575	void *buf, size_t size, bool write)
576	{
577	unsigned long flags;
578	uint32_t hi = ~`0`, tmp = `0`;
579	uint32_t *data = buf;
580	uint64_t last;
581	int idx;
582
583	if (!drm_dev_enter(dev: adev_to_drm(adev), idx: &idx))
584	return;
585
586	BUG_ON(!IS_ALIGNED(pos, `4`) \|\| !IS_ALIGNED(size, `4`));
587
588	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
589	for (last = pos + size; pos < last; pos += `4`) {
590	tmp = pos >> `31`;
591
592	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) \| `0x80000000`);
593	if (tmp != hi) {
594	WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
595	hi = tmp;
596	}
597	if (write)
598	WREG32_NO_KIQ(mmMM_DATA, *data++);
599	else
600	*data++ = RREG32_NO_KIQ(mmMM_DATA);
601	}
602
603	spin_unlock_irqrestore(lock: &adev->mmio_idx_lock, flags);
604	drm_dev_exit(idx);
605	}
606
607	/**
608	* amdgpu_device_aper_access - access vram by vram aperture
609	*
610	* @adev: amdgpu_device pointer
611	* @pos: offset of the buffer in vram
612	* @buf: virtual address of the buffer in system memory
613	* @size: read/write size, sizeof(@buf) must > @size
614	* @write: true - write to vram, otherwise - read from vram
615	*
616	* The return value means how many bytes have been transferred.
617	*/
618	size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
619	void *buf, size_t size, bool write)
620	{
621	#ifdef CONFIG_64BIT
622	void __iomem *addr;
623	size_t count = `0`;
624	uint64_t last;
625
626	if (!adev->mman.aper_base_kaddr)
627	return `0`;
628
629	last = min(pos + size, adev->gmc.visible_vram_size);
630	if (last > pos) {
631	addr = adev->mman.aper_base_kaddr + pos;
632	count = last - pos;
633
634	if (write) {
635	memcpy_toio(addr, buf, count);
636	/ Make sure HDP write cache flush happens without any reordering*
637	* after the system memory contents are sent over PCIe device
638	*/
639	mb();
640	amdgpu_device_flush_hdp(adev, NULL);
641	} else {
642	amdgpu_device_invalidate_hdp(adev, NULL);
643	/ Make sure HDP read cache is invalidated before issuing a read*
644	* to the PCIe device
645	*/
646	mb();
647	memcpy_fromio(buf, addr, count);
648	}
649
650	}
651
652	return count;
653	#else
654	return `0`;
655	#endif
656	}
657
658	/**
659	* amdgpu_device_vram_access - read/write a buffer in vram
660	*
661	* @adev: amdgpu_device pointer
662	* @pos: offset of the buffer in vram
663	* @buf: virtual address of the buffer in system memory
664	* @size: read/write size, sizeof(@buf) must > @size
665	* @write: true - write to vram, otherwise - read from vram
666	*/
667	void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
668	void *buf, size_t size, bool write)
669	{
670	size_t count;
671
672	/ try to using vram apreature to access vram first /
673	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
674	size -= count;
675	if (size) {
676	/ using MM to access rest vram /
677	pos += count;
678	buf += count;
679	amdgpu_device_mm_access(adev, pos, buf, size, write);
680	}
681	}
682
683	/*
684	* register access helper functions.
685	*/
686
687	/ Check if hw access should be skipped because of hotplug or device error /
688	bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
689	{
690	if (adev->no_hw_access)
691	return true;
692
693	#ifdef CONFIG_LOCKDEP
694	/*
695	* This is a bit complicated to understand, so worth a comment. What we assert
696	* here is that the GPU reset is not running on another thread in parallel.
697	*
698	* For this we trylock the read side of the reset semaphore, if that succeeds
699	* we know that the reset is not running in parallel.
700	*
701	* If the trylock fails we assert that we are either already holding the read
702	* side of the lock or are the reset thread itself and hold the write side of
703	* the lock.
704	*/
705	if (in_task()) {
706	if (down_read_trylock(sem: &adev->reset_domain->sem))
707	up_read(sem: &adev->reset_domain->sem);
708	else
709	lockdep_assert_held(&adev->reset_domain->sem);
710	}
711	#endif
712	return false;
713	}
714
715	/**
716	* amdgpu_device_rreg - read a memory mapped IO or indirect register
717	*
718	* @adev: amdgpu_device pointer
719	* @reg: dword aligned register offset
720	* @acc_flags: access flags which require special behavior
721	*
722	* Returns the 32 bit value from the offset specified.
723	*/
724	uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
725	uint32_t reg, uint32_t acc_flags)
726	{
727	uint32_t ret;
728
729	if (amdgpu_device_skip_hw_access(adev))
730	return `0`;
731
732	if ((reg * `4`) < adev->rmmio_size) {
733	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
734	amdgpu_sriov_runtime(adev) &&
735	down_read_trylock(sem: &adev->reset_domain->sem)) {
736	ret = amdgpu_kiq_rreg(adev, reg, xcc_id: `0`);
737	up_read(sem: &adev->reset_domain->sem);
738	} else {
739	ret = readl(addr: ((void __iomem )adev->rmmio) + (reg `4`));
740	}
741	} else {
742	ret = adev->pcie_rreg(adev, reg * `4`);
743	}
744
745	trace_amdgpu_device_rreg(did: adev->pdev->device, reg, value: ret);
746
747	return ret;
748	}
749
750	/*
751	* MMIO register read with bytes helper functions
752	* @offset:bytes offset from MMIO start
753	*/
754
755	/**
756	* amdgpu_mm_rreg8 - read a memory mapped IO register
757	*
758	* @adev: amdgpu_device pointer
759	* @offset: byte aligned register offset
760	*
761	* Returns the 8 bit value from the offset specified.
762	*/
763	uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
764	{
765	if (amdgpu_device_skip_hw_access(adev))
766	return `0`;
767
768	if (offset < adev->rmmio_size)
769	return (readb(addr: adev->rmmio + offset));
770	BUG();
771	}
772
773
774	/**
775	* amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
776	*
777	* @adev: amdgpu_device pointer
778	* @reg: dword aligned register offset
779	* @acc_flags: access flags which require special behavior
780	* @xcc_id: xcc accelerated compute core id
781	*
782	* Returns the 32 bit value from the offset specified.
783	*/
784	uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
785	uint32_t reg, uint32_t acc_flags,
786	uint32_t xcc_id)
787	{
788	uint32_t ret, rlcg_flag;
789
790	if (amdgpu_device_skip_hw_access(adev))
791	return `0`;
792
793	if ((reg * `4`) < adev->rmmio_size) {
794	if (amdgpu_sriov_vf(adev) &&
795	!amdgpu_sriov_runtime(adev) &&
796	adev->gfx.rlc.rlcg_reg_access_supported &&
797	amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
798	hwip: GC_HWIP, write: false,
799	rlcg_flag: &rlcg_flag)) {
800	ret = amdgpu_virt_rlcg_reg_rw(adev, offset: reg, v: `0`, flag: rlcg_flag, GET_INST(GC, xcc_id));
801	} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
802	amdgpu_sriov_runtime(adev) &&
803	down_read_trylock(sem: &adev->reset_domain->sem)) {
804	ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
805	up_read(sem: &adev->reset_domain->sem);
806	} else {
807	ret = readl(addr: ((void __iomem )adev->rmmio) + (reg `4`));
808	}
809	} else {
810	ret = adev->pcie_rreg(adev, reg * `4`);
811	}
812
813	return ret;
814	}
815
816	/*
817	* MMIO register write with bytes helper functions
818	* @offset:bytes offset from MMIO start
819	* @value: the value want to be written to the register
820	*/
821
822	/**
823	* amdgpu_mm_wreg8 - read a memory mapped IO register
824	*
825	* @adev: amdgpu_device pointer
826	* @offset: byte aligned register offset
827	* @value: 8 bit value to write
828	*
829	* Writes the value specified to the offset specified.
830	*/
831	void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
832	{
833	if (amdgpu_device_skip_hw_access(adev))
834	return;
835
836	if (offset < adev->rmmio_size)
837	writeb(val: value, addr: adev->rmmio + offset);
838	else
839	BUG();
840	}
841
842	/**
843	* amdgpu_device_wreg - write to a memory mapped IO or indirect register
844	*
845	* @adev: amdgpu_device pointer
846	* @reg: dword aligned register offset
847	* @v: 32 bit value to write to the register
848	* @acc_flags: access flags which require special behavior
849	*
850	* Writes the value specified to the offset specified.
851	*/
852	void amdgpu_device_wreg(struct amdgpu_device *adev,
853	uint32_t reg, uint32_t v,
854	uint32_t acc_flags)
855	{
856	if (amdgpu_device_skip_hw_access(adev))
857	return;
858
859	if ((reg * `4`) < adev->rmmio_size) {
860	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
861	amdgpu_sriov_runtime(adev) &&
862	down_read_trylock(sem: &adev->reset_domain->sem)) {
863	amdgpu_kiq_wreg(adev, reg, v, xcc_id: `0`);
864	up_read(sem: &adev->reset_domain->sem);
865	} else {
866	writel(val: v, addr: ((void __iomem )adev->rmmio) + (reg `4`));
867	}
868	} else {
869	adev->pcie_wreg(adev, reg * `4`, v);
870	}
871
872	trace_amdgpu_device_wreg(did: adev->pdev->device, reg, value: v);
873	}
874
875	/**
876	* amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
877	*
878	* @adev: amdgpu_device pointer
879	* @reg: mmio/rlc register
880	* @v: value to write
881	* @xcc_id: xcc accelerated compute core id
882	*
883	* this function is invoked only for the debugfs register access
884	*/
885	void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
886	uint32_t reg, uint32_t v,
887	uint32_t xcc_id)
888	{
889	if (amdgpu_device_skip_hw_access(adev))
890	return;
891
892	if (amdgpu_sriov_fullaccess(adev) &&
893	adev->gfx.rlc.funcs &&
894	adev->gfx.rlc.funcs->is_rlcg_access_range) {
895	if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
896	return amdgpu_sriov_wreg(adev, offset: reg, value: v, acc_flags: `0`, hwip: `0`, xcc_id);
897	} else if ((reg * `4`) >= adev->rmmio_size) {
898	adev->pcie_wreg(adev, reg * `4`, v);
899	} else {
900	writel(val: v, addr: ((void __iomem )adev->rmmio) + (reg `4`));
901	}
902	}
903
904	/**
905	* amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
906	*
907	* @adev: amdgpu_device pointer
908	* @reg: dword aligned register offset
909	* @v: 32 bit value to write to the register
910	* @acc_flags: access flags which require special behavior
911	* @xcc_id: xcc accelerated compute core id
912	*
913	* Writes the value specified to the offset specified.
914	*/
915	void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
916	uint32_t reg, uint32_t v,
917	uint32_t acc_flags, uint32_t xcc_id)
918	{
919	uint32_t rlcg_flag;
920
921	if (amdgpu_device_skip_hw_access(adev))
922	return;
923
924	if ((reg * `4`) < adev->rmmio_size) {
925	if (amdgpu_sriov_vf(adev) &&
926	!amdgpu_sriov_runtime(adev) &&
927	adev->gfx.rlc.rlcg_reg_access_supported &&
928	amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
929	hwip: GC_HWIP, write: true,
930	rlcg_flag: &rlcg_flag)) {
931	amdgpu_virt_rlcg_reg_rw(adev, offset: reg, v, flag: rlcg_flag, GET_INST(GC, xcc_id));
932	} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
933	amdgpu_sriov_runtime(adev) &&
934	down_read_trylock(sem: &adev->reset_domain->sem)) {
935	amdgpu_kiq_wreg(adev, reg, v, xcc_id);
936	up_read(sem: &adev->reset_domain->sem);
937	} else {
938	writel(val: v, addr: ((void __iomem )adev->rmmio) + (reg `4`));
939	}
940	} else {
941	adev->pcie_wreg(adev, reg * `4`, v);
942	}
943	}
944
945	/**
946	* amdgpu_device_indirect_rreg - read an indirect register
947	*
948	* @adev: amdgpu_device pointer
949	* @reg_addr: indirect register address to read from
950	*
951	* Returns the value of indirect register @reg_addr
952	*/
953	u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
954	u32 reg_addr)
955	{
956	unsigned long flags, pcie_index, pcie_data;
957	void __iomem *pcie_index_offset;
958	void __iomem *pcie_data_offset;
959	u32 r;
960
961	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
962	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
963
964	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
965	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
966	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
967
968	writel(val: reg_addr, addr: pcie_index_offset);
969	readl(addr: pcie_index_offset);
970	r = readl(addr: pcie_data_offset);
971	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
972
973	return r;
974	}
975
976	u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
977	u64 reg_addr)
978	{
979	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
980	u32 r;
981	void __iomem *pcie_index_offset;
982	void __iomem *pcie_index_hi_offset;
983	void __iomem *pcie_data_offset;
984
985	if (unlikely(!adev->nbio.funcs)) {
986	pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
987	pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
988	} else {
989	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
990	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
991	}
992
993	if (reg_addr >> `32`) {
994	if (unlikely(!adev->nbio.funcs))
995	pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
996	else
997	pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
998	} else {
999	pcie_index_hi = `0`;
1000	}
1001
1002	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1003	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1004	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1005	if (pcie_index_hi != `0`)
1006	pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1007	pcie_index_hi * `4`;
1008
1009	writel(val: reg_addr, addr: pcie_index_offset);
1010	readl(addr: pcie_index_offset);
1011	if (pcie_index_hi != `0`) {
1012	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1013	readl(addr: pcie_index_hi_offset);
1014	}
1015	r = readl(addr: pcie_data_offset);
1016
1017	/ clear the high bits /
1018	if (pcie_index_hi != `0`) {
1019	writel(val: `0`, addr: pcie_index_hi_offset);
1020	readl(addr: pcie_index_hi_offset);
1021	}
1022
1023	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1024
1025	return r;
1026	}
1027
1028	/**
1029	* amdgpu_device_indirect_rreg64 - read a 64bits indirect register
1030	*
1031	* @adev: amdgpu_device pointer
1032	* @reg_addr: indirect register address to read from
1033	*
1034	* Returns the value of indirect register @reg_addr
1035	*/
1036	u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1037	u32 reg_addr)
1038	{
1039	unsigned long flags, pcie_index, pcie_data;
1040	void __iomem *pcie_index_offset;
1041	void __iomem *pcie_data_offset;
1042	u64 r;
1043
1044	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1045	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1046
1047	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1048	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1049	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1050
1051	/ read low 32 bits /
1052	writel(val: reg_addr, addr: pcie_index_offset);
1053	readl(addr: pcie_index_offset);
1054	r = readl(addr: pcie_data_offset);
1055	/ read high 32 bits /
1056	writel(val: reg_addr + `4`, addr: pcie_index_offset);
1057	readl(addr: pcie_index_offset);
1058	r \|= ((u64)readl(addr: pcie_data_offset) << `32`);
1059	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1060
1061	return r;
1062	}
1063
1064	u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
1065	u64 reg_addr)
1066	{
1067	unsigned long flags, pcie_index, pcie_data;
1068	unsigned long pcie_index_hi = `0`;
1069	void __iomem *pcie_index_offset;
1070	void __iomem *pcie_index_hi_offset;
1071	void __iomem *pcie_data_offset;
1072	u64 r;
1073
1074	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1075	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1076	if ((reg_addr >> `32`) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1077	pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1078
1079	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1080	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1081	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1082	if (pcie_index_hi != `0`)
1083	pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1084	pcie_index_hi * `4`;
1085
1086	/ read low 32 bits /
1087	writel(val: reg_addr, addr: pcie_index_offset);
1088	readl(addr: pcie_index_offset);
1089	if (pcie_index_hi != `0`) {
1090	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1091	readl(addr: pcie_index_hi_offset);
1092	}
1093	r = readl(addr: pcie_data_offset);
1094	/ read high 32 bits /
1095	writel(val: reg_addr + `4`, addr: pcie_index_offset);
1096	readl(addr: pcie_index_offset);
1097	if (pcie_index_hi != `0`) {
1098	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1099	readl(addr: pcie_index_hi_offset);
1100	}
1101	r \|= ((u64)readl(addr: pcie_data_offset) << `32`);
1102
1103	/ clear the high bits /
1104	if (pcie_index_hi != `0`) {
1105	writel(val: `0`, addr: pcie_index_hi_offset);
1106	readl(addr: pcie_index_hi_offset);
1107	}
1108
1109	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1110
1111	return r;
1112	}
1113
1114	/**
1115	* amdgpu_device_indirect_wreg - write an indirect register address
1116	*
1117	* @adev: amdgpu_device pointer
1118	* @reg_addr: indirect register offset
1119	* @reg_data: indirect register data
1120	*
1121	*/
1122	void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1123	u32 reg_addr, u32 reg_data)
1124	{
1125	unsigned long flags, pcie_index, pcie_data;
1126	void __iomem *pcie_index_offset;
1127	void __iomem *pcie_data_offset;
1128
1129	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1130	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1131
1132	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1133	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1134	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1135
1136	writel(val: reg_addr, addr: pcie_index_offset);
1137	readl(addr: pcie_index_offset);
1138	writel(val: reg_data, addr: pcie_data_offset);
1139	readl(addr: pcie_data_offset);
1140	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1141	}
1142
1143	void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1144	u64 reg_addr, u32 reg_data)
1145	{
1146	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1147	void __iomem *pcie_index_offset;
1148	void __iomem *pcie_index_hi_offset;
1149	void __iomem *pcie_data_offset;
1150
1151	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1152	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1153	if ((reg_addr >> `32`) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1154	pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1155	else
1156	pcie_index_hi = `0`;
1157
1158	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1159	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1160	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1161	if (pcie_index_hi != `0`)
1162	pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1163	pcie_index_hi * `4`;
1164
1165	writel(val: reg_addr, addr: pcie_index_offset);
1166	readl(addr: pcie_index_offset);
1167	if (pcie_index_hi != `0`) {
1168	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1169	readl(addr: pcie_index_hi_offset);
1170	}
1171	writel(val: reg_data, addr: pcie_data_offset);
1172	readl(addr: pcie_data_offset);
1173
1174	/ clear the high bits /
1175	if (pcie_index_hi != `0`) {
1176	writel(val: `0`, addr: pcie_index_hi_offset);
1177	readl(addr: pcie_index_hi_offset);
1178	}
1179
1180	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1181	}
1182
1183	/**
1184	* amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
1185	*
1186	* @adev: amdgpu_device pointer
1187	* @reg_addr: indirect register offset
1188	* @reg_data: indirect register data
1189	*
1190	*/
1191	void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1192	u32 reg_addr, u64 reg_data)
1193	{
1194	unsigned long flags, pcie_index, pcie_data;
1195	void __iomem *pcie_index_offset;
1196	void __iomem *pcie_data_offset;
1197
1198	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1199	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1200
1201	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1202	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1203	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1204
1205	/ write low 32 bits /
1206	writel(val: reg_addr, addr: pcie_index_offset);
1207	readl(addr: pcie_index_offset);
1208	writel(val: (u32)(reg_data & `0xffffffffULL`), addr: pcie_data_offset);
1209	readl(addr: pcie_data_offset);
1210	/ write high 32 bits /
1211	writel(val: reg_addr + `4`, addr: pcie_index_offset);
1212	readl(addr: pcie_index_offset);
1213	writel(val: (u32)(reg_data >> `32`), addr: pcie_data_offset);
1214	readl(addr: pcie_data_offset);
1215	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1216	}
1217
1218	void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1219	u64 reg_addr, u64 reg_data)
1220	{
1221	unsigned long flags, pcie_index, pcie_data;
1222	unsigned long pcie_index_hi = `0`;
1223	void __iomem *pcie_index_offset;
1224	void __iomem *pcie_index_hi_offset;
1225	void __iomem *pcie_data_offset;
1226
1227	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1228	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1229	if ((reg_addr >> `32`) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1230	pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1231
1232	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1233	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1234	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1235	if (pcie_index_hi != `0`)
1236	pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1237	pcie_index_hi * `4`;
1238
1239	/ write low 32 bits /
1240	writel(val: reg_addr, addr: pcie_index_offset);
1241	readl(addr: pcie_index_offset);
1242	if (pcie_index_hi != `0`) {
1243	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1244	readl(addr: pcie_index_hi_offset);
1245	}
1246	writel(val: (u32)(reg_data & `0xffffffffULL`), addr: pcie_data_offset);
1247	readl(addr: pcie_data_offset);
1248	/ write high 32 bits /
1249	writel(val: reg_addr + `4`, addr: pcie_index_offset);
1250	readl(addr: pcie_index_offset);
1251	if (pcie_index_hi != `0`) {
1252	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1253	readl(addr: pcie_index_hi_offset);
1254	}
1255	writel(val: (u32)(reg_data >> `32`), addr: pcie_data_offset);
1256	readl(addr: pcie_data_offset);
1257
1258	/ clear the high bits /
1259	if (pcie_index_hi != `0`) {
1260	writel(val: `0`, addr: pcie_index_hi_offset);
1261	readl(addr: pcie_index_hi_offset);
1262	}
1263
1264	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1265	}
1266
1267	/**
1268	* amdgpu_device_get_rev_id - query device rev_id
1269	*
1270	* @adev: amdgpu_device pointer
1271	*
1272	* Return device rev_id
1273	*/
1274	u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1275	{
1276	return adev->nbio.funcs->get_rev_id(adev);
1277	}
1278
1279	/**
1280	* amdgpu_invalid_rreg - dummy reg read function
1281	*
1282	* @adev: amdgpu_device pointer
1283	* @reg: offset of register
1284	*
1285	* Dummy register read function. Used for register blocks
1286	* that certain asics don't have (all asics).
1287	* Returns the value in the register.
1288	*/
1289	static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1290	{
1291	dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
1292	BUG();
1293	return `0`;
1294	}
1295
1296	static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1297	{
1298	dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
1299	BUG();
1300	return `0`;
1301	}
1302
1303	/**
1304	* amdgpu_invalid_wreg - dummy reg write function
1305	*
1306	* @adev: amdgpu_device pointer
1307	* @reg: offset of register
1308	* @v: value to write to the register
1309	*
1310	* Dummy register read function. Used for register blocks
1311	* that certain asics don't have (all asics).
1312	*/
1313	static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1314	{
1315	dev_err(adev->dev,
1316	"Invalid callback to write register 0x%04X with 0x%08X\n", reg,
1317	v);
1318	BUG();
1319	}
1320
1321	static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1322	{
1323	dev_err(adev->dev,
1324	"Invalid callback to write register 0x%llX with 0x%08X\n", reg,
1325	v);
1326	BUG();
1327	}
1328
1329	/**
1330	* amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1331	*
1332	* @adev: amdgpu_device pointer
1333	* @reg: offset of register
1334	*
1335	* Dummy register read function. Used for register blocks
1336	* that certain asics don't have (all asics).
1337	* Returns the value in the register.
1338	*/
1339	static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1340	{
1341	dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
1342	reg);
1343	BUG();
1344	return `0`;
1345	}
1346
1347	static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1348	{
1349	dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
1350	BUG();
1351	return `0`;
1352	}
1353
1354	/**
1355	* amdgpu_invalid_wreg64 - dummy reg write function
1356	*
1357	* @adev: amdgpu_device pointer
1358	* @reg: offset of register
1359	* @v: value to write to the register
1360	*
1361	* Dummy register read function. Used for register blocks
1362	* that certain asics don't have (all asics).
1363	*/
1364	static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1365	{
1366	dev_err(adev->dev,
1367	"Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1368	reg, v);
1369	BUG();
1370	}
1371
1372	static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1373	{
1374	dev_err(adev->dev,
1375	"Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1376	reg, v);
1377	BUG();
1378	}
1379
1380	/**
1381	* amdgpu_block_invalid_rreg - dummy reg read function
1382	*
1383	* @adev: amdgpu_device pointer
1384	* @block: offset of instance
1385	* @reg: offset of register
1386	*
1387	* Dummy register read function. Used for register blocks
1388	* that certain asics don't have (all asics).
1389	* Returns the value in the register.
1390	*/
1391	static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1392	uint32_t block, uint32_t reg)
1393	{
1394	dev_err(adev->dev,
1395	"Invalid callback to read register 0x%04X in block 0x%04X\n",
1396	reg, block);
1397	BUG();
1398	return `0`;
1399	}
1400
1401	/**
1402	* amdgpu_block_invalid_wreg - dummy reg write function
1403	*
1404	* @adev: amdgpu_device pointer
1405	* @block: offset of instance
1406	* @reg: offset of register
1407	* @v: value to write to the register
1408	*
1409	* Dummy register read function. Used for register blocks
1410	* that certain asics don't have (all asics).
1411	*/
1412	static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1413	uint32_t block,
1414	uint32_t reg, uint32_t v)
1415	{
1416	dev_err(adev->dev,
1417	"Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1418	reg, block, v);
1419	BUG();
1420	}
1421
1422	static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
1423	{
1424	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1425	return AMDGPU_VBIOS_SKIP;
1426
1427	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
1428	return AMDGPU_VBIOS_OPTIONAL;
1429
1430	return `0`;
1431	}
1432
1433	/**
1434	* amdgpu_device_asic_init - Wrapper for atom asic_init
1435	*
1436	* @adev: amdgpu_device pointer
1437	*
1438	* Does any asic specific work and then calls atom asic init.
1439	*/
1440	static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1441	{
1442	uint32_t flags;
1443	bool optional;
1444	int ret;
1445
1446	amdgpu_asic_pre_asic_init(adev);
1447	flags = amdgpu_device_get_vbios_flags(adev);
1448	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL \| AMDGPU_VBIOS_SKIP));
1449
1450	if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `3`) \|\|
1451	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `4`) \|\|
1452	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `5`, `0`) \|\|
1453	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) >= IP_VERSION(`11`, `0`, `0`)) {
1454	amdgpu_psp_wait_for_bootloader(adev);
1455	if (optional && !adev->bios)
1456	return `0`;
1457
1458	ret = amdgpu_atomfirmware_asic_init(adev, fb_reset: true);
1459	return ret;
1460	} else {
1461	if (optional && !adev->bios)
1462	return `0`;
1463
1464	return amdgpu_atom_asic_init(ctx: adev->mode_info.atom_context);
1465	}
1466
1467	return `0`;
1468	}
1469
1470	/**
1471	* amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1472	*
1473	* @adev: amdgpu_device pointer
1474	*
1475	* Allocates a scratch page of VRAM for use by various things in the
1476	* driver.
1477	*/
1478	static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1479	{
1480	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1481	AMDGPU_GEM_DOMAIN_VRAM \|
1482	AMDGPU_GEM_DOMAIN_GTT,
1483	bo_ptr: &adev->mem_scratch.robj,
1484	gpu_addr: &adev->mem_scratch.gpu_addr,
1485	cpu_addr: (void **)&adev->mem_scratch.ptr);
1486	}
1487
1488	/**
1489	* amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1490	*
1491	* @adev: amdgpu_device pointer
1492	*
1493	* Frees the VRAM scratch page.
1494	*/
1495	static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1496	{
1497	amdgpu_bo_free_kernel(bo: &adev->mem_scratch.robj, NULL, NULL);
1498	}
1499
1500	/**
1501	* amdgpu_device_program_register_sequence - program an array of registers.
1502	*
1503	* @adev: amdgpu_device pointer
1504	* @registers: pointer to the register array
1505	* @array_size: size of the register array
1506	*
1507	* Programs an array or registers with and or masks.
1508	* This is a helper for setting golden registers.
1509	*/
1510	void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1511	const u32 *registers,
1512	const u32 array_size)
1513	{
1514	u32 tmp, reg, and_mask, or_mask;
1515	int i;
1516
1517	if (array_size % `3`)
1518	return;
1519
1520	for (i = `0`; i < array_size; i += `3`) {
1521	reg = registers[i + `0`];
1522	and_mask = registers[i + `1`];
1523	or_mask = registers[i + `2`];
1524
1525	if (and_mask == `0xffffffff`) {
1526	tmp = or_mask;
1527	} else {
1528	tmp = RREG32(reg);
1529	tmp &= ~and_mask;
1530	if (adev->family >= AMDGPU_FAMILY_AI)
1531	tmp \|= (or_mask & and_mask);
1532	else
1533	tmp \|= or_mask;
1534	}
1535	WREG32(reg, tmp);
1536	}
1537	}
1538
1539	/**
1540	* amdgpu_device_pci_config_reset - reset the GPU
1541	*
1542	* @adev: amdgpu_device pointer
1543	*
1544	* Resets the GPU using the pci config reset sequence.
1545	* Only applicable to asics prior to vega10.
1546	*/
1547	void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1548	{
1549	pci_write_config_dword(dev: adev->pdev, where: `0x7c`, AMDGPU_ASIC_RESET_DATA);
1550	}
1551
1552	/**
1553	* amdgpu_device_pci_reset - reset the GPU using generic PCI means
1554	*
1555	* @adev: amdgpu_device pointer
1556	*
1557	* Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1558	*/
1559	int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1560	{
1561	return pci_reset_function(dev: adev->pdev);
1562	}
1563
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
1569
1570	/**
1571	* amdgpu_device_wb_fini - Disable Writeback and free memory
1572	*
1573	* @adev: amdgpu_device pointer
1574	*
1575	* Disables Writeback and frees the Writeback memory (all asics).
1576	* Used at driver shutdown.
1577	*/
1578	static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1579	{
1580	if (adev->wb.wb_obj) {
1581	amdgpu_bo_free_kernel(bo: &adev->wb.wb_obj,
1582	gpu_addr: &adev->wb.gpu_addr,
1583	cpu_addr: (void **)&adev->wb.wb);
1584	adev->wb.wb_obj = NULL;
1585	}
1586	}
1587
1588	/**
1589	* amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1590	*
1591	* @adev: amdgpu_device pointer
1592	*
1593	* Initializes writeback and allocates writeback memory (all asics).
1594	* Used at driver startup.
1595	* Returns 0 on success or an -error on failure.
1596	*/
1597	static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1598	{
1599	int r;
1600
1601	if (adev->wb.wb_obj == NULL) {
1602	/ AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots /
1603	r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * `8`,
1604	PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1605	bo_ptr: &adev->wb.wb_obj, gpu_addr: &adev->wb.gpu_addr,
1606	cpu_addr: (void **)&adev->wb.wb);
1607	if (r) {
1608	dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1609	return r;
1610	}
1611
1612	adev->wb.num_wb = AMDGPU_MAX_WB;
1613	memset(&adev->wb.used, `0`, sizeof(adev->wb.used));
1614
1615	/ clear wb memory /
1616	memset((char )adev->wb.wb, `0`, AMDGPU_MAX_WB sizeof(uint32_t) * `8`);
1617	}
1618
1619	return `0`;
1620	}
1621
1622	/**
1623	* amdgpu_device_wb_get - Allocate a wb entry
1624	*
1625	* @adev: amdgpu_device pointer
1626	* @wb: wb index
1627	*
1628	* Allocate a wb slot for use by the driver (all asics).
1629	* Returns 0 on success or -EINVAL on failure.
1630	*/
1631	int amdgpu_device_wb_get(struct amdgpu_device adev, u32 wb)
1632	{
1633	unsigned long flags, offset;
1634
1635	spin_lock_irqsave(&adev->wb.lock, flags);
1636	offset = find_first_zero_bit(addr: adev->wb.used, size: adev->wb.num_wb);
1637	if (offset < adev->wb.num_wb) {
1638	__set_bit(offset, adev->wb.used);
1639	spin_unlock_irqrestore(lock: &adev->wb.lock, flags);
1640	wb = offset << `3`; /* convert to dw offset /
1641	return `0`;
1642	} else {
1643	spin_unlock_irqrestore(lock: &adev->wb.lock, flags);
1644	return -EINVAL;
1645	}
1646	}
1647
1648	/**
1649	* amdgpu_device_wb_free - Free a wb entry
1650	*
1651	* @adev: amdgpu_device pointer
1652	* @wb: wb index
1653	*
1654	* Free a wb slot allocated for use by the driver (all asics)
1655	*/
1656	void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1657	{
1658	unsigned long flags;
1659
1660	wb >>= `3`;
1661	spin_lock_irqsave(&adev->wb.lock, flags);
1662	if (wb < adev->wb.num_wb)
1663	__clear_bit(wb, adev->wb.used);
1664	spin_unlock_irqrestore(lock: &adev->wb.lock, flags);
1665	}
1666
1667	/**
1668	* amdgpu_device_resize_fb_bar - try to resize FB BAR
1669	*
1670	* @adev: amdgpu_device pointer
1671	*
1672	* Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1673	* to fail, but if any of the BARs is not accessible after the size we abort
1674	* driver loading by returning -ENODEV.
1675	*/
1676	int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1677	{
1678	int rbar_size = pci_rebar_bytes_to_size(bytes: adev->gmc.real_vram_size);
1679	struct pci_bus *root;
1680	struct resource *res;
1681	int max_size, r;
1682	unsigned int i;
1683	u16 cmd;
1684
1685	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1686	return `0`;
1687
1688	/ Bypass for VF /
1689	if (amdgpu_sriov_vf(adev))
1690	return `0`;
1691
1692	if (!amdgpu_rebar)
1693	return `0`;
1694
1695	/ resizing on Dell G5 SE platforms causes problems with runtime pm /
1696	if ((amdgpu_runtime_pm != `0`) &&
1697	adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1698	adev->pdev->device == `0x731f` &&
1699	adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1700	return `0`;
1701
1702	/ PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 /
1703	if (!pci_find_ext_capability(dev: adev->pdev, PCI_EXT_CAP_ID_VNDR))
1704	dev_warn(
1705	adev->dev,
1706	"System can't access extended configuration space, please check!!\n");
1707
1708	/ skip if the bios has already enabled large BAR /
1709	if (adev->gmc.real_vram_size &&
1710	(pci_resource_len(adev->pdev, `0`) >= adev->gmc.real_vram_size))
1711	return `0`;
1712
1713	/ Check if the root BUS has 64bit memory resources /
1714	root = adev->pdev->bus;
1715	while (root->parent)
1716	root = root->parent;
1717
1718	pci_bus_for_each_resource(root, res, i) {
1719	if (res && res->flags & (IORESOURCE_MEM \| IORESOURCE_MEM_64) &&
1720	res->start > `0x100000000ull`)
1721	break;
1722	}
1723
1724	/ Trying to resize is pointless without a root hub window above 4GB /
1725	if (!res)
1726	return `0`;
1727
1728	/ Limit the BAR size to what is available /
1729	max_size = pci_rebar_get_max_size(pdev: adev->pdev, bar: `0`);
1730	if (max_size < `0`)
1731	return `0`;
1732	rbar_size = min(max_size, rbar_size);
1733
1734	/ Disable memory decoding while we change the BAR addresses and size /
1735	pci_read_config_word(dev: adev->pdev, PCI_COMMAND, val: &cmd);
1736	pci_write_config_word(dev: adev->pdev, PCI_COMMAND,
1737	val: cmd & ~PCI_COMMAND_MEMORY);
1738
1739	/ Tear down doorbell as resizing will release BARs /
1740	amdgpu_doorbell_fini(adev);
1741
1742	r = pci_resize_resource(dev: adev->pdev, i: `0`, size: rbar_size,
1743	exclude_bars: (adev->asic_type >= CHIP_BONAIRE) ? `1` << `5`
1744	: `1` << `2`);
1745	if (r == -ENOSPC)
1746	dev_info(adev->dev,
1747	"Not enough PCI address space for a large BAR.");
1748	else if (r && r != -ENOTSUPP)
1749	dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
1750
1751	/ When the doorbell or fb BAR isn't available we have no chance of*
1752	* using the device.
1753	*/
1754	r = amdgpu_doorbell_init(adev);
1755	if (r \|\| (pci_resource_flags(adev->pdev, `0`) & IORESOURCE_UNSET))
1756	return -ENODEV;
1757
1758	pci_write_config_word(dev: adev->pdev, PCI_COMMAND, val: cmd);
1759
1760	return `0`;
1761	}
1762
/*
 * GPU helper functions.
 */
1766	/**
1767	* amdgpu_device_need_post - check if the hw need post or not
1768	*
1769	* @adev: amdgpu_device pointer
1770	*
1771	* Check if the asic has been initialized (all asics) at driver startup
1772	* or post is needed if hw reset is performed.
1773	* Returns true if need or false if not.
1774	*/
1775	bool amdgpu_device_need_post(struct amdgpu_device *adev)
1776	{
1777	uint32_t reg, flags;
1778
1779	if (amdgpu_sriov_vf(adev))
1780	return false;
1781
1782	flags = amdgpu_device_get_vbios_flags(adev);
1783	if (flags & AMDGPU_VBIOS_SKIP)
1784	return false;
1785	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1786	return false;
1787
1788	if (amdgpu_passthrough(adev)) {
1789	/ for FIJI: In whole GPU pass-through virtualization case, after VM reboot*
1790	* some old smc fw still need driver do vPost otherwise gpu hang, while
1791	* those smc fw version above 22.15 doesn't have this flaw, so we force
1792	* vpost executed for smc version below 22.15
1793	*/
1794	if (adev->asic_type == CHIP_FIJI) {
1795	int err;
1796	uint32_t fw_ver;
1797
1798	err = request_firmware(fw: &adev->pm.fw, name: "amdgpu/fiji_smc.bin", device: adev->dev);
1799	/ force vPost if error occurred /
1800	if (err)
1801	return true;
1802
1803	fw_ver = ((uint32_t )adev->pm.fw->data + `69`);
1804	release_firmware(fw: adev->pm.fw);
1805	if (fw_ver < `0x00160e00`)
1806	return true;
1807	}
1808	}
1809
1810	/ Don't post if we need to reset whole hive on init /
1811	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1812	return false;
1813
1814	if (adev->has_hw_reset) {
1815	adev->has_hw_reset = false;
1816	return true;
1817	}
1818
1819	/ bios scratch used on CIK+ /
1820	if (adev->asic_type >= CHIP_BONAIRE)
1821	return amdgpu_atombios_scratch_need_asic_init(adev);
1822
1823	/ check MEM_SIZE for older asics /
1824	reg = amdgpu_asic_get_config_memsize(adev);
1825
1826	if ((reg != `0`) && (reg != `0xffffffff`))
1827	return false;
1828
1829	return true;
1830	}
1831
1832	/*
1833	* Check whether seamless boot is supported.
1834	*
1835	* So far we only support seamless boot on DCE 3.0 or later.
1836	* If users report that it works on older ASICS as well, we may
1837	* loosen this.
1838	*/
1839	bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1840	{
1841	switch (amdgpu_seamless) {
1842	case -`1`:
1843	break;
1844	case `1`:
1845	return true;
1846	case `0`:
1847	return false;
1848	default:
1849	dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
1850	amdgpu_seamless);
1851	return false;
1852	}
1853
1854	if (!(adev->flags & AMD_IS_APU))
1855	return false;
1856
1857	if (adev->mman.keep_stolen_vga_memory)
1858	return false;
1859
1860	return amdgpu_ip_version(adev, ip: DCE_HWIP, inst: `0`) >= IP_VERSION(`3`, `0`, `0`);
1861	}
1862
1863	/*
1864	* Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1865	* don't support dynamic speed switching. Until we have confirmation from Intel
1866	* that a specific host supports it, it's safer that we keep it disabled for all.
1867	*
1868	* https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1869	* https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1870	*/
1871	static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1872	{
1873	#if IS_ENABLED(CONFIG_X86)
1874	struct cpuinfo_x86 *c = &cpu_data(`0`);
1875
1876	/ eGPU change speeds based on USB4 fabric conditions /
1877	if (dev_is_removable(dev: adev->dev))
1878	return true;
1879
1880	if (c->x86_vendor == X86_VENDOR_INTEL)
1881	return false;
1882	#endif
1883	return true;
1884	}
1885
1886	static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1887	{
1888	/ Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.*
1889	* It's unclear if this is a platform-specific or GPU-specific issue.
1890	* Disable ASPM on SI for the time being.
1891	*/
1892	if (adev->family == AMDGPU_FAMILY_SI)
1893	return true;
1894
1895	#if IS_ENABLED(CONFIG_X86)
1896	struct cpuinfo_x86 *c = &cpu_data(`0`);
1897
1898	if (!(amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`12`, `0`, `0`) \|\|
1899	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`12`, `0`, `1`)))
1900	return false;
1901
1902	if (c->x86 == `6` &&
1903	adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
1904	switch (c->x86_model) {
1905	case VFM_MODEL(INTEL_ALDERLAKE):
1906	case VFM_MODEL(INTEL_ALDERLAKE_L):
1907	case VFM_MODEL(INTEL_RAPTORLAKE):
1908	case VFM_MODEL(INTEL_RAPTORLAKE_P):
1909	case VFM_MODEL(INTEL_RAPTORLAKE_S):
1910	return true;
1911	default:
1912	return false;
1913	}
1914	} else {
1915	return false;
1916	}
1917	#else
1918	return false;
1919	#endif
1920	}
1921
1922	/**
1923	* amdgpu_device_should_use_aspm - check if the device should program ASPM
1924	*
1925	* @adev: amdgpu_device pointer
1926	*
1927	* Confirm whether the module parameter and pcie bridge agree that ASPM should
1928	* be set for this device.
1929	*
1930	* Returns true if it should be used or false if not.
1931	*/
1932	bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1933	{
1934	switch (amdgpu_aspm) {
1935	case -`1`:
1936	break;
1937	case `0`:
1938	return false;
1939	case `1`:
1940	return true;
1941	default:
1942	return false;
1943	}
1944	if (adev->flags & AMD_IS_APU)
1945	return false;
1946	if (amdgpu_device_aspm_support_quirk(adev))
1947	return false;
1948	return pcie_aspm_enabled(pdev: adev->pdev);
1949	}
1950
/* if we get transitioned to only one device, take VGA back */
1952	/**
1953	* amdgpu_device_vga_set_decode - enable/disable vga decode
1954	*
1955	* @pdev: PCI device pointer
1956	* @state: enable/disable vga decode
1957	*
1958	* Enable/disable vga decode (all asics).
1959	* Returns VGA resource flags.
1960	*/
1961	static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1962	bool state)
1963	{
1964	struct amdgpu_device *adev = drm_to_adev(ddev: pci_get_drvdata(pdev));
1965
1966	amdgpu_asic_set_vga_state(adev, state);
1967	if (state)
1968	return VGA_RSRC_LEGACY_IO \| VGA_RSRC_LEGACY_MEM \|
1969	VGA_RSRC_NORMAL_IO \| VGA_RSRC_NORMAL_MEM;
1970	else
1971	return VGA_RSRC_NORMAL_IO \| VGA_RSRC_NORMAL_MEM;
1972	}
1973
1974	/**
1975	* amdgpu_device_check_block_size - validate the vm block size
1976	*
1977	* @adev: amdgpu_device pointer
1978	*
1979	* Validates the vm block size specified via module parameter.
1980	* The vm block size defines number of bits in page table versus page directory,
1981	* a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1982	* page table and the remaining bits are in the page directory.
1983	*/
1984	static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1985	{
1986	/ defines number of bits in page table versus page directory,*
1987	* a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1988	* page table and the remaining bits are in the page directory
1989	*/
1990	if (amdgpu_vm_block_size == -`1`)
1991	return;
1992
1993	if (amdgpu_vm_block_size < `9`) {
1994	dev_warn(adev->dev, "VM page table size (%d) too small\n",
1995	amdgpu_vm_block_size);
1996	amdgpu_vm_block_size = -`1`;
1997	}
1998	}
1999
2000	/**
2001	* amdgpu_device_check_vm_size - validate the vm size
2002	*
2003	* @adev: amdgpu_device pointer
2004	*
2005	* Validates the vm size in GB specified via module parameter.
2006	* The VM size is the size of the GPU virtual memory space in GB.
2007	*/
2008	static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
2009	{
2010	/ no need to check the default value /
2011	if (amdgpu_vm_size == -`1`)
2012	return;
2013
2014	if (amdgpu_vm_size < `1`) {
2015	dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
2016	amdgpu_vm_size);
2017	amdgpu_vm_size = -`1`;
2018	}
2019	}
2020
2021	static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
2022	{
2023	struct sysinfo si;
2024	bool is_os_64 = (sizeof(void *) == `8`);
2025	uint64_t total_memory;
2026	uint64_t dram_size_seven_GB = `0x1B8000000`;
2027	uint64_t dram_size_three_GB = `0xB8000000`;
2028
2029	if (amdgpu_smu_memory_pool_size == `0`)
2030	return;
2031
2032	if (!is_os_64) {
2033	dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
2034	goto def_value;
2035	}
2036	si_meminfo(val: &si);
2037	total_memory = (uint64_t)si.totalram * si.mem_unit;
2038
2039	if ((amdgpu_smu_memory_pool_size == `1`) \|\|
2040	(amdgpu_smu_memory_pool_size == `2`)) {
2041	if (total_memory < dram_size_three_GB)
2042	goto def_value1;
2043	} else if ((amdgpu_smu_memory_pool_size == `4`) \|\|
2044	(amdgpu_smu_memory_pool_size == `8`)) {
2045	if (total_memory < dram_size_seven_GB)
2046	goto def_value1;
2047	} else {
2048	dev_warn(adev->dev, "Smu memory pool size not supported\n");
2049	goto def_value;
2050	}
2051	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << `28`;
2052
2053	return;
2054
2055	def_value1:
2056	dev_warn(adev->dev, "No enough system memory\n");
2057	def_value:
2058	adev->pm.smu_prv_buffer_size = `0`;
2059	}
2060
2061	static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
2062	{
2063	if (!(adev->flags & AMD_IS_APU) \|\|
2064	adev->asic_type < CHIP_RAVEN)
2065	return `0`;
2066
2067	switch (adev->asic_type) {
2068	case CHIP_RAVEN:
2069	if (adev->pdev->device == `0x15dd`)
2070	adev->apu_flags \|= AMD_APU_IS_RAVEN;
2071	if (adev->pdev->device == `0x15d8`)
2072	adev->apu_flags \|= AMD_APU_IS_PICASSO;
2073	break;
2074	case CHIP_RENOIR:
2075	if ((adev->pdev->device == `0x1636`) \|\|
2076	(adev->pdev->device == `0x164c`))
2077	adev->apu_flags \|= AMD_APU_IS_RENOIR;
2078	else
2079	adev->apu_flags \|= AMD_APU_IS_GREEN_SARDINE;
2080	break;
2081	case CHIP_VANGOGH:
2082	adev->apu_flags \|= AMD_APU_IS_VANGOGH;
2083	break;
2084	case CHIP_YELLOW_CARP:
2085	break;
2086	case CHIP_CYAN_SKILLFISH:
2087	if ((adev->pdev->device == `0x13FE`) \|\|
2088	(adev->pdev->device == `0x143F`))
2089	adev->apu_flags \|= AMD_APU_IS_CYAN_SKILLFISH2;
2090	break;
2091	default:
2092	break;
2093	}
2094
2095	return `0`;
2096	}
2097
2098	/**
2099	* amdgpu_device_check_arguments - validate module params
2100	*
2101	* @adev: amdgpu_device pointer
2102	*
2103	* Validates certain module parameters and updates
2104	* the associated values used by the driver (all asics).
2105	*/
2106	static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
2107	{
2108	int i;
2109
2110	if (amdgpu_sched_jobs < `4`) {
2111	dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
2112	amdgpu_sched_jobs);
2113	amdgpu_sched_jobs = `4`;
2114	} else if (!is_power_of_2(n: amdgpu_sched_jobs)) {
2115	dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
2116	amdgpu_sched_jobs);
2117	amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
2118	}
2119
2120	if (amdgpu_gart_size != -`1` && amdgpu_gart_size < `32`) {
2121	/ gart size must be greater or equal to 32M /
2122	dev_warn(adev->dev, "gart size (%d) too small\n",
2123	amdgpu_gart_size);
2124	amdgpu_gart_size = -`1`;
2125	}
2126
2127	if (amdgpu_gtt_size != -`1` && amdgpu_gtt_size < `32`) {
2128	/ gtt size must be greater or equal to 32M /
2129	dev_warn(adev->dev, "gtt size (%d) too small\n",
2130	amdgpu_gtt_size);
2131	amdgpu_gtt_size = -`1`;
2132	}
2133
2134	/ valid range is between 4 and 9 inclusive /
2135	if (amdgpu_vm_fragment_size != -`1` &&
2136	(amdgpu_vm_fragment_size > `9` \|\| amdgpu_vm_fragment_size < `4`)) {
2137	dev_warn(adev->dev, "valid range is between 4 and 9\n");
2138	amdgpu_vm_fragment_size = -`1`;
2139	}
2140
2141	if (amdgpu_sched_hw_submission < `2`) {
2142	dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
2143	amdgpu_sched_hw_submission);
2144	amdgpu_sched_hw_submission = `2`;
2145	} else if (!is_power_of_2(n: amdgpu_sched_hw_submission)) {
2146	dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
2147	amdgpu_sched_hw_submission);
2148	amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
2149	}
2150
2151	if (amdgpu_reset_method < -`1` \|\| amdgpu_reset_method > `4`) {
2152	dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
2153	amdgpu_reset_method = -`1`;
2154	}
2155
2156	amdgpu_device_check_smu_prv_buffer_size(adev);
2157
2158	amdgpu_device_check_vm_size(adev);
2159
2160	amdgpu_device_check_block_size(adev);
2161
2162	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, load_type: amdgpu_fw_load_type);
2163
2164	for (i = `0`; i < MAX_XCP; i++) {
2165	switch (amdgpu_enforce_isolation) {
2166	case -`1`:
2167	case `0`:
2168	default:
2169	/ disable /
2170	adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
2171	break;
2172	case `1`:
2173	/ enable /
2174	adev->enforce_isolation[i] =
2175	AMDGPU_ENFORCE_ISOLATION_ENABLE;
2176	break;
2177	case `2`:
2178	/ enable legacy mode /
2179	adev->enforce_isolation[i] =
2180	AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
2181	break;
2182	case `3`:
2183	/ enable only process isolation without submitting cleaner shader /
2184	adev->enforce_isolation[i] =
2185	AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
2186	break;
2187	}
2188	}
2189
2190	return `0`;
2191	}
2192
2193	/**
2194	* amdgpu_switcheroo_set_state - set switcheroo state
2195	*
2196	* @pdev: pci dev pointer
2197	* @state: vga_switcheroo state
2198	*
2199	* Callback for the switcheroo driver. Suspends or resumes
2200	* the asics before or after it is powered up using ACPI methods.
2201	*/
2202	static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2203	enum vga_switcheroo_state state)
2204	{
2205	struct drm_device *dev = pci_get_drvdata(pdev);
2206	int r;
2207
2208	if (amdgpu_device_supports_px(adev: drm_to_adev(ddev: dev)) &&
2209	state == VGA_SWITCHEROO_OFF)
2210	return;
2211
2212	if (state == VGA_SWITCHEROO_ON) {
2213	pr_info("switched on\n");
2214	/ don't suspend or resume card normally /
2215	dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2216
2217	pci_set_power_state(dev: pdev, PCI_D0);
2218	amdgpu_device_load_pci_state(pdev);
2219	r = pci_enable_device(dev: pdev);
2220	if (r)
2221	dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
2222	r);
2223	amdgpu_device_resume(dev, fbcon: true);
2224
2225	dev->switch_power_state = DRM_SWITCH_POWER_ON;
2226	} else {
2227	dev_info(&pdev->dev, "switched off\n");
2228	dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2229	amdgpu_device_prepare(dev);
2230	amdgpu_device_suspend(dev, fbcon: true);
2231	amdgpu_device_cache_pci_state(pdev);
2232	/ Shut down the device /
2233	pci_disable_device(dev: pdev);
2234	pci_set_power_state(dev: pdev, PCI_D3cold);
2235	dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2236	}
2237	}
2238
2239	/**
2240	* amdgpu_switcheroo_can_switch - see if switcheroo state can change
2241	*
2242	* @pdev: pci dev pointer
2243	*
2244	* Callback for the switcheroo driver. Check of the switcheroo
2245	* state can be changed.
2246	* Returns true if the state can be changed, false if not.
2247	*/
2248	static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2249	{
2250	struct drm_device *dev = pci_get_drvdata(pdev);
2251
2252	/*
2253	* FIXME: open_count is protected by drm_global_mutex but that would lead to
2254	* locking inversion with the driver load path. And the access here is
2255	* completely racy anyway. So don't bother with locking for now.
2256	*/
2257	return atomic_read(v: &dev->open_count) == `0`;
2258	}
2259
2260	static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2261	.set_gpu_state = amdgpu_switcheroo_set_state,
2262	.reprobe = NULL,
2263	.can_switch = amdgpu_switcheroo_can_switch,
2264	};
2265
2266	/**
2267	* amdgpu_device_ip_set_clockgating_state - set the CG state
2268	*
2269	* @dev: amdgpu_device pointer
2270	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2271	* @state: clockgating state (gate or ungate)
2272	*
2273	* Sets the requested clockgating state for all instances of
2274	* the hardware IP specified.
2275	* Returns the error code from the last instance.
2276	*/
2277	int amdgpu_device_ip_set_clockgating_state(void *dev,
2278	enum amd_ip_block_type block_type,
2279	enum amd_clockgating_state state)
2280	{
2281	struct amdgpu_device *adev = dev;
2282	int i, r = `0`;
2283
2284	for (i = `0`; i < adev->num_ip_blocks; i++) {
2285	if (!adev->ip_blocks[i].status.valid)
2286	continue;
2287	if (adev->ip_blocks[i].version->type != block_type)
2288	continue;
2289	if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2290	continue;
2291	r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2292	&adev->ip_blocks[i], state);
2293	if (r)
2294	dev_err(adev->dev,
2295	"set_clockgating_state of IP block <%s> failed %d\n",
2296	adev->ip_blocks[i].version->funcs->name, r);
2297	}
2298	return r;
2299	}
2300
2301	/**
2302	* amdgpu_device_ip_set_powergating_state - set the PG state
2303	*
2304	* @dev: amdgpu_device pointer
2305	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2306	* @state: powergating state (gate or ungate)
2307	*
2308	* Sets the requested powergating state for all instances of
2309	* the hardware IP specified.
2310	* Returns the error code from the last instance.
2311	*/
2312	int amdgpu_device_ip_set_powergating_state(void *dev,
2313	enum amd_ip_block_type block_type,
2314	enum amd_powergating_state state)
2315	{
2316	struct amdgpu_device *adev = dev;
2317	int i, r = `0`;
2318
2319	for (i = `0`; i < adev->num_ip_blocks; i++) {
2320	if (!adev->ip_blocks[i].status.valid)
2321	continue;
2322	if (adev->ip_blocks[i].version->type != block_type)
2323	continue;
2324	if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2325	continue;
2326	r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2327	&adev->ip_blocks[i], state);
2328	if (r)
2329	dev_err(adev->dev,
2330	"set_powergating_state of IP block <%s> failed %d\n",
2331	adev->ip_blocks[i].version->funcs->name, r);
2332	}
2333	return r;
2334	}
2335
2336	/**
2337	* amdgpu_device_ip_get_clockgating_state - get the CG state
2338	*
2339	* @adev: amdgpu_device pointer
2340	* @flags: clockgating feature flags
2341	*
2342	* Walks the list of IPs on the device and updates the clockgating
2343	* flags for each IP.
2344	* Updates @flags with the feature flags for each hardware IP where
2345	* clockgating is enabled.
2346	*/
2347	void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2348	u64 *flags)
2349	{
2350	int i;
2351
2352	for (i = `0`; i < adev->num_ip_blocks; i++) {
2353	if (!adev->ip_blocks[i].status.valid)
2354	continue;
2355	if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2356	adev->ip_blocks[i].version->funcs->get_clockgating_state(
2357	&adev->ip_blocks[i], flags);
2358	}
2359	}
2360
2361	/**
2362	* amdgpu_device_ip_wait_for_idle - wait for idle
2363	*
2364	* @adev: amdgpu_device pointer
2365	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2366	*
2367	* Waits for the request hardware IP to be idle.
2368	* Returns 0 for success or a negative error code on failure.
2369	*/
2370	int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2371	enum amd_ip_block_type block_type)
2372	{
2373	int i, r;
2374
2375	for (i = `0`; i < adev->num_ip_blocks; i++) {
2376	if (!adev->ip_blocks[i].status.valid)
2377	continue;
2378	if (adev->ip_blocks[i].version->type == block_type) {
2379	if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
2380	r = adev->ip_blocks[i].version->funcs->wait_for_idle(
2381	&adev->ip_blocks[i]);
2382	if (r)
2383	return r;
2384	}
2385	break;
2386	}
2387	}
2388	return `0`;
2389
2390	}
2391
2392	/**
2393	* amdgpu_device_ip_is_hw - is the hardware IP enabled
2394	*
2395	* @adev: amdgpu_device pointer
2396	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2397	*
2398	* Check if the hardware IP is enable or not.
2399	* Returns true if it the IP is enable, false if not.
2400	*/
2401	bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
2402	enum amd_ip_block_type block_type)
2403	{
2404	int i;
2405
2406	for (i = `0`; i < adev->num_ip_blocks; i++) {
2407	if (adev->ip_blocks[i].version->type == block_type)
2408	return adev->ip_blocks[i].status.hw;
2409	}
2410	return false;
2411	}
2412
2413	/**
2414	* amdgpu_device_ip_is_valid - is the hardware IP valid
2415	*
2416	* @adev: amdgpu_device pointer
2417	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2418	*
2419	* Check if the hardware IP is valid or not.
2420	* Returns true if it the IP is valid, false if not.
2421	*/
2422	bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
2423	enum amd_ip_block_type block_type)
2424	{
2425	int i;
2426
2427	for (i = `0`; i < adev->num_ip_blocks; i++) {
2428	if (adev->ip_blocks[i].version->type == block_type)
2429	return adev->ip_blocks[i].status.valid;
2430	}
2431	return false;
2432
2433	}
2434
2435	/**
2436	* amdgpu_device_ip_get_ip_block - get a hw IP pointer
2437	*
2438	* @adev: amdgpu_device pointer
2439	* @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2440	*
2441	* Returns a pointer to the hardware IP block structure
2442	* if it exists for the asic, otherwise NULL.
2443	*/
2444	struct amdgpu_ip_block *
2445	amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2446	enum amd_ip_block_type type)
2447	{
2448	int i;
2449
2450	for (i = `0`; i < adev->num_ip_blocks; i++)
2451	if (adev->ip_blocks[i].version->type == type)
2452	return &adev->ip_blocks[i];
2453
2454	return NULL;
2455	}
2456
2457	/**
2458	* amdgpu_device_ip_block_version_cmp
2459	*
2460	* @adev: amdgpu_device pointer
2461	* @type: enum amd_ip_block_type
2462	* @major: major version
2463	* @minor: minor version
2464	*
2465	* return 0 if equal or greater
2466	* return 1 if smaller or the ip_block doesn't exist
2467	*/
2468	int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2469	enum amd_ip_block_type type,
2470	u32 major, u32 minor)
2471	{
2472	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2473
2474	if (ip_block && ((ip_block->version->major > major) \|\|
2475	((ip_block->version->major == major) &&
2476	(ip_block->version->minor >= minor))))
2477	return `0`;
2478
2479	return `1`;
2480	}
2481
2482	static const char *ip_block_names[] = {
2483	[AMD_IP_BLOCK_TYPE_COMMON] = "common",
2484	[AMD_IP_BLOCK_TYPE_GMC] = "gmc",
2485	[AMD_IP_BLOCK_TYPE_IH] = "ih",
2486	[AMD_IP_BLOCK_TYPE_SMC] = "smu",
2487	[AMD_IP_BLOCK_TYPE_PSP] = "psp",
2488	[AMD_IP_BLOCK_TYPE_DCE] = "dce",
2489	[AMD_IP_BLOCK_TYPE_GFX] = "gfx",
2490	[AMD_IP_BLOCK_TYPE_SDMA] = "sdma",
2491	[AMD_IP_BLOCK_TYPE_UVD] = "uvd",
2492	[AMD_IP_BLOCK_TYPE_VCE] = "vce",
2493	[AMD_IP_BLOCK_TYPE_ACP] = "acp",
2494	[AMD_IP_BLOCK_TYPE_VCN] = "vcn",
2495	[AMD_IP_BLOCK_TYPE_MES] = "mes",
2496	[AMD_IP_BLOCK_TYPE_JPEG] = "jpeg",
2497	[AMD_IP_BLOCK_TYPE_VPE] = "vpe",
2498	[AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
2499	[AMD_IP_BLOCK_TYPE_ISP] = "isp",
2500	[AMD_IP_BLOCK_TYPE_RAS] = "ras",
2501	};
2502
2503	static const char ip_block_name(struct* amdgpu_device adev, enum* amd_ip_block_type type)
2504	{
2505	int idx = (int)type;
2506
2507	return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : "unknown";
2508	}
2509
2510	/**
2511	* amdgpu_device_ip_block_add
2512	*
2513	* @adev: amdgpu_device pointer
2514	* @ip_block_version: pointer to the IP to add
2515	*
2516	* Adds the IP block driver information to the collection of IPs
2517	* on the asic.
2518	*/
2519	int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2520	const struct amdgpu_ip_block_version *ip_block_version)
2521	{
2522	if (!ip_block_version)
2523	return -EINVAL;
2524
2525	switch (ip_block_version->type) {
2526	case AMD_IP_BLOCK_TYPE_VCN:
2527	if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2528	return `0`;
2529	break;
2530	case AMD_IP_BLOCK_TYPE_JPEG:
2531	if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2532	return `0`;
2533	break;
2534	default:
2535	break;
2536	}
2537
2538	dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n",
2539	adev->num_ip_blocks,
2540	ip_block_name(adev, ip_block_version->type),
2541	ip_block_version->major,
2542	ip_block_version->minor,
2543	ip_block_version->rev,
2544	ip_block_version->funcs->name);
2545
2546	adev->ip_blocks[adev->num_ip_blocks].adev = adev;
2547
2548	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2549
2550	return `0`;
2551	}
2552
2553	/**
2554	* amdgpu_device_enable_virtual_display - enable virtual display feature
2555	*
2556	* @adev: amdgpu_device pointer
2557	*
2558	* Enabled the virtual display feature if the user has enabled it via
2559	* the module parameter virtual_display. This feature provides a virtual
2560	* display hardware on headless boards or in virtualized environments.
2561	* This function parses and validates the configuration string specified by
2562	* the user and configures the virtual display configuration (number of
2563	* virtual connectors, crtcs, etc.) specified.
2564	*/
2565	static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2566	{
2567	adev->enable_virtual_display = false;
2568
2569	if (amdgpu_virtual_display) {
2570	const char *pci_address_name = pci_name(pdev: adev->pdev);
2571	char pciaddstr, pciaddstr_tmp, pciaddname_tmp, pciaddname;
2572
2573	pciaddstr = kstrdup(s: amdgpu_virtual_display, GFP_KERNEL);
2574	pciaddstr_tmp = pciaddstr;
2575	while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2576	pciaddname = strsep(&pciaddname_tmp, ",");
2577	if (!strcmp("all", pciaddname)
2578	\|\| !strcmp(pci_address_name, pciaddname)) {
2579	long num_crtc;
2580	int res = -`1`;
2581
2582	adev->enable_virtual_display = true;
2583
2584	if (pciaddname_tmp)
2585	res = kstrtol(s: pciaddname_tmp, base: `10`,
2586	res: &num_crtc);
2587
2588	if (!res) {
2589	if (num_crtc < `1`)
2590	num_crtc = `1`;
2591	if (num_crtc > `6`)
2592	num_crtc = `6`;
2593	adev->mode_info.num_crtc = num_crtc;
2594	} else {
2595	adev->mode_info.num_crtc = `1`;
2596	}
2597	break;
2598	}
2599	}
2600
2601	dev_info(
2602	adev->dev,
2603	"virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2604	amdgpu_virtual_display, pci_address_name,
2605	adev->enable_virtual_display, adev->mode_info.num_crtc);
2606
2607	kfree(objp: pciaddstr);
2608	}
2609	}
2610
2611	void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2612	{
2613	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2614	adev->mode_info.num_crtc = `1`;
2615	adev->enable_virtual_display = true;
2616	dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
2617	adev->enable_virtual_display,
2618	adev->mode_info.num_crtc);
2619	}
2620	}
2621
2622	/**
2623	* amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2624	*
2625	* @adev: amdgpu_device pointer
2626	*
2627	* Parses the asic configuration parameters specified in the gpu info
2628	* firmware and makes them available to the driver for use in configuring
2629	* the asic.
2630	* Returns 0 on success, -EINVAL on failure.
2631	*/
2632	static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2633	{
2634	const char *chip_name;
2635	int err;
2636	const struct gpu_info_firmware_header_v1_0 *hdr;
2637
2638	adev->firmware.gpu_info_fw = NULL;
2639
2640	switch (adev->asic_type) {
2641	default:
2642	return `0`;
2643	case CHIP_VEGA10:
2644	chip_name = "vega10";
2645	break;
2646	case CHIP_VEGA12:
2647	chip_name = "vega12";
2648	break;
2649	case CHIP_RAVEN:
2650	if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2651	chip_name = "raven2";
2652	else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2653	chip_name = "picasso";
2654	else
2655	chip_name = "raven";
2656	break;
2657	case CHIP_ARCTURUS:
2658	chip_name = "arcturus";
2659	break;
2660	case CHIP_NAVI12:
2661	if (adev->discovery.bin)
2662	return `0`;
2663	chip_name = "navi12";
2664	break;
2665	case CHIP_CYAN_SKILLFISH:
2666	if (adev->discovery.bin)
2667	return `0`;
2668	chip_name = "cyan_skillfish";
2669	break;
2670	}
2671
2672	err = amdgpu_ucode_request(adev, fw: &adev->firmware.gpu_info_fw,
2673	required: AMDGPU_UCODE_OPTIONAL,
2674	fmt: "amdgpu/%s_gpu_info.bin", chip_name);
2675	if (err) {
2676	dev_err(adev->dev,
2677	"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
2678	chip_name);
2679	goto out;
2680	}
2681
2682	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2683	amdgpu_ucode_print_gpu_info_hdr(hdr: &hdr->header);
2684
2685	switch (hdr->version_major) {
2686	case `1`:
2687	{
2688	const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2689	(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2690	le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2691
2692	/*
2693	* Should be dropped when DAL no longer needs it.
2694	*/
2695	if (adev->asic_type == CHIP_NAVI12)
2696	goto parse_soc_bounding_box;
2697
2698	adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2699	adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2700	adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2701	adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2702	adev->gfx.config.max_texture_channel_caches =
2703	le32_to_cpu(gpu_info_fw->gc_num_tccs);
2704	adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2705	adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2706	adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2707	adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2708	adev->gfx.config.double_offchip_lds_buf =
2709	le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2710	adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2711	adev->gfx.cu_info.max_waves_per_simd =
2712	le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2713	adev->gfx.cu_info.max_scratch_slots_per_cu =
2714	le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2715	adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2716	if (hdr->version_minor >= `1`) {
2717	const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2718	(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2719	le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2720	adev->gfx.config.num_sc_per_sh =
2721	le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2722	adev->gfx.config.num_packer_per_sc =
2723	le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2724	}
2725
2726	parse_soc_bounding_box:
2727	/*
2728	* soc bounding box info is not integrated in disocovery table,
2729	* we always need to parse it from gpu info firmware if needed.
2730	*/
2731	if (hdr->version_minor == `2`) {
2732	const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2733	(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2734	le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2735	adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2736	}
2737	break;
2738	}
2739	default:
2740	dev_err(adev->dev,
2741	"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2742	err = -EINVAL;
2743	goto out;
2744	}
2745	out:
2746	return err;
2747	}
2748
2749	static void amdgpu_uid_init(struct amdgpu_device *adev)
2750	{
2751	/ Initialize the UID for the device /
2752	adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL);
2753	if (!adev->uid_info) {
2754	dev_warn(adev->dev, "Failed to allocate memory for UID\n");
2755	return;
2756	}
2757	adev->uid_info->adev = adev;
2758	}
2759
2760	static void amdgpu_uid_fini(struct amdgpu_device *adev)
2761	{
2762	/ Free the UID memory /
2763	kfree(objp: adev->uid_info);
2764	adev->uid_info = NULL;
2765	}
2766
2767	/**
2768	* amdgpu_device_ip_early_init - run early init for hardware IPs
2769	*
2770	* @adev: amdgpu_device pointer
2771	*
2772	* Early initialization pass for hardware IPs. The hardware IPs that make
2773	* up each asic are discovered each IP's early_init callback is run. This
2774	* is the first stage in initializing the asic.
2775	* Returns 0 on success, negative error code on failure.
2776	*/
2777	static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2778	{
2779	struct amdgpu_ip_block *ip_block;
2780	struct pci_dev *parent;
2781	bool total, skip_bios;
2782	uint32_t bios_flags;
2783	int i, r;
2784
2785	amdgpu_device_enable_virtual_display(adev);
2786
2787	if (amdgpu_sriov_vf(adev)) {
2788	r = amdgpu_virt_request_full_gpu(adev, init: true);
2789	if (r)
2790	return r;
2791
2792	r = amdgpu_virt_init_critical_region(adev);
2793	if (r)
2794	return r;
2795	}
2796
2797	switch (adev->asic_type) {
2798	#ifdef CONFIG_DRM_AMDGPU_SI
2799	case CHIP_VERDE:
2800	case CHIP_TAHITI:
2801	case CHIP_PITCAIRN:
2802	case CHIP_OLAND:
2803	case CHIP_HAINAN:
2804	adev->family = AMDGPU_FAMILY_SI;
2805	r = si_set_ip_blocks(adev);
2806	if (r)
2807	return r;
2808	break;
2809	#endif
2810	#ifdef CONFIG_DRM_AMDGPU_CIK
2811	case CHIP_BONAIRE:
2812	case CHIP_HAWAII:
2813	case CHIP_KAVERI:
2814	case CHIP_KABINI:
2815	case CHIP_MULLINS:
2816	if (adev->flags & AMD_IS_APU)
2817	adev->family = AMDGPU_FAMILY_KV;
2818	else
2819	adev->family = AMDGPU_FAMILY_CI;
2820
2821	r = cik_set_ip_blocks(adev);
2822	if (r)
2823	return r;
2824	break;
2825	#endif
2826	case CHIP_TOPAZ:
2827	case CHIP_TONGA:
2828	case CHIP_FIJI:
2829	case CHIP_POLARIS10:
2830	case CHIP_POLARIS11:
2831	case CHIP_POLARIS12:
2832	case CHIP_VEGAM:
2833	case CHIP_CARRIZO:
2834	case CHIP_STONEY:
2835	if (adev->flags & AMD_IS_APU)
2836	adev->family = AMDGPU_FAMILY_CZ;
2837	else
2838	adev->family = AMDGPU_FAMILY_VI;
2839
2840	r = vi_set_ip_blocks(adev);
2841	if (r)
2842	return r;
2843	break;
2844	default:
2845	r = amdgpu_discovery_set_ip_blocks(adev);
2846	if (r)
2847	return r;
2848	break;
2849	}
2850
2851	/ Check for IP version 9.4.3 with A0 hardware /
2852	if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `3`) &&
2853	!amdgpu_device_get_rev_id(adev)) {
2854	dev_err(adev->dev, "Unsupported A0 hardware\n");
2855	return -ENODEV; / device unsupported - no device error /
2856	}
2857
2858	if (amdgpu_has_atpx() &&
2859	(amdgpu_is_atpx_hybrid() \|\|
2860	amdgpu_has_atpx_dgpu_power_cntl()) &&
2861	((adev->flags & AMD_IS_APU) == `0`) &&
2862	!dev_is_removable(dev: &adev->pdev->dev))
2863	adev->flags \|= AMD_IS_PX;
2864
2865	if (!(adev->flags & AMD_IS_APU)) {
2866	parent = pcie_find_root_port(dev: adev->pdev);
2867	adev->has_pr3 = parent ? pci_pr3_present(pdev: parent) : false;
2868	}
2869
2870	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2871	if (amdgpu_sriov_vf(adev) \|\| sched_policy == KFD_SCHED_POLICY_NO_HWS)
2872	adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2873	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2874	adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2875	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2876	adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2877
2878	adev->virt.is_xgmi_node_migrate_enabled = false;
2879	if (amdgpu_sriov_vf(adev)) {
2880	adev->virt.is_xgmi_node_migrate_enabled =
2881	amdgpu_ip_version((adev), ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `4`);
2882	}
2883
2884	total = true;
2885	for (i = `0`; i < adev->num_ip_blocks; i++) {
2886	ip_block = &adev->ip_blocks[i];
2887
2888	if ((amdgpu_ip_block_mask & (`1` << i)) == `0`) {
2889	dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
2890	adev->ip_blocks[i].version->funcs->name);
2891	adev->ip_blocks[i].status.valid = false;
2892	} else if (ip_block->version->funcs->early_init) {
2893	r = ip_block->version->funcs->early_init(ip_block);
2894	if (r == -ENOENT) {
2895	adev->ip_blocks[i].status.valid = false;
2896	} else if (r) {
2897	dev_err(adev->dev,
2898	"early_init of IP block <%s> failed %d\n",
2899	adev->ip_blocks[i].version->funcs->name,
2900	r);
2901	total = false;
2902	} else {
2903	adev->ip_blocks[i].status.valid = true;
2904	}
2905	} else {
2906	adev->ip_blocks[i].status.valid = true;
2907	}
2908	/ get the vbios after the asic_funcs are set up /
2909	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2910	r = amdgpu_device_parse_gpu_info_fw(adev);
2911	if (r)
2912	return r;
2913
2914	bios_flags = amdgpu_device_get_vbios_flags(adev);
2915	skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2916	/ Read BIOS /
2917	if (!skip_bios) {
2918	bool optional =
2919	!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2920	if (!amdgpu_get_bios(adev) && !optional)
2921	return -EINVAL;
2922
2923	if (optional && !adev->bios)
2924	dev_info(
2925	adev->dev,
2926	"VBIOS image optional, proceeding without VBIOS image");
2927
2928	if (adev->bios) {
2929	r = amdgpu_atombios_init(adev);
2930	if (r) {
2931	dev_err(adev->dev,
2932	"amdgpu_atombios_init failed\n");
2933	amdgpu_vf_error_put(
2934	adev,
2935	sub_error_code: AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2936	error_flags: `0`, error_data: `0`);
2937	return r;
2938	}
2939	}
2940	}
2941
2942	/get pf2vf msg info at it's earliest time/
2943	if (amdgpu_sriov_vf(adev))
2944	amdgpu_virt_init_data_exchange(adev);
2945
2946	}
2947	}
2948	if (!total)
2949	return -ENODEV;
2950
2951	if (adev->gmc.xgmi.supported)
2952	amdgpu_xgmi_early_init(adev);
2953
2954	if (amdgpu_is_multi_aid(adev))
2955	amdgpu_uid_init(adev);
2956	ip_block = amdgpu_device_ip_get_ip_block(adev, type: AMD_IP_BLOCK_TYPE_GFX);
2957	if (ip_block->status.valid != false)
2958	amdgpu_amdkfd_device_probe(adev);
2959
2960	adev->cg_flags &= amdgpu_cg_mask;
2961	adev->pg_flags &= amdgpu_pg_mask;
2962
2963	return `0`;
2964	}
2965
2966	static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2967	{
2968	int i, r;
2969
2970	for (i = `0`; i < adev->num_ip_blocks; i++) {
2971	if (!adev->ip_blocks[i].status.sw)
2972	continue;
2973	if (adev->ip_blocks[i].status.hw)
2974	continue;
2975	if (!amdgpu_ip_member_of_hwini(
2976	adev, block: adev->ip_blocks[i].version->type))
2977	continue;
2978	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON \|\|
2979	(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) \|\|
2980	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2981	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2982	if (r) {
2983	dev_err(adev->dev,
2984	"hw_init of IP block <%s> failed %d\n",
2985	adev->ip_blocks[i].version->funcs->name,
2986	r);
2987	return r;
2988	}
2989	adev->ip_blocks[i].status.hw = true;
2990	}
2991	}
2992
2993	return `0`;
2994	}
2995
2996	static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2997	{
2998	int i, r;
2999
3000	for (i = `0`; i < adev->num_ip_blocks; i++) {
3001	if (!adev->ip_blocks[i].status.sw)
3002	continue;
3003	if (adev->ip_blocks[i].status.hw)
3004	continue;
3005	if (!amdgpu_ip_member_of_hwini(
3006	adev, block: adev->ip_blocks[i].version->type))
3007	continue;
3008	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
3009	if (r) {
3010	dev_err(adev->dev,
3011	"hw_init of IP block <%s> failed %d\n",
3012	adev->ip_blocks[i].version->funcs->name, r);
3013	return r;
3014	}
3015	adev->ip_blocks[i].status.hw = true;
3016	}
3017
3018	return `0`;
3019	}
3020
3021	static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
3022	{
3023	int r = `0`;
3024	int i;
3025	uint32_t smu_version;
3026
3027	if (adev->asic_type >= CHIP_VEGA10) {
3028	for (i = `0`; i < adev->num_ip_blocks; i++) {
3029	if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
3030	continue;
3031
3032	if (!amdgpu_ip_member_of_hwini(adev,
3033	block: AMD_IP_BLOCK_TYPE_PSP))
3034	break;
3035
3036	if (!adev->ip_blocks[i].status.sw)
3037	continue;
3038
3039	/ no need to do the fw loading again if already done/
3040	if (adev->ip_blocks[i].status.hw == true)
3041	break;
3042
3043	if (amdgpu_in_reset(adev) \|\| adev->in_suspend) {
3044	r = amdgpu_ip_block_resume(ip_block: &adev->ip_blocks[i]);
3045	if (r)
3046	return r;
3047	} else {
3048	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
3049	if (r) {
3050	dev_err(adev->dev,
3051	"hw_init of IP block <%s> failed %d\n",
3052	adev->ip_blocks[i]
3053	.version->funcs->name,
3054	r);
3055	return r;
3056	}
3057	adev->ip_blocks[i].status.hw = true;
3058	}
3059	break;
3060	}
3061	}
3062
3063	if (!amdgpu_sriov_vf(adev) \|\| adev->asic_type == CHIP_TONGA)
3064	r = amdgpu_pm_load_smu_firmware(adev, smu_version: &smu_version);
3065
3066	return r;
3067	}
3068
3069	static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
3070	{
3071	struct drm_sched_init_args args = {
3072	.ops = &amdgpu_sched_ops,
3073	.num_rqs = DRM_SCHED_PRIORITY_COUNT,
3074	.timeout_wq = adev->reset_domain->wq,
3075	.dev = adev->dev,
3076	};
3077	long timeout;
3078	int r, i;
3079
3080	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
3081	struct amdgpu_ring *ring = adev->rings[i];
3082
3083	/ No need to setup the GPU scheduler for rings that don't need it /
3084	if (!ring \|\| ring->no_scheduler)
3085	continue;
3086
3087	switch (ring->funcs->type) {
3088	case AMDGPU_RING_TYPE_GFX:
3089	timeout = adev->gfx_timeout;
3090	break;
3091	case AMDGPU_RING_TYPE_COMPUTE:
3092	timeout = adev->compute_timeout;
3093	break;
3094	case AMDGPU_RING_TYPE_SDMA:
3095	timeout = adev->sdma_timeout;
3096	break;
3097	default:
3098	timeout = adev->video_timeout;
3099	break;
3100	}
3101
3102	args.timeout = timeout;
3103	args.credit_limit = ring->num_hw_submission;
3104	args.score = ring->sched_score;
3105	args.name = ring->name;
3106
3107	r = drm_sched_init(sched: &ring->sched, args: &args);
3108	if (r) {
3109	dev_err(adev->dev,
3110	"Failed to create scheduler on ring %s.\n",
3111	ring->name);
3112	return r;
3113	}
3114	r = amdgpu_uvd_entity_init(adev, ring);
3115	if (r) {
3116	dev_err(adev->dev,
3117	"Failed to create UVD scheduling entity on ring %s.\n",
3118	ring->name);
3119	return r;
3120	}
3121	r = amdgpu_vce_entity_init(adev, ring);
3122	if (r) {
3123	dev_err(adev->dev,
3124	"Failed to create VCE scheduling entity on ring %s.\n",
3125	ring->name);
3126	return r;
3127	}
3128	}
3129
3130	if (adev->xcp_mgr)
3131	amdgpu_xcp_update_partition_sched_list(adev);
3132
3133	return `0`;
3134	}
3135
3136
3137	/**
3138	* amdgpu_device_ip_init - run init for hardware IPs
3139	*
3140	* @adev: amdgpu_device pointer
3141	*
3142	* Main initialization pass for hardware IPs. The list of all the hardware
3143	* IPs that make up the asic is walked and the sw_init and hw_init callbacks
3144	* are run. sw_init initializes the software state associated with each IP
3145	* and hw_init initializes the hardware associated with each IP.
3146	* Returns 0 on success, negative error code on failure.
3147	*/
3148	static int amdgpu_device_ip_init(struct amdgpu_device *adev)
3149	{
3150	bool init_badpage;
3151	int i, r;
3152
3153	r = amdgpu_ras_init(adev);
3154	if (r)
3155	return r;
3156
3157	for (i = `0`; i < adev->num_ip_blocks; i++) {
3158	if (!adev->ip_blocks[i].status.valid)
3159	continue;
3160	if (adev->ip_blocks[i].version->funcs->sw_init) {
3161	r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
3162	if (r) {
3163	dev_err(adev->dev,
3164	"sw_init of IP block <%s> failed %d\n",
3165	adev->ip_blocks[i].version->funcs->name,
3166	r);
3167	goto init_failed;
3168	}
3169	}
3170	adev->ip_blocks[i].status.sw = true;
3171
3172	if (!amdgpu_ip_member_of_hwini(
3173	adev, block: adev->ip_blocks[i].version->type))
3174	continue;
3175
3176	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
3177	/ need to do common hw init early so everything is set up for gmc /
3178	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
3179	if (r) {
3180	dev_err(adev->dev, "hw_init %d failed %d\n", i,
3181	r);
3182	goto init_failed;
3183	}
3184	adev->ip_blocks[i].status.hw = true;
3185	} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3186	/ need to do gmc hw init early so we can allocate gpu mem /
3187	/ Try to reserve bad pages early /
3188	if (amdgpu_sriov_vf(adev))
3189	amdgpu_virt_exchange_data(adev);
3190
3191	r = amdgpu_device_mem_scratch_init(adev);
3192	if (r) {
3193	dev_err(adev->dev,
3194	"amdgpu_mem_scratch_init failed %d\n",
3195	r);
3196	goto init_failed;
3197	}
3198	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
3199	if (r) {
3200	dev_err(adev->dev, "hw_init %d failed %d\n", i,
3201	r);
3202	goto init_failed;
3203	}
3204	r = amdgpu_device_wb_init(adev);
3205	if (r) {
3206	dev_err(adev->dev,
3207	"amdgpu_device_wb_init failed %d\n", r);
3208	goto init_failed;
3209	}
3210	adev->ip_blocks[i].status.hw = true;
3211
3212	/ right after GMC hw init, we create CSA /
3213	if (adev->gfx.mcbp) {
3214	r = amdgpu_allocate_static_csa(adev, bo: &adev->virt.csa_obj,
3215	AMDGPU_GEM_DOMAIN_VRAM \|
3216	AMDGPU_GEM_DOMAIN_GTT,
3217	AMDGPU_CSA_SIZE);
3218	if (r) {
3219	dev_err(adev->dev,
3220	"allocate CSA failed %d\n", r);
3221	goto init_failed;
3222	}
3223	}
3224
3225	r = amdgpu_seq64_init(adev);
3226	if (r) {
3227	dev_err(adev->dev, "allocate seq64 failed %d\n",
3228	r);
3229	goto init_failed;
3230	}
3231	}
3232	}
3233
3234	if (amdgpu_sriov_vf(adev))
3235	amdgpu_virt_init_data_exchange(adev);
3236
3237	r = amdgpu_ib_pool_init(adev);
3238	if (r) {
3239	dev_err(adev->dev, "IB initialization failed (%d).\n", r);
3240	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_IB_INIT_FAIL, error_flags: `0`, error_data: r);
3241	goto init_failed;
3242	}
3243
3244	r = amdgpu_ucode_create_bo(adev); / create ucode bo when sw_init complete/
3245	if (r)
3246	goto init_failed;
3247
3248	r = amdgpu_device_ip_hw_init_phase1(adev);
3249	if (r)
3250	goto init_failed;
3251
3252	r = amdgpu_device_fw_loading(adev);
3253	if (r)
3254	goto init_failed;
3255
3256	r = amdgpu_device_ip_hw_init_phase2(adev);
3257	if (r)
3258	goto init_failed;
3259
3260	/*
3261	* retired pages will be loaded from eeprom and reserved here,
3262	* it should be called after amdgpu_device_ip_hw_init_phase2 since
3263	* for some ASICs the RAS EEPROM code relies on SMU fully functioning
3264	* for I2C communication which only true at this point.
3265	*
3266	* amdgpu_ras_recovery_init may fail, but the upper only cares the
3267	* failure from bad gpu situation and stop amdgpu init process
3268	* accordingly. For other failed cases, it will still release all
3269	* the resource and print error message, rather than returning one
3270	* negative value to upper level.
3271	*
3272	* Note: theoretically, this should be called before all vram allocations
3273	* to protect retired page from abusing
3274	*/
3275	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3276	r = amdgpu_ras_recovery_init(adev, init_bp_info: init_badpage);
3277	if (r)
3278	goto init_failed;
3279
3280	/**
3281	* In case of XGMI grab extra reference for reset domain for this device
3282	*/
3283	if (adev->gmc.xgmi.num_physical_nodes > `1`) {
3284	if (amdgpu_xgmi_add_device(adev) == `0`) {
3285	if (!amdgpu_sriov_vf(adev)) {
3286	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3287
3288	if (WARN_ON(!hive)) {
3289	r = -ENOENT;
3290	goto init_failed;
3291	}
3292
3293	if (!hive->reset_domain \|\|
3294	!amdgpu_reset_get_reset_domain(domain: hive->reset_domain)) {
3295	r = -ENOENT;
3296	amdgpu_put_xgmi_hive(hive);
3297	goto init_failed;
3298	}
3299
3300	/ Drop the early temporary reset domain we created for device /
3301	amdgpu_reset_put_reset_domain(domain: adev->reset_domain);
3302	adev->reset_domain = hive->reset_domain;
3303	amdgpu_put_xgmi_hive(hive);
3304	}
3305	}
3306	}
3307
3308	r = amdgpu_device_init_schedulers(adev);
3309	if (r)
3310	goto init_failed;
3311
3312	if (adev->mman.buffer_funcs_ring->sched.ready)
3313	amdgpu_ttm_set_buffer_funcs_status(adev, enable: true);
3314
3315	/ Don't init kfd if whole hive need to be reset during init /
3316	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
3317	kgd2kfd_init_zone_device(adev);
3318	amdgpu_amdkfd_device_init(adev);
3319	}
3320
3321	amdgpu_fru_get_product_info(adev);
3322
3323	if (!amdgpu_sriov_vf(adev) \|\| amdgpu_sriov_ras_cper_en(adev))
3324	r = amdgpu_cper_init(adev);
3325
3326	init_failed:
3327
3328	return r;
3329	}
3330
3331	/**
3332	* amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
3333	*
3334	* @adev: amdgpu_device pointer
3335	*
3336	* Writes a reset magic value to the gart pointer in VRAM. The driver calls
3337	* this function before a GPU reset. If the value is retained after a
3338	* GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
3339	*/
3340	static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
3341	{
3342	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
3343	}
3344
3345	/**
3346	* amdgpu_device_check_vram_lost - check if vram is valid
3347	*
3348	* @adev: amdgpu_device pointer
3349	*
3350	* Checks the reset magic value written to the gart pointer in VRAM.
3351	* The driver calls this after a GPU reset to see if the contents of
3352	* VRAM is lost or now.
3353	* returns true if vram is lost, false if not.
3354	*/
3355	static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
3356	{
3357	if (memcmp(p: adev->gart.ptr, q: adev->reset_magic,
3358	AMDGPU_RESET_MAGIC_NUM))
3359	return true;
3360
3361	if (!amdgpu_in_reset(adev))
3362	return false;
3363
3364	/*
3365	* For all ASICs with baco/mode1 reset, the VRAM is
3366	* always assumed to be lost.
3367	*/
3368	switch (amdgpu_asic_reset_method(adev)) {
3369	case AMD_RESET_METHOD_LEGACY:
3370	case AMD_RESET_METHOD_LINK:
3371	case AMD_RESET_METHOD_BACO:
3372	case AMD_RESET_METHOD_MODE1:
3373	return true;
3374	default:
3375	return false;
3376	}
3377	}
3378
3379	/**
3380	* amdgpu_device_set_cg_state - set clockgating for amdgpu device
3381	*
3382	* @adev: amdgpu_device pointer
3383	* @state: clockgating state (gate or ungate)
3384	*
3385	* The list of all the hardware IPs that make up the asic is walked and the
3386	* set_clockgating_state callbacks are run.
3387	* Late initialization pass enabling clockgating for hardware IPs.
3388	* Fini or suspend, pass disabling clockgating for hardware IPs.
3389	* Returns 0 on success, negative error code on failure.
3390	*/
3391
3392	int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3393	enum amd_clockgating_state state)
3394	{
3395	int i, j, r;
3396
3397	if (amdgpu_emu_mode == `1`)
3398	return `0`;
3399
3400	for (j = `0`; j < adev->num_ip_blocks; j++) {
3401	i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - `1`;
3402	if (!adev->ip_blocks[i].status.late_initialized)
3403	continue;
3404	/ skip CG for GFX, SDMA on S0ix /
3405	if (adev->in_s0ix &&
3406	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX \|\|
3407	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3408	continue;
3409	/ skip CG for VCE/UVD, it's handled specially /
3410	if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3411	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3412	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3413	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3414	adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3415	/ enable clockgating to save power /
3416	r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
3417	state);
3418	if (r) {
3419	dev_err(adev->dev,
3420	"set_clockgating_state(gate) of IP block <%s> failed %d\n",
3421	adev->ip_blocks[i].version->funcs->name,
3422	r);
3423	return r;
3424	}
3425	}
3426	}
3427
3428	return `0`;
3429	}
3430
3431	int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3432	enum amd_powergating_state state)
3433	{
3434	int i, j, r;
3435
3436	if (amdgpu_emu_mode == `1`)
3437	return `0`;
3438
3439	for (j = `0`; j < adev->num_ip_blocks; j++) {
3440	i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - `1`;
3441	if (!adev->ip_blocks[i].status.late_initialized)
3442	continue;
3443	/ skip PG for GFX, SDMA on S0ix /
3444	if (adev->in_s0ix &&
3445	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX \|\|
3446	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3447	continue;
3448	/ skip CG for VCE/UVD, it's handled specially /
3449	if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3450	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3451	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3452	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3453	adev->ip_blocks[i].version->funcs->set_powergating_state) {
3454	/ enable powergating to save power /
3455	r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
3456	state);
3457	if (r) {
3458	dev_err(adev->dev,
3459	"set_powergating_state(gate) of IP block <%s> failed %d\n",
3460	adev->ip_blocks[i].version->funcs->name,
3461	r);
3462	return r;
3463	}
3464	}
3465	}
3466	return `0`;
3467	}
3468
3469	static int amdgpu_device_enable_mgpu_fan_boost(void)
3470	{
3471	struct amdgpu_gpu_instance *gpu_ins;
3472	struct amdgpu_device *adev;
3473	int i, ret = `0`;
3474
3475	mutex_lock(&mgpu_info.mutex);
3476
3477	/*
3478	* MGPU fan boost feature should be enabled
3479	* only when there are two or more dGPUs in
3480	* the system
3481	*/
3482	if (mgpu_info.num_dgpu < `2`)
3483	goto out;
3484
3485	for (i = `0`; i < mgpu_info.num_dgpu; i++) {
3486	gpu_ins = &(mgpu_info.gpu_ins[i]);
3487	adev = gpu_ins->adev;
3488	if (!(adev->flags & AMD_IS_APU \|\| amdgpu_sriov_multi_vf_mode(adev)) &&
3489	!gpu_ins->mgpu_fan_enabled) {
3490	ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3491	if (ret)
3492	break;
3493
3494	gpu_ins->mgpu_fan_enabled = `1`;
3495	}
3496	}
3497
3498	out:
3499	mutex_unlock(lock: &mgpu_info.mutex);
3500
3501	return ret;
3502	}
3503
3504	/**
3505	* amdgpu_device_ip_late_init - run late init for hardware IPs
3506	*
3507	* @adev: amdgpu_device pointer
3508	*
3509	* Late initialization pass for hardware IPs. The list of all the hardware
3510	* IPs that make up the asic is walked and the late_init callbacks are run.
3511	* late_init covers any special initialization that an IP requires
3512	* after all of the have been initialized or something that needs to happen
3513	* late in the init process.
3514	* Returns 0 on success, negative error code on failure.
3515	*/
3516	static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3517	{
3518	struct amdgpu_gpu_instance *gpu_instance;
3519	int i = `0`, r;
3520
3521	for (i = `0`; i < adev->num_ip_blocks; i++) {
3522	if (!adev->ip_blocks[i].status.hw)
3523	continue;
3524	if (adev->ip_blocks[i].version->funcs->late_init) {
3525	r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
3526	if (r) {
3527	dev_err(adev->dev,
3528	"late_init of IP block <%s> failed %d\n",
3529	adev->ip_blocks[i].version->funcs->name,
3530	r);
3531	return r;
3532	}
3533	}
3534	adev->ip_blocks[i].status.late_initialized = true;
3535	}
3536
3537	r = amdgpu_ras_late_init(adev);
3538	if (r) {
3539	dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
3540	return r;
3541	}
3542
3543	if (!amdgpu_reset_in_recovery(adev))
3544	amdgpu_ras_set_error_query_ready(adev, ready: true);
3545
3546	amdgpu_device_set_cg_state(adev, state: AMD_CG_STATE_GATE);
3547	amdgpu_device_set_pg_state(adev, state: AMD_PG_STATE_GATE);
3548
3549	amdgpu_device_fill_reset_magic(adev);
3550
3551	r = amdgpu_device_enable_mgpu_fan_boost();
3552	if (r)
3553	dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
3554
3555	/ For passthrough configuration on arcturus and aldebaran, enable special handling SBR /
3556	if (amdgpu_passthrough(adev) &&
3557	((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > `1`) \|\|
3558	adev->asic_type == CHIP_ALDEBARAN))
3559	amdgpu_dpm_handle_passthrough_sbr(adev, enable: true);
3560
3561	if (adev->gmc.xgmi.num_physical_nodes > `1`) {
3562	mutex_lock(&mgpu_info.mutex);
3563
3564	/*
3565	* Reset device p-state to low as this was booted with high.
3566	*
3567	* This should be performed only after all devices from the same
3568	* hive get initialized.
3569	*
3570	* However, it's unknown how many device in the hive in advance.
3571	* As this is counted one by one during devices initializations.
3572	*
3573	* So, we wait for all XGMI interlinked devices initialized.
3574	* This may bring some delays as those devices may come from
3575	* different hives. But that should be OK.
3576	*/
3577	if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3578	for (i = `0`; i < mgpu_info.num_gpu; i++) {
3579	gpu_instance = &(mgpu_info.gpu_ins[i]);
3580	if (gpu_instance->adev->flags & AMD_IS_APU)
3581	continue;
3582
3583	r = amdgpu_xgmi_set_pstate(adev: gpu_instance->adev,
3584	pstate: AMDGPU_XGMI_PSTATE_MIN);
3585	if (r) {
3586	dev_err(adev->dev,
3587	"pstate setting failed (%d).\n",
3588	r);
3589	break;
3590	}
3591	}
3592	}
3593
3594	mutex_unlock(lock: &mgpu_info.mutex);
3595	}
3596
3597	return `0`;
3598	}
3599
3600	static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
3601	{
3602	struct amdgpu_device *adev = ip_block->adev;
3603	int r;
3604
3605	if (!ip_block->version->funcs->hw_fini) {
3606	dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
3607	ip_block->version->funcs->name);
3608	} else {
3609	r = ip_block->version->funcs->hw_fini(ip_block);
3610	/ XXX handle errors /
3611	if (r) {
3612	dev_dbg(adev->dev,
3613	"hw_fini of IP block <%s> failed %d\n",
3614	ip_block->version->funcs->name, r);
3615	}
3616	}
3617
3618	ip_block->status.hw = false;
3619	}
3620
3621	/**
3622	* amdgpu_device_smu_fini_early - smu hw_fini wrapper
3623	*
3624	* @adev: amdgpu_device pointer
3625	*
3626	* For ASICs need to disable SMC first
3627	*/
3628	static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3629	{
3630	int i;
3631
3632	if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) > IP_VERSION(`9`, `0`, `0`))
3633	return;
3634
3635	for (i = `0`; i < adev->num_ip_blocks; i++) {
3636	if (!adev->ip_blocks[i].status.hw)
3637	continue;
3638	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3639	amdgpu_ip_block_hw_fini(ip_block: &adev->ip_blocks[i]);
3640	break;
3641	}
3642	}
3643	}
3644
3645	static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3646	{
3647	int i, r;
3648
3649	for (i = `0`; i < adev->num_ip_blocks; i++) {
3650	if (!adev->ip_blocks[i].version->funcs->early_fini)
3651	continue;
3652
3653	r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
3654	if (r) {
3655	dev_dbg(adev->dev,
3656	"early_fini of IP block <%s> failed %d\n",
3657	adev->ip_blocks[i].version->funcs->name, r);
3658	}
3659	}
3660
3661	amdgpu_device_set_pg_state(adev, state: AMD_PG_STATE_UNGATE);
3662	amdgpu_device_set_cg_state(adev, state: AMD_CG_STATE_UNGATE);
3663
3664	amdgpu_amdkfd_suspend(adev, suspend_proc: true);
3665	amdgpu_userq_suspend(adev);
3666
3667	/ Workaround for ASICs need to disable SMC first /
3668	amdgpu_device_smu_fini_early(adev);
3669
3670	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3671	if (!adev->ip_blocks[i].status.hw)
3672	continue;
3673
3674	amdgpu_ip_block_hw_fini(ip_block: &adev->ip_blocks[i]);
3675	}
3676
3677	if (amdgpu_sriov_vf(adev)) {
3678	if (amdgpu_virt_release_full_gpu(adev, init: false))
3679	dev_err(adev->dev,
3680	"failed to release exclusive mode on fini\n");
3681	}
3682
3683	/*
3684	* Driver reload on the APU can fail due to firmware validation because
3685	* the PSP is always running, as it is shared across the whole SoC.
3686	* This same issue does not occur on dGPU because it has a mechanism
3687	* that checks whether the PSP is running. A solution for those issues
3688	* in the APU is to trigger a GPU reset, but this should be done during
3689	* the unload phase to avoid adding boot latency and screen flicker.
3690	*/
3691	if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
3692	r = amdgpu_asic_reset(adev);
3693	if (r)
3694	dev_err(adev->dev, "asic reset on %s failed\n", __func__);
3695	}
3696
3697	return `0`;
3698	}
3699
3700	/**
3701	* amdgpu_device_ip_fini - run fini for hardware IPs
3702	*
3703	* @adev: amdgpu_device pointer
3704	*
3705	* Main teardown pass for hardware IPs. The list of all the hardware
3706	* IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3707	* are run. hw_fini tears down the hardware associated with each IP
3708	* and sw_fini tears down any software state associated with each IP.
3709	* Returns 0 on success, negative error code on failure.
3710	*/
3711	static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3712	{
3713	int i, r;
3714
3715	amdgpu_cper_fini(adev);
3716
3717	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3718	amdgpu_virt_release_ras_err_handler_data(adev);
3719
3720	if (adev->gmc.xgmi.num_physical_nodes > `1`)
3721	amdgpu_xgmi_remove_device(adev);
3722
3723	amdgpu_amdkfd_device_fini_sw(adev);
3724
3725	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3726	if (!adev->ip_blocks[i].status.sw)
3727	continue;
3728
3729	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3730	amdgpu_ucode_free_bo(adev);
3731	amdgpu_free_static_csa(bo: &adev->virt.csa_obj);
3732	amdgpu_device_wb_fini(adev);
3733	amdgpu_device_mem_scratch_fini(adev);
3734	amdgpu_ib_pool_fini(adev);
3735	amdgpu_seq64_fini(adev);
3736	amdgpu_doorbell_fini(adev);
3737	}
3738	if (adev->ip_blocks[i].version->funcs->sw_fini) {
3739	r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
3740	/ XXX handle errors /
3741	if (r) {
3742	dev_dbg(adev->dev,
3743	"sw_fini of IP block <%s> failed %d\n",
3744	adev->ip_blocks[i].version->funcs->name,
3745	r);
3746	}
3747	}
3748	adev->ip_blocks[i].status.sw = false;
3749	adev->ip_blocks[i].status.valid = false;
3750	}
3751
3752	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3753	if (!adev->ip_blocks[i].status.late_initialized)
3754	continue;
3755	if (adev->ip_blocks[i].version->funcs->late_fini)
3756	adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
3757	adev->ip_blocks[i].status.late_initialized = false;
3758	}
3759
3760	amdgpu_ras_fini(adev);
3761	amdgpu_uid_fini(adev);
3762
3763	return `0`;
3764	}
3765
3766	/**
3767	* amdgpu_device_delayed_init_work_handler - work handler for IB tests
3768	*
3769	* @work: work_struct.
3770	*/
3771	static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3772	{
3773	struct amdgpu_device *adev =
3774	container_of(work, struct amdgpu_device, delayed_init_work.work);
3775	int r;
3776
3777	r = amdgpu_ib_ring_tests(adev);
3778	if (r)
3779	dev_err(adev->dev, "ib ring test failed (%d).\n", r);
3780	}
3781
3782	static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3783	{
3784	struct amdgpu_device *adev =
3785	container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3786
3787	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3788	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3789
3790	if (!amdgpu_dpm_set_powergating_by_smu(adev, block_type: AMD_IP_BLOCK_TYPE_GFX, gate: true, inst: `0`))
3791	adev->gfx.gfx_off_state = true;
3792	}
3793
3794	/**
3795	* amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3796	*
3797	* @adev: amdgpu_device pointer
3798	*
3799	* Main suspend function for hardware IPs. The list of all the hardware
3800	* IPs that make up the asic is walked, clockgating is disabled and the
3801	* suspend callbacks are run. suspend puts the hardware and software state
3802	* in each IP into a state suitable for suspend.
3803	* Returns 0 on success, negative error code on failure.
3804	*/
3805	static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3806	{
3807	int i, r, rec;
3808
3809	amdgpu_device_set_pg_state(adev, state: AMD_PG_STATE_UNGATE);
3810	amdgpu_device_set_cg_state(adev, state: AMD_CG_STATE_UNGATE);
3811
3812	/*
3813	* Per PMFW team's suggestion, driver needs to handle gfxoff
3814	* and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3815	* scenario. Add the missing df cstate disablement here.
3816	*/
3817	if (amdgpu_dpm_set_df_cstate(adev, cstate: DF_CSTATE_DISALLOW))
3818	dev_warn(adev->dev, "Failed to disallow df cstate");
3819
3820	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3821	if (!adev->ip_blocks[i].status.valid)
3822	continue;
3823
3824	/ displays are handled separately /
3825	if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3826	continue;
3827
3828	r = amdgpu_ip_block_suspend(ip_block: &adev->ip_blocks[i]);
3829	if (r)
3830	goto unwind;
3831	}
3832
3833	return `0`;
3834	unwind:
3835	rec = amdgpu_device_ip_resume_phase3(adev);
3836	if (rec)
3837	dev_err(adev->dev,
3838	"amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
3839	rec);
3840
3841	amdgpu_dpm_set_df_cstate(adev, cstate: DF_CSTATE_ALLOW);
3842
3843	amdgpu_device_set_pg_state(adev, state: AMD_PG_STATE_GATE);
3844	amdgpu_device_set_cg_state(adev, state: AMD_CG_STATE_GATE);
3845
3846	return r;
3847	}
3848
3849	/**
3850	* amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3851	*
3852	* @adev: amdgpu_device pointer
3853	*
3854	* Main suspend function for hardware IPs. The list of all the hardware
3855	* IPs that make up the asic is walked, clockgating is disabled and the
3856	* suspend callbacks are run. suspend puts the hardware and software state
3857	* in each IP into a state suitable for suspend.
3858	* Returns 0 on success, negative error code on failure.
3859	*/
3860	static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3861	{
3862	int i, r, rec;
3863
3864	if (adev->in_s0ix)
3865	amdgpu_dpm_gfx_state_change(adev, state: sGpuChangeState_D3Entry);
3866
3867	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3868	if (!adev->ip_blocks[i].status.valid)
3869	continue;
3870	/ displays are handled in phase1 /
3871	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3872	continue;
3873	/ PSP lost connection when err_event_athub occurs /
3874	if (amdgpu_ras_intr_triggered() &&
3875	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3876	adev->ip_blocks[i].status.hw = false;
3877	continue;
3878	}
3879
3880	/ skip unnecessary suspend if we do not initialize them yet /
3881	if (!amdgpu_ip_member_of_hwini(
3882	adev, block: adev->ip_blocks[i].version->type))
3883	continue;
3884
3885	/ Since we skip suspend for S0i3, we need to cancel the delayed*
3886	* idle work here as the suspend callback never gets called.
3887	*/
3888	if (adev->in_s0ix &&
3889	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3890	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) >= IP_VERSION(`10`, `0`, `0`))
3891	cancel_delayed_work_sync(dwork: &adev->gfx.idle_work);
3892	/ skip suspend of gfx/mes and psp for S0ix*
3893	* gfx is in gfxoff state, so on resume it will exit gfxoff just
3894	* like at runtime. PSP is also part of the always on hardware
3895	* so no need to suspend it.
3896	*/
3897	if (adev->in_s0ix &&
3898	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP \|\|
3899	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX \|\|
3900	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3901	continue;
3902
3903	/ SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF /
3904	if (adev->in_s0ix &&
3905	(amdgpu_ip_version(adev, ip: SDMA0_HWIP, inst: `0`) >=
3906	IP_VERSION(`5`, `0`, `0`)) &&
3907	(adev->ip_blocks[i].version->type ==
3908	AMD_IP_BLOCK_TYPE_SDMA))
3909	continue;
3910
3911	/ Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.*
3912	* These are in TMR, hence are expected to be reused by PSP-TOS to reload
3913	* from this location and RLC Autoload automatically also gets loaded
3914	* from here based on PMFW -> PSP message during re-init sequence.
3915	* Therefore, the psp suspend & resume should be skipped to avoid destroy
3916	* the TMR and reload FWs again for IMU enabled APU ASICs.
3917	*/
3918	if (amdgpu_in_reset(adev) &&
3919	(adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3920	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3921	continue;
3922
3923	r = amdgpu_ip_block_suspend(ip_block: &adev->ip_blocks[i]);
3924	if (r)
3925	goto unwind;
3926
3927	/ handle putting the SMC in the appropriate state /
3928	if (!amdgpu_sriov_vf(adev)) {
3929	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3930	r = amdgpu_dpm_set_mp1_state(adev, mp1_state: adev->mp1_state);
3931	if (r) {
3932	dev_err(adev->dev,
3933	"SMC failed to set mp1 state %d, %d\n",
3934	adev->mp1_state, r);
3935	goto unwind;
3936	}
3937	}
3938	}
3939	}
3940
3941	return `0`;
3942	unwind:
3943	/ suspend phase 2 = resume phase 1 + resume phase 2 /
3944	rec = amdgpu_device_ip_resume_phase1(adev);
3945	if (rec) {
3946	dev_err(adev->dev,
3947	"amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
3948	rec);
3949	return r;
3950	}
3951
3952	rec = amdgpu_device_fw_loading(adev);
3953	if (rec) {
3954	dev_err(adev->dev,
3955	"amdgpu_device_fw_loading failed during unwind: %d\n",
3956	rec);
3957	return r;
3958	}
3959
3960	rec = amdgpu_device_ip_resume_phase2(adev);
3961	if (rec) {
3962	dev_err(adev->dev,
3963	"amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
3964	rec);
3965	return r;
3966	}
3967
3968	return r;
3969	}
3970
3971	/**
3972	* amdgpu_device_ip_suspend - run suspend for hardware IPs
3973	*
3974	* @adev: amdgpu_device pointer
3975	*
3976	* Main suspend function for hardware IPs. The list of all the hardware
3977	* IPs that make up the asic is walked, clockgating is disabled and the
3978	* suspend callbacks are run. suspend puts the hardware and software state
3979	* in each IP into a state suitable for suspend.
3980	* Returns 0 on success, negative error code on failure.
3981	*/
3982	static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3983	{
3984	int r;
3985
3986	if (amdgpu_sriov_vf(adev)) {
3987	amdgpu_virt_fini_data_exchange(adev);
3988	amdgpu_virt_request_full_gpu(adev, init: false);
3989	}
3990
3991	amdgpu_ttm_set_buffer_funcs_status(adev, enable: false);
3992
3993	r = amdgpu_device_ip_suspend_phase1(adev);
3994	if (r)
3995	return r;
3996	r = amdgpu_device_ip_suspend_phase2(adev);
3997
3998	if (amdgpu_sriov_vf(adev))
3999	amdgpu_virt_release_full_gpu(adev, init: false);
4000
4001	return r;
4002	}
4003
4004	static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
4005	{
4006	int i, r;
4007
4008	static enum amd_ip_block_type ip_order[] = {
4009	AMD_IP_BLOCK_TYPE_COMMON,
4010	AMD_IP_BLOCK_TYPE_GMC,
4011	AMD_IP_BLOCK_TYPE_PSP,
4012	AMD_IP_BLOCK_TYPE_IH,
4013	};
4014
4015	for (i = `0`; i < adev->num_ip_blocks; i++) {
4016	int j;
4017	struct amdgpu_ip_block *block;
4018
4019	block = &adev->ip_blocks[i];
4020	block->status.hw = false;
4021
4022	for (j = `0`; j < ARRAY_SIZE(ip_order); j++) {
4023
4024	if (block->version->type != ip_order[j] \|\|
4025	!block->status.valid)
4026	continue;
4027
4028	r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
4029	if (r) {
4030	dev_err(adev->dev, "RE-INIT-early: %s failed\n",
4031	block->version->funcs->name);
4032	return r;
4033	}
4034	block->status.hw = true;
4035	}
4036	}
4037
4038	return `0`;
4039	}
4040
4041	static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
4042	{
4043	struct amdgpu_ip_block *block;
4044	int i, r = `0`;
4045
4046	static enum amd_ip_block_type ip_order[] = {
4047	AMD_IP_BLOCK_TYPE_SMC,
4048	AMD_IP_BLOCK_TYPE_DCE,
4049	AMD_IP_BLOCK_TYPE_GFX,
4050	AMD_IP_BLOCK_TYPE_SDMA,
4051	AMD_IP_BLOCK_TYPE_MES,
4052	AMD_IP_BLOCK_TYPE_UVD,
4053	AMD_IP_BLOCK_TYPE_VCE,
4054	AMD_IP_BLOCK_TYPE_VCN,
4055	AMD_IP_BLOCK_TYPE_JPEG
4056	};
4057
4058	for (i = `0`; i < ARRAY_SIZE(ip_order); i++) {
4059	block = amdgpu_device_ip_get_ip_block(adev, type: ip_order[i]);
4060
4061	if (!block)
4062	continue;
4063
4064	if (block->status.valid && !block->status.hw) {
4065	if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
4066	r = amdgpu_ip_block_resume(ip_block: block);
4067	} else {
4068	r = block->version->funcs->hw_init(block);
4069	}
4070
4071	if (r) {
4072	dev_err(adev->dev, "RE-INIT-late: %s failed\n",
4073	block->version->funcs->name);
4074	break;
4075	}
4076	block->status.hw = true;
4077	}
4078	}
4079
4080	return r;
4081	}
4082
4083	/**
4084	* amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
4085	*
4086	* @adev: amdgpu_device pointer
4087	*
4088	* First resume function for hardware IPs. The list of all the hardware
4089	* IPs that make up the asic is walked and the resume callbacks are run for
4090	* COMMON, GMC, and IH. resume puts the hardware into a functional state
4091	* after a suspend and updates the software state as necessary. This
4092	* function is also used for restoring the GPU after a GPU reset.
4093	* Returns 0 on success, negative error code on failure.
4094	*/
4095	static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
4096	{
4097	int i, r;
4098
4099	for (i = `0`; i < adev->num_ip_blocks; i++) {
4100	if (!adev->ip_blocks[i].status.valid \|\| adev->ip_blocks[i].status.hw)
4101	continue;
4102	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON \|\|
4103	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC \|\|
4104	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH \|\|
4105	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
4106
4107	r = amdgpu_ip_block_resume(ip_block: &adev->ip_blocks[i]);
4108	if (r)
4109	return r;
4110	}
4111	}
4112
4113	return `0`;
4114	}
4115
4116	/**
4117	* amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
4118	*
4119	* @adev: amdgpu_device pointer
4120	*
4121	* Second resume function for hardware IPs. The list of all the hardware
4122	* IPs that make up the asic is walked and the resume callbacks are run for
4123	* all blocks except COMMON, GMC, and IH. resume puts the hardware into a
4124	* functional state after a suspend and updates the software state as
4125	* necessary. This function is also used for restoring the GPU after a GPU
4126	* reset.
4127	* Returns 0 on success, negative error code on failure.
4128	*/
4129	static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
4130	{
4131	int i, r;
4132
4133	for (i = `0`; i < adev->num_ip_blocks; i++) {
4134	if (!adev->ip_blocks[i].status.valid \|\| adev->ip_blocks[i].status.hw)
4135	continue;
4136	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON \|\|
4137	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC \|\|
4138	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH \|\|
4139	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE \|\|
4140	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
4141	continue;
4142	r = amdgpu_ip_block_resume(ip_block: &adev->ip_blocks[i]);
4143	if (r)
4144	return r;
4145	}
4146
4147	return `0`;
4148	}
4149
4150	/**
4151	* amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
4152	*
4153	* @adev: amdgpu_device pointer
4154	*
4155	* Third resume function for hardware IPs. The list of all the hardware
4156	* IPs that make up the asic is walked and the resume callbacks are run for
4157	* all DCE. resume puts the hardware into a functional state after a suspend
4158	* and updates the software state as necessary. This function is also used
4159	* for restoring the GPU after a GPU reset.
4160	*
4161	* Returns 0 on success, negative error code on failure.
4162	*/
4163	static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
4164	{
4165	int i, r;
4166
4167	for (i = `0`; i < adev->num_ip_blocks; i++) {
4168	if (!adev->ip_blocks[i].status.valid \|\| adev->ip_blocks[i].status.hw)
4169	continue;
4170	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
4171	r = amdgpu_ip_block_resume(ip_block: &adev->ip_blocks[i]);
4172	if (r)
4173	return r;
4174	}
4175	}
4176
4177	return `0`;
4178	}
4179
4180	/**
4181	* amdgpu_device_ip_resume - run resume for hardware IPs
4182	*
4183	* @adev: amdgpu_device pointer
4184	*
4185	* Main resume function for hardware IPs. The hardware IPs
4186	* are split into two resume functions because they are
4187	* also used in recovering from a GPU reset and some additional
4188	* steps need to be take between them. In this case (S3/S4) they are
4189	* run sequentially.
4190	* Returns 0 on success, negative error code on failure.
4191	*/
4192	static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
4193	{
4194	int r;
4195
4196	r = amdgpu_device_ip_resume_phase1(adev);
4197	if (r)
4198	return r;
4199
4200	r = amdgpu_device_fw_loading(adev);
4201	if (r)
4202	return r;
4203
4204	r = amdgpu_device_ip_resume_phase2(adev);
4205
4206	if (adev->mman.buffer_funcs_ring->sched.ready)
4207	amdgpu_ttm_set_buffer_funcs_status(adev, enable: true);
4208
4209	if (r)
4210	return r;
4211
4212	amdgpu_fence_driver_hw_init(adev);
4213
4214	r = amdgpu_device_ip_resume_phase3(adev);
4215
4216	return r;
4217	}
4218
4219	/**
4220	* amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
4221	*
4222	* @adev: amdgpu_device pointer
4223	*
4224	* Query the VBIOS data tables to determine if the board supports SR-IOV.
4225	*/
4226	static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
4227	{
4228	if (amdgpu_sriov_vf(adev)) {
4229	if (adev->is_atom_fw) {
4230	if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
4231	adev->virt.caps \|= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
4232	} else {
4233	if (amdgpu_atombios_has_gpu_virtualization_table(adev))
4234	adev->virt.caps \|= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
4235	}
4236
4237	if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
4238	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_NO_VBIOS, error_flags: `0`, error_data: `0`);
4239	}
4240	}
4241
4242	/**
4243	* amdgpu_device_asic_has_dc_support - determine if DC supports the asic
4244	*
4245	* @pdev : pci device context
4246	* @asic_type: AMD asic type
4247	*
4248	* Check if there is DC (new modesetting infrastructre) support for an asic.
4249	* returns true if DC has support, false if not.
4250	*/
4251	bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
4252	enum amd_asic_type asic_type)
4253	{
4254	switch (asic_type) {
4255	#ifdef CONFIG_DRM_AMDGPU_SI
4256	case CHIP_HAINAN:
4257	#endif
4258	case CHIP_TOPAZ:
4259	/ chips with no display hardware /
4260	return false;
4261	#if defined(CONFIG_DRM_AMD_DC)
4262	case CHIP_TAHITI:
4263	case CHIP_PITCAIRN:
4264	case CHIP_VERDE:
4265	case CHIP_OLAND:
4266	return amdgpu_dc != `0` && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
4267	case CHIP_KAVERI:
4268	case CHIP_KABINI:
4269	case CHIP_MULLINS:
4270	/*
4271	* We have systems in the wild with these ASICs that require
4272	* TRAVIS and NUTMEG support which is not supported with DC.
4273	*
4274	* Fallback to the non-DC driver here by default so as not to
4275	* cause regressions.
4276	*/
4277	return amdgpu_dc > `0`;
4278	default:
4279	return amdgpu_dc != `0`;
4280	#else
4281	default:
4282	if (amdgpu_dc > `0`)
4283	dev_info_once(
4284	&pdev->dev,
4285	"Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4286	return false;
4287	#endif
4288	}
4289	}
4290
4291	/**
4292	* amdgpu_device_has_dc_support - check if dc is supported
4293	*
4294	* @adev: amdgpu_device pointer
4295	*
4296	* Returns true for supported, false for not supported
4297	*/
4298	bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
4299	{
4300	if (adev->enable_virtual_display \|\|
4301	(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
4302	return false;
4303
4304	return amdgpu_device_asic_has_dc_support(pdev: adev->pdev, asic_type: adev->asic_type);
4305	}
4306
4307	static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
4308	{
4309	struct amdgpu_device *adev =
4310	container_of(__work, struct amdgpu_device, xgmi_reset_work);
4311	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
4312
4313	/ It's a bug to not have a hive within this function /
4314	if (WARN_ON(!hive))
4315	return;
4316
4317	/*
4318	* Use task barrier to synchronize all xgmi reset works across the
4319	* hive. task_barrier_enter and task_barrier_exit will block
4320	* until all the threads running the xgmi reset works reach
4321	* those points. task_barrier_full will do both blocks.
4322	*/
4323	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
4324
4325	task_barrier_enter(tb: &hive->tb);
4326	adev->asic_reset_res = amdgpu_device_baco_enter(adev);
4327
4328	if (adev->asic_reset_res)
4329	goto fail;
4330
4331	task_barrier_exit(tb: &hive->tb);
4332	adev->asic_reset_res = amdgpu_device_baco_exit(adev);
4333
4334	if (adev->asic_reset_res)
4335	goto fail;
4336
4337	amdgpu_ras_reset_error_count(adev, block: AMDGPU_RAS_BLOCK__MMHUB);
4338	} else {
4339
4340	task_barrier_full(tb: &hive->tb);
4341	adev->asic_reset_res = amdgpu_asic_reset(adev);
4342	}
4343
4344	fail:
4345	if (adev->asic_reset_res)
4346	dev_warn(adev->dev,
4347	"ASIC reset failed with error, %d for drm dev, %s",
4348	adev->asic_reset_res, adev_to_drm(adev)->unique);
4349	amdgpu_put_xgmi_hive(hive);
4350	}
4351
4352	static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
4353	{
4354	char *input = amdgpu_lockup_timeout;
4355	char *timeout_setting = NULL;
4356	int index = `0`;
4357	long timeout;
4358	int ret = `0`;
4359
4360	/ By default timeout for all queues is 2 sec /
4361	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
4362	adev->video_timeout = msecs_to_jiffies(m: `2000`);
4363
4364	if (!strnlen(p: input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
4365	return `0`;
4366
4367	while ((timeout_setting = strsep(&input, ",")) &&
4368	strnlen(p: timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
4369	ret = kstrtol(s: timeout_setting, base: `0`, res: &timeout);
4370	if (ret)
4371	return ret;
4372
4373	if (timeout == `0`) {
4374	index++;
4375	continue;
4376	} else if (timeout < `0`) {
4377	timeout = MAX_SCHEDULE_TIMEOUT;
4378	dev_warn(adev->dev, "lockup timeout disabled");
4379	add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
4380	} else {
4381	timeout = msecs_to_jiffies(m: timeout);
4382	}
4383
4384	switch (index++) {
4385	case `0`:
4386	adev->gfx_timeout = timeout;
4387	break;
4388	case `1`:
4389	adev->compute_timeout = timeout;
4390	break;
4391	case `2`:
4392	adev->sdma_timeout = timeout;
4393	break;
4394	case `3`:
4395	adev->video_timeout = timeout;
4396	break;
4397	default:
4398	break;
4399	}
4400	}
4401
4402	/ When only one value specified apply it to all queues. /
4403	if (index == `1`)
4404	adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
4405	adev->video_timeout = timeout;
4406
4407	return ret;
4408	}
4409
4410	/**
4411	* amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4412	*
4413	* @adev: amdgpu_device pointer
4414	*
4415	* RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
4416	*/
4417	static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4418	{
4419	struct iommu_domain *domain;
4420
4421	domain = iommu_get_domain_for_dev(dev: adev->dev);
4422	if (!domain \|\| domain->type == IOMMU_DOMAIN_IDENTITY)
4423	adev->ram_is_direct_mapped = true;
4424	}
4425
4426	#if defined(CONFIG_HSA_AMD_P2P)
4427	/**
4428	* amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
4429	*
4430	* @adev: amdgpu_device pointer
4431	*
4432	* return if IOMMU remapping bar address
4433	*/
4434	static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
4435	{
4436	struct iommu_domain *domain;
4437
4438	domain = iommu_get_domain_for_dev(dev: adev->dev);
4439	if (domain && (domain->type == IOMMU_DOMAIN_DMA \|\|
4440	domain->type == IOMMU_DOMAIN_DMA_FQ))
4441	return true;
4442
4443	return false;
4444	}
4445	#endif
4446
4447	static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
4448	{
4449	if (amdgpu_mcbp == `1`)
4450	adev->gfx.mcbp = true;
4451	else if (amdgpu_mcbp == `0`)
4452	adev->gfx.mcbp = false;
4453
4454	if (amdgpu_sriov_vf(adev))
4455	adev->gfx.mcbp = true;
4456
4457	if (adev->gfx.mcbp)
4458	dev_info(adev->dev, "MCBP is enabled\n");
4459	}
4460
4461	static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
4462	{
4463	int r;
4464
4465	r = amdgpu_atombios_sysfs_init(adev);
4466	if (r)
4467	drm_err(&adev->ddev,
4468	"registering atombios sysfs failed (%d).\n", r);
4469
4470	r = amdgpu_pm_sysfs_init(adev);
4471	if (r)
4472	dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
4473
4474	r = amdgpu_ucode_sysfs_init(adev);
4475	if (r) {
4476	adev->ucode_sysfs_en = false;
4477	dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
4478	} else
4479	adev->ucode_sysfs_en = true;
4480
4481	r = amdgpu_device_attr_sysfs_init(adev);
4482	if (r)
4483	dev_err(adev->dev, "Could not create amdgpu device attr\n");
4484
4485	r = devm_device_add_group(dev: adev->dev, grp: &amdgpu_board_attrs_group);
4486	if (r)
4487	dev_err(adev->dev,
4488	"Could not create amdgpu board attributes\n");
4489
4490	amdgpu_fru_sysfs_init(adev);
4491	amdgpu_reg_state_sysfs_init(adev);
4492	amdgpu_xcp_sysfs_init(adev);
4493
4494	return r;
4495	}
4496
4497	static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
4498	{
4499	if (adev->pm.sysfs_initialized)
4500	amdgpu_pm_sysfs_fini(adev);
4501	if (adev->ucode_sysfs_en)
4502	amdgpu_ucode_sysfs_fini(adev);
4503	amdgpu_device_attr_sysfs_fini(adev);
4504	amdgpu_fru_sysfs_fini(adev);
4505
4506	amdgpu_reg_state_sysfs_fini(adev);
4507	amdgpu_xcp_sysfs_fini(adev);
4508	}
4509
4510	/**
4511	* amdgpu_device_init - initialize the driver
4512	*
4513	* @adev: amdgpu_device pointer
4514	* @flags: driver flags
4515	*
4516	* Initializes the driver info and hw (all asics).
4517	* Returns 0 for success or an error on failure.
4518	* Called at driver startup.
4519	*/
4520	int amdgpu_device_init(struct amdgpu_device *adev,
4521	uint32_t flags)
4522	{
4523	struct pci_dev *pdev = adev->pdev;
4524	int r, i;
4525	bool px = false;
4526	u32 max_MBps;
4527	int tmp;
4528
4529	adev->shutdown = false;
4530	adev->flags = flags;
4531
4532	if (amdgpu_force_asic_type >= `0` && amdgpu_force_asic_type < CHIP_LAST)
4533	adev->asic_type = amdgpu_force_asic_type;
4534	else
4535	adev->asic_type = flags & AMD_ASIC_MASK;
4536
4537	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4538	if (amdgpu_emu_mode == `1`)
4539	adev->usec_timeout *= `10`;
4540	adev->gmc.gart_size = `512` * `1024` * `1024`;
4541	adev->accel_working = false;
4542	adev->num_rings = `0`;
4543	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4544	adev->mman.buffer_funcs = NULL;
4545	adev->mman.buffer_funcs_ring = NULL;
4546	adev->vm_manager.vm_pte_funcs = NULL;
4547	adev->vm_manager.vm_pte_num_scheds = `0`;
4548	adev->gmc.gmc_funcs = NULL;
4549	adev->harvest_ip_mask = `0x0`;
4550	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4551	bitmap_zero(dst: adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4552
4553	adev->smc_rreg = &amdgpu_invalid_rreg;
4554	adev->smc_wreg = &amdgpu_invalid_wreg;
4555	adev->pcie_rreg = &amdgpu_invalid_rreg;
4556	adev->pcie_wreg = &amdgpu_invalid_wreg;
4557	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4558	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4559	adev->pciep_rreg = &amdgpu_invalid_rreg;
4560	adev->pciep_wreg = &amdgpu_invalid_wreg;
4561	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4562	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4563	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4564	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4565	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4566	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4567	adev->didt_rreg = &amdgpu_invalid_rreg;
4568	adev->didt_wreg = &amdgpu_invalid_wreg;
4569	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4570	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4571	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4572	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4573
4574	dev_info(
4575	adev->dev,
4576	"initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4577	amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4578	pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4579
4580	/ mutex initialization are all done here so we*
4581	* can recall function without having locking issues
4582	*/
4583	mutex_init(&adev->firmware.mutex);
4584	mutex_init(&adev->pm.mutex);
4585	mutex_init(&adev->gfx.gpu_clock_mutex);
4586	mutex_init(&adev->srbm_mutex);
4587	mutex_init(&adev->gfx.pipe_reserve_mutex);
4588	mutex_init(&adev->gfx.gfx_off_mutex);
4589	mutex_init(&adev->gfx.partition_mutex);
4590	mutex_init(&adev->grbm_idx_mutex);
4591	mutex_init(&adev->mn_lock);
4592	mutex_init(&adev->virt.vf_errors.lock);
4593	hash_init(adev->mn_hash);
4594	mutex_init(&adev->psp.mutex);
4595	mutex_init(&adev->notifier_lock);
4596	mutex_init(&adev->pm.stable_pstate_ctx_lock);
4597	mutex_init(&adev->benchmark_mutex);
4598	mutex_init(&adev->gfx.reset_sem_mutex);
4599	/ Initialize the mutex for cleaner shader isolation between GFX and compute processes /
4600	mutex_init(&adev->enforce_isolation_mutex);
4601	for (i = `0`; i < MAX_XCP; ++i) {
4602	adev->isolation[i].spearhead = dma_fence_get_stub();
4603	amdgpu_sync_create(sync: &adev->isolation[i].active);
4604	amdgpu_sync_create(sync: &adev->isolation[i].prev);
4605	}
4606	mutex_init(&adev->gfx.userq_sch_mutex);
4607	mutex_init(&adev->gfx.workload_profile_mutex);
4608	mutex_init(&adev->vcn.workload_profile_mutex);
4609
4610	amdgpu_device_init_apu_flags(adev);
4611
4612	r = amdgpu_device_check_arguments(adev);
4613	if (r)
4614	return r;
4615
4616	spin_lock_init(&adev->mmio_idx_lock);
4617	spin_lock_init(&adev->smc_idx_lock);
4618	spin_lock_init(&adev->pcie_idx_lock);
4619	spin_lock_init(&adev->uvd_ctx_idx_lock);
4620	spin_lock_init(&adev->didt_idx_lock);
4621	spin_lock_init(&adev->gc_cac_idx_lock);
4622	spin_lock_init(&adev->se_cac_idx_lock);
4623	spin_lock_init(&adev->audio_endpt_idx_lock);
4624	spin_lock_init(&adev->mm_stats.lock);
4625	spin_lock_init(&adev->virt.rlcg_reg_lock);
4626	spin_lock_init(&adev->wb.lock);
4627
4628	xa_init_flags(xa: &adev->userq_xa, XA_FLAGS_LOCK_IRQ);
4629
4630	INIT_LIST_HEAD(list: &adev->reset_list);
4631
4632	INIT_LIST_HEAD(list: &adev->ras_list);
4633
4634	INIT_LIST_HEAD(list: &adev->pm.od_kobj_list);
4635
4636	xa_init(xa: &adev->userq_doorbell_xa);
4637
4638	INIT_DELAYED_WORK(&adev->delayed_init_work,
4639	amdgpu_device_delayed_init_work_handler);
4640	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4641	amdgpu_device_delay_enable_gfx_off);
4642	/*
4643	* Initialize the enforce_isolation work structures for each XCP
4644	* partition. This work handler is responsible for enforcing shader
4645	* isolation on AMD GPUs. It counts the number of emitted fences for
4646	* each GFX and compute ring. If there are any fences, it schedules
4647	* the `enforce_isolation_work` to be run after a delay. If there are
4648	* no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4649	* runqueue.
4650	*/
4651	for (i = `0`; i < MAX_XCP; i++) {
4652	INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4653	amdgpu_gfx_enforce_isolation_handler);
4654	adev->gfx.enforce_isolation[i].adev = adev;
4655	adev->gfx.enforce_isolation[i].xcp_id = i;
4656	}
4657
4658	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4659	INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
4660
4661	adev->gfx.gfx_off_req_count = `1`;
4662	adev->gfx.gfx_off_residency = `0`;
4663	adev->gfx.gfx_off_entrycount = `0`;
4664	adev->pm.ac_power = power_supply_is_system_supplied() > `0`;
4665
4666	atomic_set(v: &adev->throttling_logging_enabled, i: `1`);
4667	/*
4668	* If throttling continues, logging will be performed every minute
4669	* to avoid log flooding. "-1" is subtracted since the thermal
4670	* throttling interrupt comes every second. Thus, the total logging
4671	* interval is 59 seconds(retelimited printk interval) + 1(waiting
4672	* for throttling interrupt) = 60 seconds.
4673	*/
4674	ratelimit_state_init(rs: &adev->throttling_logging_rs, interval: (`60` - `1`) * HZ, burst: `1`);
4675
4676	ratelimit_set_flags(rs: &adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4677
4678	/ Registers mapping /
4679	/ TODO: block userspace mapping of io register /
4680	if (adev->asic_type >= CHIP_BONAIRE) {
4681	adev->rmmio_base = pci_resource_start(adev->pdev, `5`);
4682	adev->rmmio_size = pci_resource_len(adev->pdev, `5`);
4683	} else {
4684	adev->rmmio_base = pci_resource_start(adev->pdev, `2`);
4685	adev->rmmio_size = pci_resource_len(adev->pdev, `2`);
4686	}
4687
4688	for (i = `0`; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4689	atomic_set(v: &adev->pm.pwr_state[i], i: POWER_STATE_UNKNOWN);
4690
4691	adev->rmmio = ioremap(offset: adev->rmmio_base, size: adev->rmmio_size);
4692	if (!adev->rmmio)
4693	return -ENOMEM;
4694
4695	dev_info(adev->dev, "register mmio base: 0x%08X\n",
4696	(uint32_t)adev->rmmio_base);
4697	dev_info(adev->dev, "register mmio size: %u\n",
4698	(unsigned int)adev->rmmio_size);
4699
4700	/*
4701	* Reset domain needs to be present early, before XGMI hive discovered
4702	* (if any) and initialized to use reset sem and in_gpu reset flag
4703	* early on during init and before calling to RREG32.
4704	*/
4705	adev->reset_domain = amdgpu_reset_create_reset_domain(type: SINGLE_DEVICE, wq_name: "amdgpu-reset-dev");
4706	if (!adev->reset_domain)
4707	return -ENOMEM;
4708
4709	/ detect hw virtualization here /
4710	amdgpu_virt_init(adev);
4711
4712	amdgpu_device_get_pcie_info(adev);
4713
4714	r = amdgpu_device_get_job_timeout_settings(adev);
4715	if (r) {
4716	dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4717	return r;
4718	}
4719
4720	amdgpu_device_set_mcbp(adev);
4721
4722	/*
4723	* By default, use default mode where all blocks are expected to be
4724	* initialized. At present a 'swinit' of blocks is required to be
4725	* completed before the need for a different level is detected.
4726	*/
4727	amdgpu_set_init_level(adev, lvl: AMDGPU_INIT_LEVEL_DEFAULT);
4728	/ early init functions /
4729	r = amdgpu_device_ip_early_init(adev);
4730	if (r)
4731	return r;
4732
4733	/*
4734	* No need to remove conflicting FBs for non-display class devices.
4735	* This prevents the sysfb from being freed accidently.
4736	*/
4737	if ((pdev->class >> `8`) == PCI_CLASS_DISPLAY_VGA \|\|
4738	(pdev->class >> `8`) == PCI_CLASS_DISPLAY_OTHER) {
4739	/ Get rid of things like offb /
4740	r = aperture_remove_conflicting_pci_devices(pdev: adev->pdev, name: amdgpu_kms_driver.name);
4741	if (r)
4742	return r;
4743	}
4744
4745	/ Enable TMZ based on IP_VERSION /
4746	amdgpu_gmc_tmz_set(adev);
4747
4748	if (amdgpu_sriov_vf(adev) &&
4749	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) >= IP_VERSION(`10`, `3`, `0`))
4750	/ VF MMIO access (except mailbox range) from CPU*
4751	* will be blocked during sriov runtime
4752	*/
4753	adev->virt.caps \|= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4754
4755	amdgpu_gmc_noretry_set(adev);
4756	/ Need to get xgmi info early to decide the reset behavior/
4757	if (adev->gmc.xgmi.supported) {
4758	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4759	if (r)
4760	return r;
4761	}
4762
4763	/ enable PCIE atomic ops /
4764	if (amdgpu_sriov_vf(adev)) {
4765	if (adev->virt.fw_reserve.p_pf2vf)
4766	adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4767	adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4768	(PCI_EXP_DEVCAP2_ATOMIC_COMP32 \| PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4769	/ APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a*
4770	* internal path natively support atomics, set have_atomics_support to true.
4771	*/
4772	} else if ((adev->flags & AMD_IS_APU) &&
4773	(amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) >
4774	IP_VERSION(`9`, `0`, `0`))) {
4775	adev->have_atomics_support = true;
4776	} else {
4777	adev->have_atomics_support =
4778	!pci_enable_atomic_ops_to_root(dev: adev->pdev,
4779	PCI_EXP_DEVCAP2_ATOMIC_COMP32 \|
4780	PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4781	}
4782
4783	if (!adev->have_atomics_support)
4784	dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4785
4786	/ doorbell bar mapping and doorbell index init/
4787	amdgpu_doorbell_init(adev);
4788
4789	if (amdgpu_emu_mode == `1`) {
4790	/ post the asic on emulation mode /
4791	emu_soc_asic_init(adev);
4792	goto fence_driver_init;
4793	}
4794
4795	amdgpu_reset_init(adev);
4796
4797	/ detect if we are with an SRIOV vbios /
4798	if (adev->bios)
4799	amdgpu_device_detect_sriov_bios(adev);
4800
4801	/ check if we need to reset the asic*
4802	* E.g., driver was not cleanly unloaded previously, etc.
4803	*/
4804	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4805	if (adev->gmc.xgmi.num_physical_nodes) {
4806	dev_info(adev->dev, "Pending hive reset.\n");
4807	amdgpu_set_init_level(adev,
4808	lvl: AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
4809	} else if (amdgpu_ip_version(adev, ip: MP1_HWIP, inst: `0`) == IP_VERSION(`13`, `0`, `10`) &&
4810	!amdgpu_device_has_display_hardware(adev)) {
4811	r = psp_gpu_reset(adev);
4812	} else {
4813	tmp = amdgpu_reset_method;
4814	/ It should do a default reset when loading or reloading the driver,*
4815	* regardless of the module parameter reset_method.
4816	*/
4817	amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4818	r = amdgpu_asic_reset(adev);
4819	amdgpu_reset_method = tmp;
4820	}
4821
4822	if (r) {
4823	dev_err(adev->dev, "asic reset on init failed\n");
4824	goto failed;
4825	}
4826	}
4827
4828	/ Post card if necessary /
4829	if (amdgpu_device_need_post(adev)) {
4830	if (!adev->bios) {
4831	dev_err(adev->dev, "no vBIOS found\n");
4832	r = -EINVAL;
4833	goto failed;
4834	}
4835	dev_info(adev->dev, "GPU posting now...\n");
4836	r = amdgpu_device_asic_init(adev);
4837	if (r) {
4838	dev_err(adev->dev, "gpu post error!\n");
4839	goto failed;
4840	}
4841	}
4842
4843	if (adev->bios) {
4844	if (adev->is_atom_fw) {
4845	/ Initialize clocks /
4846	r = amdgpu_atomfirmware_get_clock_info(adev);
4847	if (r) {
4848	dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4849	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, error_flags: `0`, error_data: `0`);
4850	goto failed;
4851	}
4852	} else {
4853	/ Initialize clocks /
4854	r = amdgpu_atombios_get_clock_info(adev);
4855	if (r) {
4856	dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4857	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, error_flags: `0`, error_data: `0`);
4858	goto failed;
4859	}
4860	/ init i2c buses /
4861	amdgpu_i2c_init(adev);
4862	}
4863	}
4864
4865	fence_driver_init:
4866	/ Fence driver /
4867	r = amdgpu_fence_driver_sw_init(adev);
4868	if (r) {
4869	dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4870	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_FENCE_INIT_FAIL, error_flags: `0`, error_data: `0`);
4871	goto failed;
4872	}
4873
4874	/ init the mode config /
4875	drm_mode_config_init(dev: adev_to_drm(adev));
4876
4877	r = amdgpu_device_ip_init(adev);
4878	if (r) {
4879	dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4880	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, error_flags: `0`, error_data: `0`);
4881	goto release_ras_con;
4882	}
4883
4884	amdgpu_fence_driver_hw_init(adev);
4885
4886	dev_info(adev->dev,
4887	"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4888	adev->gfx.config.max_shader_engines,
4889	adev->gfx.config.max_sh_per_se,
4890	adev->gfx.config.max_cu_per_sh,
4891	adev->gfx.cu_info.number);
4892
4893	adev->accel_working = true;
4894
4895	amdgpu_vm_check_compute_bug(adev);
4896
4897	/ Initialize the buffer migration limit. /
4898	if (amdgpu_moverate >= `0`)
4899	max_MBps = amdgpu_moverate;
4900	else
4901	max_MBps = `8`; / Allow 8 MB/s. /
4902	/ Get a log2 for easy divisions. /
4903	adev->mm_stats.log2_max_MBps = ilog2(max(`1u`, max_MBps));
4904
4905	/*
4906	* Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4907	* Otherwise the mgpu fan boost feature will be skipped due to the
4908	* gpu instance is counted less.
4909	*/
4910	amdgpu_register_gpu_instance(adev);
4911
4912	/ enable clockgating, etc. after ib tests, etc. since some blocks require*
4913	* explicit gating rather than handling it automatically.
4914	*/
4915	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4916	r = amdgpu_device_ip_late_init(adev);
4917	if (r) {
4918	dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4919	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, error_flags: `0`, error_data: r);
4920	goto release_ras_con;
4921	}
4922	/ must succeed. /
4923	amdgpu_ras_resume(adev);
4924	queue_delayed_work(wq: system_wq, dwork: &adev->delayed_init_work,
4925	delay: msecs_to_jiffies(AMDGPU_RESUME_MS));
4926	}
4927
4928	if (amdgpu_sriov_vf(adev)) {
4929	amdgpu_virt_release_full_gpu(adev, init: true);
4930	flush_delayed_work(dwork: &adev->delayed_init_work);
4931	}
4932
4933	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4934	amdgpu_xgmi_reset_on_init(adev);
4935	/*
4936	* Place those sysfs registering after `late_init`. As some of those
4937	* operations performed in `late_init` might affect the sysfs
4938	* interfaces creating.
4939	*/
4940	r = amdgpu_device_sys_interface_init(adev);
4941
4942	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4943	r = amdgpu_pmu_init(adev);
4944	if (r)
4945	dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4946
4947	/ Have stored pci confspace at hand for restore in sudden PCI error /
4948	if (amdgpu_device_cache_pci_state(pdev: adev->pdev))
4949	pci_restore_state(dev: pdev);
4950
4951	/ if we have > 1 VGA cards, then disable the amdgpu VGA resources /
4952	/ this will fail for cards that aren't VGA class devices, just*
4953	* ignore it
4954	*/
4955	if ((adev->pdev->class >> `8`) == PCI_CLASS_DISPLAY_VGA)
4956	vga_client_register(pdev: adev->pdev, set_decode: amdgpu_device_vga_set_decode);
4957
4958	px = amdgpu_device_supports_px(adev);
4959
4960	if (px \|\| (!dev_is_removable(dev: &adev->pdev->dev) &&
4961	apple_gmux_detect(NULL, NULL)))
4962	vga_switcheroo_register_client(dev: adev->pdev,
4963	ops: &amdgpu_switcheroo_ops, driver_power_control: px);
4964
4965	if (px)
4966	vga_switcheroo_init_domain_pm_ops(dev: adev->dev, domain: &adev->vga_pm_domain);
4967
4968	amdgpu_device_check_iommu_direct_map(adev);
4969
4970	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4971	r = register_pm_notifier(nb: &adev->pm_nb);
4972	if (r)
4973	goto failed;
4974
4975	return `0`;
4976
4977	release_ras_con:
4978	if (amdgpu_sriov_vf(adev))
4979	amdgpu_virt_release_full_gpu(adev, init: true);
4980
4981	/ failed in exclusive mode due to timeout /
4982	if (amdgpu_sriov_vf(adev) &&
4983	!amdgpu_sriov_runtime(adev) &&
4984	amdgpu_virt_mmio_blocked(adev) &&
4985	!amdgpu_virt_wait_reset(adev)) {
4986	dev_err(adev->dev, "VF exclusive mode timeout\n");
4987	/ Don't send request since VF is inactive. /
4988	adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4989	adev->virt.ops = NULL;
4990	r = -EAGAIN;
4991	}
4992	amdgpu_release_ras_context(adev);
4993
4994	failed:
4995	amdgpu_vf_error_trans_all(adev);
4996
4997	return r;
4998	}
4999
5000	static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
5001	{
5002
5003	/ Clear all CPU mappings pointing to this device /
5004	unmap_mapping_range(mapping: adev->ddev.anon_inode->i_mapping, holebegin: `0`, holelen: `0`, even_cows: `1`);
5005
5006	/ Unmap all mapped bars - Doorbell, registers and VRAM /
5007	amdgpu_doorbell_fini(adev);
5008
5009	iounmap(addr: adev->rmmio);
5010	adev->rmmio = NULL;
5011	if (adev->mman.aper_base_kaddr)
5012	iounmap(addr: adev->mman.aper_base_kaddr);
5013	adev->mman.aper_base_kaddr = NULL;
5014
5015	/ Memory manager related /
5016	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
5017	arch_phys_wc_del(handle: adev->gmc.vram_mtrr);
5018	arch_io_free_memtype_wc(start: adev->gmc.aper_base, size: adev->gmc.aper_size);
5019	}
5020	}
5021
5022	/**
5023	* amdgpu_device_fini_hw - tear down the driver
5024	*
5025	* @adev: amdgpu_device pointer
5026	*
5027	* Tear down the driver info (all asics).
5028	* Called at driver shutdown.
5029	*/
5030	void amdgpu_device_fini_hw(struct amdgpu_device *adev)
5031	{
5032	dev_info(adev->dev, "amdgpu: finishing device.\n");
5033	flush_delayed_work(dwork: &adev->delayed_init_work);
5034
5035	if (adev->mman.initialized)
5036	drain_workqueue(wq: adev->mman.bdev.wq);
5037	adev->shutdown = true;
5038
5039	unregister_pm_notifier(nb: &adev->pm_nb);
5040
5041	/ make sure IB test finished before entering exclusive mode*
5042	* to avoid preemption on IB test
5043	*/
5044	if (amdgpu_sriov_vf(adev)) {
5045	amdgpu_virt_request_full_gpu(adev, init: false);
5046	amdgpu_virt_fini_data_exchange(adev);
5047	}
5048
5049	/ disable all interrupts /
5050	amdgpu_irq_disable_all(adev);
5051	if (adev->mode_info.mode_config_initialized) {
5052	if (!drm_drv_uses_atomic_modeset(dev: adev_to_drm(adev)))
5053	drm_helper_force_disable_all(dev: adev_to_drm(adev));
5054	else
5055	drm_atomic_helper_shutdown(dev: adev_to_drm(adev));
5056	}
5057	amdgpu_fence_driver_hw_fini(adev);
5058
5059	amdgpu_device_sys_interface_fini(adev);
5060
5061	/ disable ras feature must before hw fini /
5062	amdgpu_ras_pre_fini(adev);
5063
5064	amdgpu_ttm_set_buffer_funcs_status(adev, enable: false);
5065
5066	/*
5067	* device went through surprise hotplug; we need to destroy topology
5068	* before ip_fini_early to prevent kfd locking refcount issues by calling
5069	* amdgpu_amdkfd_suspend()
5070	*/
5071	if (drm_dev_is_unplugged(dev: adev_to_drm(adev)))
5072	amdgpu_amdkfd_device_fini_sw(adev);
5073
5074	amdgpu_device_ip_fini_early(adev);
5075
5076	amdgpu_irq_fini_hw(adev);
5077
5078	if (adev->mman.initialized)
5079	ttm_device_clear_dma_mappings(bdev: &adev->mman.bdev);
5080
5081	amdgpu_gart_dummy_page_fini(adev);
5082
5083	if (drm_dev_is_unplugged(dev: adev_to_drm(adev)))
5084	amdgpu_device_unmap_mmio(adev);
5085
5086	}
5087
5088	void amdgpu_device_fini_sw(struct amdgpu_device *adev)
5089	{
5090	int i, idx;
5091	bool px;
5092
5093	amdgpu_device_ip_fini(adev);
5094	amdgpu_fence_driver_sw_fini(adev);
5095	amdgpu_ucode_release(fw: &adev->firmware.gpu_info_fw);
5096	adev->accel_working = false;
5097	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
5098	for (i = `0`; i < MAX_XCP; ++i) {
5099	dma_fence_put(fence: adev->isolation[i].spearhead);
5100	amdgpu_sync_free(sync: &adev->isolation[i].active);
5101	amdgpu_sync_free(sync: &adev->isolation[i].prev);
5102	}
5103
5104	amdgpu_reset_fini(adev);
5105
5106	/ free i2c buses /
5107	amdgpu_i2c_fini(adev);
5108
5109	if (adev->bios) {
5110	if (amdgpu_emu_mode != `1`)
5111	amdgpu_atombios_fini(adev);
5112	amdgpu_bios_release(adev);
5113	}
5114
5115	kfree(objp: adev->fru_info);
5116	adev->fru_info = NULL;
5117
5118	kfree(objp: adev->xcp_mgr);
5119	adev->xcp_mgr = NULL;
5120
5121	px = amdgpu_device_supports_px(adev);
5122
5123	if (px \|\| (!dev_is_removable(dev: &adev->pdev->dev) &&
5124	apple_gmux_detect(NULL, NULL)))
5125	vga_switcheroo_unregister_client(dev: adev->pdev);
5126
5127	if (px)
5128	vga_switcheroo_fini_domain_pm_ops(dev: adev->dev);
5129
5130	if ((adev->pdev->class >> `8`) == PCI_CLASS_DISPLAY_VGA)
5131	vga_client_unregister(pdev: adev->pdev);
5132
5133	if (drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) {
5134
5135	iounmap(addr: adev->rmmio);
5136	adev->rmmio = NULL;
5137	drm_dev_exit(idx);
5138	}
5139
5140	if (IS_ENABLED(CONFIG_PERF_EVENTS))
5141	amdgpu_pmu_fini(adev);
5142	if (adev->discovery.bin)
5143	amdgpu_discovery_fini(adev);
5144
5145	amdgpu_reset_put_reset_domain(domain: adev->reset_domain);
5146	adev->reset_domain = NULL;
5147
5148	kfree(objp: adev->pci_state);
5149	kfree(objp: adev->pcie_reset_ctx.swds_pcistate);
5150	kfree(objp: adev->pcie_reset_ctx.swus_pcistate);
5151	}
5152
5153	/**
5154	* amdgpu_device_evict_resources - evict device resources
5155	* @adev: amdgpu device object
5156	*
5157	* Evicts all ttm device resources(vram BOs, gart table) from the lru list
5158	* of the vram memory type. Mainly used for evicting device resources
5159	* at suspend time.
5160	*
5161	*/
5162	static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
5163	{
5164	int ret;
5165
5166	/ No need to evict vram on APUs unless going to S4 /
5167	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
5168	return `0`;
5169
5170	/ No need to evict when going to S5 through S4 callbacks /
5171	if (system_state == SYSTEM_POWER_OFF)
5172	return `0`;
5173
5174	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
5175	if (ret) {
5176	dev_warn(adev->dev, "evicting device resources failed\n");
5177	return ret;
5178	}
5179
5180	if (adev->in_s4) {
5181	ret = ttm_device_prepare_hibernation(bdev: &adev->mman.bdev);
5182	if (ret)
5183	dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
5184	}
5185	return ret;
5186	}
5187
5188	/*
5189	* Suspend & resume.
5190	*/
5191	/**
5192	* amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
5193	* @nb: notifier block
5194	* @mode: suspend mode
5195	* @data: data
5196	*
5197	* This function is called when the system is about to suspend or hibernate.
5198	* It is used to set the appropriate flags so that eviction can be optimized
5199	* in the pm prepare callback.
5200	*/
5201	static int amdgpu_device_pm_notifier(struct notifier_block nb, unsigned* long mode,
5202	void *data)
5203	{
5204	struct amdgpu_device adev = container_of(nb, struct* amdgpu_device, pm_nb);
5205
5206	switch (mode) {
5207	case PM_HIBERNATION_PREPARE:
5208	adev->in_s4 = true;
5209	break;
5210	case PM_POST_HIBERNATION:
5211	adev->in_s4 = false;
5212	break;
5213	}
5214
5215	return NOTIFY_DONE;
5216	}
5217
5218	/**
5219	* amdgpu_device_prepare - prepare for device suspend
5220	*
5221	* @dev: drm dev pointer
5222	*
5223	* Prepare to put the hw in the suspend state (all asics).
5224	* Returns 0 for success or an error on failure.
5225	* Called at driver suspend.
5226	*/
5227	int amdgpu_device_prepare(struct drm_device *dev)
5228	{
5229	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
5230	int i, r;
5231
5232	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5233	return `0`;
5234
5235	/ Evict the majority of BOs before starting suspend sequence /
5236	r = amdgpu_device_evict_resources(adev);
5237	if (r)
5238	return r;
5239
5240	flush_delayed_work(dwork: &adev->gfx.gfx_off_delay_work);
5241
5242	for (i = `0`; i < adev->num_ip_blocks; i++) {
5243	if (!adev->ip_blocks[i].status.valid)
5244	continue;
5245	if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
5246	continue;
5247	r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
5248	if (r)
5249	return r;
5250	}
5251
5252	return `0`;
5253	}
5254
5255	/**
5256	* amdgpu_device_complete - complete power state transition
5257	*
5258	* @dev: drm dev pointer
5259	*
5260	* Undo the changes from amdgpu_device_prepare. This will be
5261	* called on all resume transitions, including those that failed.
5262	*/
5263	void amdgpu_device_complete(struct drm_device *dev)
5264	{
5265	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
5266	int i;
5267
5268	for (i = `0`; i < adev->num_ip_blocks; i++) {
5269	if (!adev->ip_blocks[i].status.valid)
5270	continue;
5271	if (!adev->ip_blocks[i].version->funcs->complete)
5272	continue;
5273	adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
5274	}
5275	}
5276
5277	/**
5278	* amdgpu_device_suspend - initiate device suspend
5279	*
5280	* @dev: drm dev pointer
5281	* @notify_clients: notify in-kernel DRM clients
5282	*
5283	* Puts the hw in the suspend state (all asics).
5284	* Returns 0 for success or an error on failure.
5285	* Called at driver suspend.
5286	*/
5287	int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
5288	{
5289	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
5290	int r, rec;
5291
5292	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5293	return `0`;
5294
5295	adev->in_suspend = true;
5296
5297	if (amdgpu_sriov_vf(adev)) {
5298	if (!adev->in_runpm)
5299	amdgpu_amdkfd_suspend_process(adev);
5300	amdgpu_virt_fini_data_exchange(adev);
5301	r = amdgpu_virt_request_full_gpu(adev, init: false);
5302	if (r)
5303	return r;
5304	}
5305
5306	r = amdgpu_acpi_smart_shift_update(adev, ss_state: AMDGPU_SS_DEV_D3);
5307	if (r)
5308	goto unwind_sriov;
5309
5310	if (notify_clients)
5311	drm_client_dev_suspend(dev: adev_to_drm(adev));
5312
5313	cancel_delayed_work_sync(dwork: &adev->delayed_init_work);
5314
5315	amdgpu_ras_suspend(adev);
5316
5317	r = amdgpu_device_ip_suspend_phase1(adev);
5318	if (r)
5319	goto unwind_smartshift;
5320
5321	amdgpu_amdkfd_suspend(adev, suspend_proc: !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5322	r = amdgpu_userq_suspend(adev);
5323	if (r)
5324	goto unwind_ip_phase1;
5325
5326	r = amdgpu_device_evict_resources(adev);
5327	if (r)
5328	goto unwind_userq;
5329
5330	amdgpu_ttm_set_buffer_funcs_status(adev, enable: false);
5331
5332	amdgpu_fence_driver_hw_fini(adev);
5333
5334	r = amdgpu_device_ip_suspend_phase2(adev);
5335	if (r)
5336	goto unwind_evict;
5337
5338	if (amdgpu_sriov_vf(adev))
5339	amdgpu_virt_release_full_gpu(adev, init: false);
5340
5341	return `0`;
5342
5343	unwind_evict:
5344	if (adev->mman.buffer_funcs_ring->sched.ready)
5345	amdgpu_ttm_set_buffer_funcs_status(adev, enable: true);
5346	amdgpu_fence_driver_hw_init(adev);
5347
5348	unwind_userq:
5349	rec = amdgpu_userq_resume(adev);
5350	if (rec) {
5351	dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
5352	return r;
5353	}
5354	rec = amdgpu_amdkfd_resume(adev, resume_proc: !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5355	if (rec) {
5356	dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
5357	return r;
5358	}
5359
5360	unwind_ip_phase1:
5361	/ suspend phase 1 = resume phase 3 /
5362	rec = amdgpu_device_ip_resume_phase3(adev);
5363	if (rec) {
5364	dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
5365	return r;
5366	}
5367
5368	unwind_smartshift:
5369	rec = amdgpu_acpi_smart_shift_update(adev, ss_state: AMDGPU_SS_DEV_D0);
5370	if (rec) {
5371	dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
5372	return r;
5373	}
5374
5375	if (notify_clients)
5376	drm_client_dev_resume(dev: adev_to_drm(adev));
5377
5378	amdgpu_ras_resume(adev);
5379
5380	unwind_sriov:
5381	if (amdgpu_sriov_vf(adev)) {
5382	rec = amdgpu_virt_request_full_gpu(adev, init: true);
5383	if (rec) {
5384	dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
5385	return r;
5386	}
5387	}
5388
5389	adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
5390
5391	return r;
5392	}
5393
5394	static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
5395	{
5396	int r;
5397	unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
5398
5399	/ During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)*
5400	* may not work. The access could be blocked by nBIF protection as VF isn't in
5401	* exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
5402	* so that QEMU reprograms MSIX table.
5403	*/
5404	amdgpu_restore_msix(adev);
5405
5406	r = adev->gfxhub.funcs->get_xgmi_info(adev);
5407	if (r)
5408	return r;
5409
5410	dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
5411	prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
5412
5413	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
5414	adev->vm_manager.vram_base_offset +=
5415	adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
5416
5417	return `0`;
5418	}
5419
5420	/**
5421	* amdgpu_device_resume - initiate device resume
5422	*
5423	* @dev: drm dev pointer
5424	* @notify_clients: notify in-kernel DRM clients
5425	*
5426	* Bring the hw back to operating state (all asics).
5427	* Returns 0 for success or an error on failure.
5428	* Called at driver resume.
5429	*/
5430	int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
5431	{
5432	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
5433	int r = `0`;
5434
5435	if (amdgpu_sriov_vf(adev)) {
5436	r = amdgpu_virt_request_full_gpu(adev, init: true);
5437	if (r)
5438	return r;
5439	}
5440
5441	if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
5442	r = amdgpu_virt_resume(adev);
5443	if (r)
5444	goto exit;
5445	}
5446
5447	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5448	return `0`;
5449
5450	if (adev->in_s0ix)
5451	amdgpu_dpm_gfx_state_change(adev, state: sGpuChangeState_D0Entry);
5452
5453	/ post card /
5454	if (amdgpu_device_need_post(adev)) {
5455	r = amdgpu_device_asic_init(adev);
5456	if (r)
5457	dev_err(adev->dev, "amdgpu asic init failed\n");
5458	}
5459
5460	r = amdgpu_device_ip_resume(adev);
5461
5462	if (r) {
5463	dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
5464	goto exit;
5465	}
5466
5467	r = amdgpu_amdkfd_resume(adev, resume_proc: !amdgpu_sriov_vf(adev) && !adev->in_runpm);
5468	if (r)
5469	goto exit;
5470
5471	r = amdgpu_userq_resume(adev);
5472	if (r)
5473	goto exit;
5474
5475	r = amdgpu_device_ip_late_init(adev);
5476	if (r)
5477	goto exit;
5478
5479	queue_delayed_work(wq: system_wq, dwork: &adev->delayed_init_work,
5480	delay: msecs_to_jiffies(AMDGPU_RESUME_MS));
5481	exit:
5482	if (amdgpu_sriov_vf(adev)) {
5483	amdgpu_virt_init_data_exchange(adev);
5484	amdgpu_virt_release_full_gpu(adev, init: true);
5485
5486	if (!r && !adev->in_runpm)
5487	r = amdgpu_amdkfd_resume_process(adev);
5488	}
5489
5490	if (r)
5491	return r;
5492
5493	/ Make sure IB tests flushed /
5494	flush_delayed_work(dwork: &adev->delayed_init_work);
5495
5496	if (notify_clients)
5497	drm_client_dev_resume(dev: adev_to_drm(adev));
5498
5499	amdgpu_ras_resume(adev);
5500
5501	if (adev->mode_info.num_crtc) {
5502	/*
5503	* Most of the connector probing functions try to acquire runtime pm
5504	* refs to ensure that the GPU is powered on when connector polling is
5505	* performed. Since we're calling this from a runtime PM callback,
5506	* trying to acquire rpm refs will cause us to deadlock.
5507	*
5508	* Since we're guaranteed to be holding the rpm lock, it's safe to
5509	* temporarily disable the rpm helpers so this doesn't deadlock us.
5510	*/
5511	#ifdef CONFIG_PM
5512	dev->dev->power.disable_depth++;
5513	#endif
5514	if (!adev->dc_enabled)
5515	drm_helper_hpd_irq_event(dev);
5516	else
5517	drm_kms_helper_hotplug_event(dev);
5518	#ifdef CONFIG_PM
5519	dev->dev->power.disable_depth--;
5520	#endif
5521	}
5522
5523	amdgpu_vram_mgr_clear_reset_blocks(adev);
5524	adev->in_suspend = false;
5525
5526	if (amdgpu_acpi_smart_shift_update(adev, ss_state: AMDGPU_SS_DEV_D0))
5527	dev_warn(adev->dev, "smart shift update failed\n");
5528
5529	return `0`;
5530	}
5531
5532	/**
5533	* amdgpu_device_ip_check_soft_reset - did soft reset succeed
5534	*
5535	* @adev: amdgpu_device pointer
5536	*
5537	* The list of all the hardware IPs that make up the asic is walked and
5538	* the check_soft_reset callbacks are run. check_soft_reset determines
5539	* if the asic is still hung or not.
5540	* Returns true if any of the IPs are still in a hung state, false if not.
5541	*/
5542	static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
5543	{
5544	int i;
5545	bool asic_hang = false;
5546
5547	if (amdgpu_sriov_vf(adev))
5548	return true;
5549
5550	if (amdgpu_asic_need_full_reset(adev))
5551	return true;
5552
5553	for (i = `0`; i < adev->num_ip_blocks; i++) {
5554	if (!adev->ip_blocks[i].status.valid)
5555	continue;
5556	if (adev->ip_blocks[i].version->funcs->check_soft_reset)
5557	adev->ip_blocks[i].status.hang =
5558	adev->ip_blocks[i].version->funcs->check_soft_reset(
5559	&adev->ip_blocks[i]);
5560	if (adev->ip_blocks[i].status.hang) {
5561	dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
5562	asic_hang = true;
5563	}
5564	}
5565	return asic_hang;
5566	}
5567
5568	/**
5569	* amdgpu_device_ip_pre_soft_reset - prepare for soft reset
5570	*
5571	* @adev: amdgpu_device pointer
5572	*
5573	* The list of all the hardware IPs that make up the asic is walked and the
5574	* pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
5575	* handles any IP specific hardware or software state changes that are
5576	* necessary for a soft reset to succeed.
5577	* Returns 0 on success, negative error code on failure.
5578	*/
5579	static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
5580	{
5581	int i, r = `0`;
5582
5583	for (i = `0`; i < adev->num_ip_blocks; i++) {
5584	if (!adev->ip_blocks[i].status.valid)
5585	continue;
5586	if (adev->ip_blocks[i].status.hang &&
5587	adev->ip_blocks[i].version->funcs->pre_soft_reset) {
5588	r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
5589	if (r)
5590	return r;
5591	}
5592	}
5593
5594	return `0`;
5595	}
5596
5597	/**
5598	* amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
5599	*
5600	* @adev: amdgpu_device pointer
5601	*
5602	* Some hardware IPs cannot be soft reset. If they are hung, a full gpu
5603	* reset is necessary to recover.
5604	* Returns true if a full asic reset is required, false if not.
5605	*/
5606	static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
5607	{
5608	int i;
5609
5610	if (amdgpu_asic_need_full_reset(adev))
5611	return true;
5612
5613	for (i = `0`; i < adev->num_ip_blocks; i++) {
5614	if (!adev->ip_blocks[i].status.valid)
5615	continue;
5616	if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) \|\|
5617	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) \|\|
5618	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) \|\|
5619	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) \|\|
5620	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
5621	if (adev->ip_blocks[i].status.hang) {
5622	dev_info(adev->dev, "Some block need full reset!\n");
5623	return true;
5624	}
5625	}
5626	}
5627	return false;
5628	}
5629
5630	/**
5631	* amdgpu_device_ip_soft_reset - do a soft reset
5632	*
5633	* @adev: amdgpu_device pointer
5634	*
5635	* The list of all the hardware IPs that make up the asic is walked and the
5636	* soft_reset callbacks are run if the block is hung. soft_reset handles any
5637	* IP specific hardware or software state changes that are necessary to soft
5638	* reset the IP.
5639	* Returns 0 on success, negative error code on failure.
5640	*/
5641	static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
5642	{
5643	int i, r = `0`;
5644
5645	for (i = `0`; i < adev->num_ip_blocks; i++) {
5646	if (!adev->ip_blocks[i].status.valid)
5647	continue;
5648	if (adev->ip_blocks[i].status.hang &&
5649	adev->ip_blocks[i].version->funcs->soft_reset) {
5650	r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
5651	if (r)
5652	return r;
5653	}
5654	}
5655
5656	return `0`;
5657	}
5658
5659	/**
5660	* amdgpu_device_ip_post_soft_reset - clean up from soft reset
5661	*
5662	* @adev: amdgpu_device pointer
5663	*
5664	* The list of all the hardware IPs that make up the asic is walked and the
5665	* post_soft_reset callbacks are run if the asic was hung. post_soft_reset
5666	* handles any IP specific hardware or software state changes that are
5667	* necessary after the IP has been soft reset.
5668	* Returns 0 on success, negative error code on failure.
5669	*/
5670	static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5671	{
5672	int i, r = `0`;
5673
5674	for (i = `0`; i < adev->num_ip_blocks; i++) {
5675	if (!adev->ip_blocks[i].status.valid)
5676	continue;
5677	if (adev->ip_blocks[i].status.hang &&
5678	adev->ip_blocks[i].version->funcs->post_soft_reset)
5679	r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
5680	if (r)
5681	return r;
5682	}
5683
5684	return `0`;
5685	}
5686
5687	/**
5688	* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5689	*
5690	* @adev: amdgpu_device pointer
5691	* @reset_context: amdgpu reset context pointer
5692	*
5693	* do VF FLR and reinitialize Asic
5694	* return 0 means succeeded otherwise failed
5695	*/
5696	static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5697	struct amdgpu_reset_context *reset_context)
5698	{
5699	int r;
5700	struct amdgpu_hive_info *hive = NULL;
5701
5702	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5703	if (!amdgpu_ras_get_fed_status(adev))
5704	amdgpu_virt_ready_to_reset(adev);
5705	amdgpu_virt_wait_reset(adev);
5706	clear_bit(nr: AMDGPU_HOST_FLR, addr: &reset_context->flags);
5707	r = amdgpu_virt_request_full_gpu(adev, init: true);
5708	} else {
5709	r = amdgpu_virt_reset_gpu(adev);
5710	}
5711	if (r)
5712	return r;
5713
5714	amdgpu_ras_clear_err_state(adev);
5715	amdgpu_irq_gpu_reset_resume_helper(adev);
5716
5717	/ some sw clean up VF needs to do before recover /
5718	amdgpu_virt_post_reset(adev);
5719
5720	/ Resume IP prior to SMC /
5721	r = amdgpu_device_ip_reinit_early_sriov(adev);
5722	if (r)
5723	return r;
5724
5725	amdgpu_virt_init_data_exchange(adev);
5726
5727	r = amdgpu_device_fw_loading(adev);
5728	if (r)
5729	return r;
5730
5731	/ now we are okay to resume SMC/CP/SDMA /
5732	r = amdgpu_device_ip_reinit_late_sriov(adev);
5733	if (r)
5734	return r;
5735
5736	hive = amdgpu_get_xgmi_hive(adev);
5737	/ Update PSP FW topology after reset /
5738	if (hive && adev->gmc.xgmi.num_physical_nodes > `1`)
5739	r = amdgpu_xgmi_update_topology(hive, adev);
5740	if (hive)
5741	amdgpu_put_xgmi_hive(hive);
5742	if (r)
5743	return r;
5744
5745	r = amdgpu_ib_ring_tests(adev);
5746	if (r)
5747	return r;
5748
5749	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
5750	amdgpu_inc_vram_lost(adev);
5751
5752	/ need to be called during full access so we can't do it later like*
5753	* bare-metal does.
5754	*/
5755	amdgpu_amdkfd_post_reset(adev);
5756	amdgpu_virt_release_full_gpu(adev, init: true);
5757
5758	/ Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset /
5759	if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `2`) \|\|
5760	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `3`) \|\|
5761	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `4`) \|\|
5762	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `5`, `0`) \|\|
5763	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`11`, `0`, `3`))
5764	amdgpu_ras_resume(adev);
5765
5766	amdgpu_virt_ras_telemetry_post_reset(adev);
5767
5768	return `0`;
5769	}
5770
5771	/**
5772	* amdgpu_device_has_job_running - check if there is any unfinished job
5773	*
5774	* @adev: amdgpu_device pointer
5775	*
5776	* check if there is any job running on the device when guest driver receives
5777	* FLR notification from host driver. If there are still jobs running, then
5778	* the guest driver will not respond the FLR reset. Instead, let the job hit
5779	* the timeout and guest driver then issue the reset request.
5780	*/
5781	bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5782	{
5783	int i;
5784
5785	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
5786	struct amdgpu_ring *ring = adev->rings[i];
5787
5788	if (!amdgpu_ring_sched_ready(ring))
5789	continue;
5790
5791	if (amdgpu_fence_count_emitted(ring))
5792	return true;
5793	}
5794	return false;
5795	}
5796
5797	/**
5798	* amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5799	*
5800	* @adev: amdgpu_device pointer
5801	*
5802	* Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5803	* a hung GPU.
5804	*/
5805	bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5806	{
5807
5808	if (amdgpu_gpu_recovery == `0`)
5809	goto disabled;
5810
5811	/ Skip soft reset check in fatal error mode /
5812	if (!amdgpu_ras_is_poison_mode_supported(adev))
5813	return true;
5814
5815	if (amdgpu_sriov_vf(adev))
5816	return true;
5817
5818	if (amdgpu_gpu_recovery == -`1`) {
5819	switch (adev->asic_type) {
5820	#ifdef CONFIG_DRM_AMDGPU_SI
5821	case CHIP_VERDE:
5822	case CHIP_TAHITI:
5823	case CHIP_PITCAIRN:
5824	case CHIP_OLAND:
5825	case CHIP_HAINAN:
5826	#endif
5827	#ifdef CONFIG_DRM_AMDGPU_CIK
5828	case CHIP_KAVERI:
5829	case CHIP_KABINI:
5830	case CHIP_MULLINS:
5831	#endif
5832	case CHIP_CARRIZO:
5833	case CHIP_STONEY:
5834	case CHIP_CYAN_SKILLFISH:
5835	goto disabled;
5836	default:
5837	break;
5838	}
5839	}
5840
5841	return true;
5842
5843	disabled:
5844	dev_info(adev->dev, "GPU recovery disabled.\n");
5845	return false;
5846	}
5847
5848	int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5849	{
5850	u32 i;
5851	int ret = `0`;
5852
5853	if (adev->bios)
5854	amdgpu_atombios_scratch_regs_engine_hung(adev, hung: true);
5855
5856	dev_info(adev->dev, "GPU mode1 reset\n");
5857
5858	/ Cache the state before bus master disable. The saved config space*
5859	* values are used in other cases like restore after mode-2 reset.
5860	*/
5861	amdgpu_device_cache_pci_state(pdev: adev->pdev);
5862
5863	/ disable BM /
5864	pci_clear_master(dev: adev->pdev);
5865
5866	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5867	dev_info(adev->dev, "GPU smu mode1 reset\n");
5868	ret = amdgpu_dpm_mode1_reset(adev);
5869	} else {
5870	dev_info(adev->dev, "GPU psp mode1 reset\n");
5871	ret = psp_gpu_reset(adev);
5872	}
5873
5874	if (ret)
5875	goto mode1_reset_failed;
5876
5877	/ enable mmio access after mode 1 reset completed /
5878	adev->no_hw_access = false;
5879
5880	amdgpu_device_load_pci_state(pdev: adev->pdev);
5881	ret = amdgpu_psp_wait_for_bootloader(adev);
5882	if (ret)
5883	goto mode1_reset_failed;
5884
5885	/ wait for asic to come out of reset /
5886	for (i = `0`; i < adev->usec_timeout; i++) {
5887	u32 memsize = adev->nbio.funcs->get_memsize(adev);
5888
5889	if (memsize != `0xffffffff`)
5890	break;
5891	udelay(usec: `1`);
5892	}
5893
5894	if (i >= adev->usec_timeout) {
5895	ret = -ETIMEDOUT;
5896	goto mode1_reset_failed;
5897	}
5898
5899	if (adev->bios)
5900	amdgpu_atombios_scratch_regs_engine_hung(adev, hung: false);
5901
5902	return `0`;
5903
5904	mode1_reset_failed:
5905	dev_err(adev->dev, "GPU mode1 reset failed\n");
5906	return ret;
5907	}
5908
5909	int amdgpu_device_link_reset(struct amdgpu_device *adev)
5910	{
5911	int ret = `0`;
5912
5913	dev_info(adev->dev, "GPU link reset\n");
5914
5915	if (!amdgpu_reset_in_dpc(adev))
5916	ret = amdgpu_dpm_link_reset(adev);
5917
5918	if (ret)
5919	goto link_reset_failed;
5920
5921	ret = amdgpu_psp_wait_for_bootloader(adev);
5922	if (ret)
5923	goto link_reset_failed;
5924
5925	return `0`;
5926
5927	link_reset_failed:
5928	dev_err(adev->dev, "GPU link reset failed\n");
5929	return ret;
5930	}
5931
5932	int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5933	struct amdgpu_reset_context *reset_context)
5934	{
5935	int i, r = `0`;
5936	struct amdgpu_job *job = NULL;
5937	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5938	bool need_full_reset =
5939	test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5940
5941	if (reset_context->reset_req_dev == adev)
5942	job = reset_context->job;
5943
5944	if (amdgpu_sriov_vf(adev))
5945	amdgpu_virt_pre_reset(adev);
5946
5947	amdgpu_fence_driver_isr_toggle(adev, stop: true);
5948
5949	/ block all schedulers and reset given job's ring /
5950	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
5951	struct amdgpu_ring *ring = adev->rings[i];
5952
5953	if (!amdgpu_ring_sched_ready(ring))
5954	continue;
5955
5956	/ after all hw jobs are reset, hw fence is meaningless, so force_completion /
5957	amdgpu_fence_driver_force_completion(ring);
5958	}
5959
5960	amdgpu_fence_driver_isr_toggle(adev, stop: false);
5961
5962	if (job && job->vm)
5963	drm_sched_increase_karma(bad: &job->base);
5964
5965	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5966	/ If reset handler not implemented, continue; otherwise return /
5967	if (r == -EOPNOTSUPP)
5968	r = `0`;
5969	else
5970	return r;
5971
5972	/ Don't suspend on bare metal if we are not going to HW reset the ASIC /
5973	if (!amdgpu_sriov_vf(adev)) {
5974
5975	if (!need_full_reset)
5976	need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5977
5978	if (!need_full_reset && amdgpu_gpu_recovery &&
5979	amdgpu_device_ip_check_soft_reset(adev)) {
5980	amdgpu_device_ip_pre_soft_reset(adev);
5981	r = amdgpu_device_ip_soft_reset(adev);
5982	amdgpu_device_ip_post_soft_reset(adev);
5983	if (r \|\| amdgpu_device_ip_check_soft_reset(adev)) {
5984	dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5985	need_full_reset = true;
5986	}
5987	}
5988
5989	if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5990	dev_info(tmp_adev->dev, "Dumping IP State\n");
5991	/ Trigger ip dump before we reset the asic /
5992	for (i = `0`; i < tmp_adev->num_ip_blocks; i++)
5993	if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5994	tmp_adev->ip_blocks[i].version->funcs
5995	->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5996	dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5997	}
5998
5999	if (need_full_reset)
6000	r = amdgpu_device_ip_suspend(adev);
6001	if (need_full_reset)
6002	set_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags);
6003	else
6004	clear_bit(nr: AMDGPU_NEED_FULL_RESET,
6005	addr: &reset_context->flags);
6006	}
6007
6008	return r;
6009	}
6010
6011	int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
6012	{
6013	struct list_head *device_list_handle;
6014	bool full_reset, vram_lost = false;
6015	struct amdgpu_device *tmp_adev;
6016	int r, init_level;
6017
6018	device_list_handle = reset_context->reset_device_list;
6019
6020	if (!device_list_handle)
6021	return -EINVAL;
6022
6023	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
6024
6025	/**
6026	* If it's reset on init, it's default init level, otherwise keep level
6027	* as recovery level.
6028	*/
6029	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
6030	init_level = AMDGPU_INIT_LEVEL_DEFAULT;
6031	else
6032	init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
6033
6034	r = `0`;
6035	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6036	amdgpu_set_init_level(adev: tmp_adev, lvl: init_level);
6037	if (full_reset) {
6038	/ post card /
6039	amdgpu_reset_set_dpc_status(adev: tmp_adev, status: false);
6040	amdgpu_ras_clear_err_state(adev: tmp_adev);
6041	r = amdgpu_device_asic_init(adev: tmp_adev);
6042	if (r) {
6043	dev_warn(tmp_adev->dev, "asic atom init failed!");
6044	} else {
6045	dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
6046
6047	r = amdgpu_device_ip_resume_phase1(adev: tmp_adev);
6048	if (r)
6049	goto out;
6050
6051	vram_lost = amdgpu_device_check_vram_lost(adev: tmp_adev);
6052
6053	if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
6054	amdgpu_coredump(adev: tmp_adev, skip_vram_check: false, vram_lost, job: reset_context->job);
6055
6056	if (vram_lost) {
6057	dev_info(
6058	tmp_adev->dev,
6059	"VRAM is lost due to GPU reset!\n");
6060	amdgpu_inc_vram_lost(tmp_adev);
6061	}
6062
6063	r = amdgpu_device_fw_loading(adev: tmp_adev);
6064	if (r)
6065	return r;
6066
6067	r = amdgpu_xcp_restore_partition_mode(
6068	xcp_mgr: tmp_adev->xcp_mgr);
6069	if (r)
6070	goto out;
6071
6072	r = amdgpu_device_ip_resume_phase2(adev: tmp_adev);
6073	if (r)
6074	goto out;
6075
6076	if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
6077	amdgpu_ttm_set_buffer_funcs_status(adev: tmp_adev, enable: true);
6078
6079	r = amdgpu_device_ip_resume_phase3(adev: tmp_adev);
6080	if (r)
6081	goto out;
6082
6083	if (vram_lost)
6084	amdgpu_device_fill_reset_magic(adev: tmp_adev);
6085
6086	/*
6087	* Add this ASIC as tracked as reset was already
6088	* complete successfully.
6089	*/
6090	amdgpu_register_gpu_instance(adev: tmp_adev);
6091
6092	if (!reset_context->hive &&
6093	tmp_adev->gmc.xgmi.num_physical_nodes > `1`)
6094	amdgpu_xgmi_add_device(adev: tmp_adev);
6095
6096	r = amdgpu_device_ip_late_init(adev: tmp_adev);
6097	if (r)
6098	goto out;
6099
6100	r = amdgpu_userq_post_reset(adev: tmp_adev, vram_lost);
6101	if (r)
6102	goto out;
6103
6104	drm_client_dev_resume(dev: adev_to_drm(adev: tmp_adev));
6105
6106	/*
6107	* The GPU enters bad state once faulty pages
6108	* by ECC has reached the threshold, and ras
6109	* recovery is scheduled next. So add one check
6110	* here to break recovery if it indeed exceeds
6111	* bad page threshold, and remind user to
6112	* retire this GPU or setting one bigger
6113	* bad_page_threshold value to fix this once
6114	* probing driver again.
6115	*/
6116	if (!amdgpu_ras_is_rma(adev: tmp_adev)) {
6117	/ must succeed. /
6118	amdgpu_ras_resume(adev: tmp_adev);
6119	} else {
6120	r = -EINVAL;
6121	goto out;
6122	}
6123
6124	/ Update PSP FW topology after reset /
6125	if (reset_context->hive &&
6126	tmp_adev->gmc.xgmi.num_physical_nodes > `1`)
6127	r = amdgpu_xgmi_update_topology(
6128	hive: reset_context->hive, adev: tmp_adev);
6129	}
6130	}
6131
6132	out:
6133	if (!r) {
6134	/ IP init is complete now, set level as default /
6135	amdgpu_set_init_level(adev: tmp_adev,
6136	lvl: AMDGPU_INIT_LEVEL_DEFAULT);
6137	amdgpu_irq_gpu_reset_resume_helper(adev: tmp_adev);
6138	r = amdgpu_ib_ring_tests(adev: tmp_adev);
6139	if (r) {
6140	dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
6141	r = -EAGAIN;
6142	goto end;
6143	}
6144	}
6145
6146	if (r)
6147	tmp_adev->asic_reset_res = r;
6148	}
6149
6150	end:
6151	return r;
6152	}
6153
6154	int amdgpu_do_asic_reset(struct list_head *device_list_handle,
6155	struct amdgpu_reset_context *reset_context)
6156	{
6157	struct amdgpu_device *tmp_adev = NULL;
6158	bool need_full_reset, skip_hw_reset;
6159	int r = `0`;
6160
6161	/ Try reset handler method first /
6162	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
6163	reset_list);
6164
6165	reset_context->reset_device_list = device_list_handle;
6166	r = amdgpu_reset_perform_reset(adev: tmp_adev, reset_context);
6167	/ If reset handler not implemented, continue; otherwise return /
6168	if (r == -EOPNOTSUPP)
6169	r = `0`;
6170	else
6171	return r;
6172
6173	/ Reset handler not implemented, use the default method /
6174	need_full_reset =
6175	test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
6176	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
6177
6178	/*
6179	* ASIC reset has to be done on all XGMI hive nodes ASAP
6180	* to allow proper links negotiation in FW (within 1 sec)
6181	*/
6182	if (!skip_hw_reset && need_full_reset) {
6183	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6184	/ For XGMI run all resets in parallel to speed up the process /
6185	if (tmp_adev->gmc.xgmi.num_physical_nodes > `1`) {
6186	if (!queue_work(wq: system_unbound_wq,
6187	work: &tmp_adev->xgmi_reset_work))
6188	r = -EALREADY;
6189	} else
6190	r = amdgpu_asic_reset(tmp_adev);
6191
6192	if (r) {
6193	dev_err(tmp_adev->dev,
6194	"ASIC reset failed with error, %d for drm dev, %s",
6195	r, adev_to_drm(tmp_adev)->unique);
6196	goto out;
6197	}
6198	}
6199
6200	/ For XGMI wait for all resets to complete before proceed /
6201	if (!r) {
6202	list_for_each_entry(tmp_adev, device_list_handle,
6203	reset_list) {
6204	if (tmp_adev->gmc.xgmi.num_physical_nodes > `1`) {
6205	flush_work(work: &tmp_adev->xgmi_reset_work);
6206	r = tmp_adev->asic_reset_res;
6207	if (r)
6208	break;
6209	}
6210	}
6211	}
6212	}
6213
6214	if (!r && amdgpu_ras_intr_triggered()) {
6215	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6216	amdgpu_ras_reset_error_count(adev: tmp_adev,
6217	block: AMDGPU_RAS_BLOCK__MMHUB);
6218	}
6219
6220	amdgpu_ras_intr_cleared();
6221	}
6222
6223	r = amdgpu_device_reinit_after_reset(reset_context);
6224	if (r == -EAGAIN)
6225	set_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags);
6226	else
6227	clear_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags);
6228
6229	out:
6230	return r;
6231	}
6232
6233	static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
6234	{
6235
6236	switch (amdgpu_asic_reset_method(adev)) {
6237	case AMD_RESET_METHOD_MODE1:
6238	case AMD_RESET_METHOD_LINK:
6239	adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
6240	break;
6241	case AMD_RESET_METHOD_MODE2:
6242	adev->mp1_state = PP_MP1_STATE_RESET;
6243	break;
6244	default:
6245	adev->mp1_state = PP_MP1_STATE_NONE;
6246	break;
6247	}
6248	}
6249
/*
 * Counterpart of amdgpu_device_set_mp1_state(): call
 * amdgpu_vf_error_trans_all() and then put the cached MP1 state back to
 * PP_MP1_STATE_NONE.
 */
static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}
6255
6256	static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
6257	{
6258	struct pci_dev *p = NULL;
6259
6260	p = pci_get_domain_bus_and_slot(domain: pci_domain_nr(bus: adev->pdev->bus),
6261	bus: adev->pdev->bus->number, devfn: `1`);
6262	if (p) {
6263	pm_runtime_enable(dev: &(p->dev));
6264	pm_runtime_resume(dev: &(p->dev));
6265	}
6266
6267	pci_dev_put(dev: p);
6268	}
6269
6270	static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
6271	{
6272	enum amd_reset_method reset_method;
6273	struct pci_dev *p = NULL;
6274	u64 expires;
6275
6276	/*
6277	* For now, only BACO and mode1 reset are confirmed
6278	* to suffer the audio issue without proper suspended.
6279	*/
6280	reset_method = amdgpu_asic_reset_method(adev);
6281	if ((reset_method != AMD_RESET_METHOD_BACO) &&
6282	(reset_method != AMD_RESET_METHOD_MODE1))
6283	return -EINVAL;
6284
6285	p = pci_get_domain_bus_and_slot(domain: pci_domain_nr(bus: adev->pdev->bus),
6286	bus: adev->pdev->bus->number, devfn: `1`);
6287	if (!p)
6288	return -ENODEV;
6289
6290	expires = pm_runtime_autosuspend_expiration(dev: &(p->dev));
6291	if (!expires)
6292	/*
6293	* If we cannot get the audio device autosuspend delay,
6294	* a fixed 4S interval will be used. Considering 3S is
6295	* the audio controller default autosuspend delay setting.
6296	* 4S used here is guaranteed to cover that.
6297	*/
6298	expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * `4ULL`;
6299
6300	while (!pm_runtime_status_suspended(dev: &(p->dev))) {
6301	if (!pm_runtime_suspend(dev: &(p->dev)))
6302	break;
6303
6304	if (expires < ktime_get_mono_fast_ns()) {
6305	dev_warn(adev->dev, "failed to suspend display audio\n");
6306	pci_dev_put(dev: p);
6307	/ TODO: abort the succeeding gpu reset? /
6308	return -ETIMEDOUT;
6309	}
6310	}
6311
6312	pm_runtime_disable(dev: &(p->dev));
6313
6314	pci_dev_put(dev: p);
6315	return `0`;
6316	}
6317
6318	static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
6319	{
6320	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
6321
6322	#if defined(CONFIG_DEBUG_FS)
6323	if (!amdgpu_sriov_vf(adev))
6324	cancel_work(work: &adev->reset_work);
6325	#endif
6326	cancel_work(work: &adev->userq_reset_work);
6327
6328	if (adev->kfd.dev)
6329	cancel_work(work: &adev->kfd.reset_work);
6330
6331	if (amdgpu_sriov_vf(adev))
6332	cancel_work(work: &adev->virt.flr_work);
6333
6334	if (con && adev->ras_enabled)
6335	cancel_work(work: &con->recovery_work);
6336
6337	}
6338
6339	static int amdgpu_device_health_check(struct list_head *device_list_handle)
6340	{
6341	struct amdgpu_device *tmp_adev;
6342	int ret = `0`;
6343
6344	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6345	ret \|= amdgpu_device_bus_status_check(adev: tmp_adev);
6346	}
6347
6348	return ret;
6349	}
6350
6351	static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
6352	struct list_head *device_list,
6353	struct amdgpu_hive_info *hive)
6354	{
6355	struct amdgpu_device *tmp_adev = NULL;
6356
6357	/*
6358	* Build list of devices to reset.
6359	* In case we are in XGMI hive mode, resort the device list
6360	* to put adev in the 1st position.
6361	*/
6362	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > `1`) && hive) {
6363	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6364	list_add_tail(new: &tmp_adev->reset_list, head: device_list);
6365	if (adev->shutdown)
6366	tmp_adev->shutdown = true;
6367	if (amdgpu_reset_in_dpc(adev))
6368	tmp_adev->pcie_reset_ctx.in_link_reset = true;
6369	}
6370	if (!list_is_first(list: &adev->reset_list, head: device_list))
6371	list_rotate_to_front(list: &adev->reset_list, head: device_list);
6372	} else {
6373	list_add_tail(new: &adev->reset_list, head: device_list);
6374	}
6375	}
6376
6377	static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
6378	struct list_head *device_list)
6379	{
6380	struct amdgpu_device *tmp_adev = NULL;
6381
6382	if (list_empty(head: device_list))
6383	return;
6384	tmp_adev =
6385	list_first_entry(device_list, struct amdgpu_device, reset_list);
6386	amdgpu_device_lock_reset_domain(reset_domain: tmp_adev->reset_domain);
6387	}
6388
6389	static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
6390	struct list_head *device_list)
6391	{
6392	struct amdgpu_device *tmp_adev = NULL;
6393
6394	if (list_empty(head: device_list))
6395	return;
6396	tmp_adev =
6397	list_first_entry(device_list, struct amdgpu_device, reset_list);
6398	amdgpu_device_unlock_reset_domain(reset_domain: tmp_adev->reset_domain);
6399	}
6400
6401	static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
6402	struct amdgpu_job *job,
6403	struct amdgpu_reset_context *reset_context,
6404	struct list_head *device_list,
6405	struct amdgpu_hive_info *hive,
6406	bool need_emergency_restart)
6407	{
6408	struct amdgpu_device *tmp_adev = NULL;
6409	int i;
6410
6411	/ block all schedulers and reset given job's ring /
6412	list_for_each_entry(tmp_adev, device_list, reset_list) {
6413	amdgpu_device_set_mp1_state(adev: tmp_adev);
6414
6415	/*
6416	* Try to put the audio codec into suspend state
6417	* before gpu reset started.
6418	*
6419	* Due to the power domain of the graphics device
6420	* is shared with AZ power domain. Without this,
6421	* we may change the audio hardware from behind
6422	* the audio driver's back. That will trigger
6423	* some audio codec errors.
6424	*/
6425	if (!amdgpu_device_suspend_display_audio(adev: tmp_adev))
6426	tmp_adev->pcie_reset_ctx.audio_suspended = true;
6427
6428	amdgpu_ras_set_error_query_ready(adev: tmp_adev, ready: false);
6429
6430	cancel_delayed_work_sync(dwork: &tmp_adev->delayed_init_work);
6431
6432	amdgpu_amdkfd_pre_reset(adev: tmp_adev, reset_context);
6433
6434	/*
6435	* Mark these ASICs to be reset as untracked first
6436	* And add them back after reset completed
6437	*/
6438	amdgpu_unregister_gpu_instance(adev: tmp_adev);
6439
6440	drm_client_dev_suspend(dev: adev_to_drm(adev: tmp_adev));
6441
6442	/ disable ras on ALL IPs /
6443	if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
6444	amdgpu_device_ip_need_full_reset(adev: tmp_adev))
6445	amdgpu_ras_suspend(adev: tmp_adev);
6446
6447	amdgpu_userq_pre_reset(adev: tmp_adev);
6448
6449	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
6450	struct amdgpu_ring *ring = tmp_adev->rings[i];
6451
6452	if (!amdgpu_ring_sched_ready(ring))
6453	continue;
6454
6455	drm_sched_stop(sched: &ring->sched, bad: job ? &job->base : NULL);
6456
6457	if (need_emergency_restart)
6458	amdgpu_job_stop_all_jobs_on_sched(sched: &ring->sched);
6459	}
6460	atomic_inc(v: &tmp_adev->gpu_reset_counter);
6461	}
6462	}
6463
6464	static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
6465	struct list_head *device_list,
6466	struct amdgpu_reset_context *reset_context)
6467	{
6468	struct amdgpu_device *tmp_adev = NULL;
6469	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
6470	int r = `0`;
6471
6472	retry: / Rest of adevs pre asic reset from XGMI hive. /
6473	list_for_each_entry(tmp_adev, device_list, reset_list) {
6474	r = amdgpu_device_pre_asic_reset(adev: tmp_adev, reset_context);
6475	/TODO Should we stop ?/
6476	if (r) {
6477	dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
6478	r, adev_to_drm(tmp_adev)->unique);
6479	tmp_adev->asic_reset_res = r;
6480	}
6481	}
6482
6483	/ Actual ASIC resets if needed./
6484	/ Host driver will handle XGMI hive reset for SRIOV /
6485	if (amdgpu_sriov_vf(adev)) {
6486
6487	/ Bail out of reset early /
6488	if (amdgpu_ras_is_rma(adev))
6489	return -ENODEV;
6490
6491	if (amdgpu_ras_get_fed_status(adev) \|\| amdgpu_virt_rcvd_ras_interrupt(adev)) {
6492	dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
6493	amdgpu_ras_set_fed(adev, status: true);
6494	set_bit(nr: AMDGPU_HOST_FLR, addr: &reset_context->flags);
6495	}
6496
6497	r = amdgpu_device_reset_sriov(adev, reset_context);
6498	if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > `0`) {
6499	amdgpu_virt_release_full_gpu(adev, init: true);
6500	goto retry;
6501	}
6502	if (r)
6503	adev->asic_reset_res = r;
6504	} else {
6505	r = amdgpu_do_asic_reset(device_list_handle: device_list, reset_context);
6506	if (r && r == -EAGAIN)
6507	goto retry;
6508	}
6509
6510	list_for_each_entry(tmp_adev, device_list, reset_list) {
6511	/*
6512	* Drop any pending non scheduler resets queued before reset is done.
6513	* Any reset scheduled after this point would be valid. Scheduler resets
6514	* were already dropped during drm_sched_stop and no new ones can come
6515	* in before drm_sched_start.
6516	*/
6517	amdgpu_device_stop_pending_resets(adev: tmp_adev);
6518	}
6519
6520	return r;
6521	}
6522
6523	static int amdgpu_device_sched_resume(struct list_head *device_list,
6524	struct amdgpu_reset_context *reset_context,
6525	bool job_signaled)
6526	{
6527	struct amdgpu_device *tmp_adev = NULL;
6528	int i, r = `0`;
6529
6530	/ Post ASIC reset for all devs ./
6531	list_for_each_entry(tmp_adev, device_list, reset_list) {
6532
6533	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
6534	struct amdgpu_ring *ring = tmp_adev->rings[i];
6535
6536	if (!amdgpu_ring_sched_ready(ring))
6537	continue;
6538
6539	drm_sched_start(sched: &ring->sched, errno: `0`);
6540	}
6541
6542	if (!drm_drv_uses_atomic_modeset(dev: adev_to_drm(adev: tmp_adev)) && !job_signaled)
6543	drm_helper_resume_force_mode(dev: adev_to_drm(adev: tmp_adev));
6544
6545	if (tmp_adev->asic_reset_res) {
6546	/ bad news, how to tell it to userspace ?*
6547	* for ras error, we should report GPU bad status instead of
6548	* reset failure
6549	*/
6550	if (reset_context->src != AMDGPU_RESET_SRC_RAS \|\|
6551	!amdgpu_ras_eeprom_check_err_threshold(adev: tmp_adev))
6552	dev_info(
6553	tmp_adev->dev,
6554	"GPU reset(%d) failed with error %d \n",
6555	atomic_read(
6556	&tmp_adev->gpu_reset_counter),
6557	tmp_adev->asic_reset_res);
6558	amdgpu_vf_error_put(adev: tmp_adev,
6559	sub_error_code: AMDGIM_ERROR_VF_GPU_RESET_FAIL, error_flags: `0`,
6560	error_data: tmp_adev->asic_reset_res);
6561	if (!r)
6562	r = tmp_adev->asic_reset_res;
6563	tmp_adev->asic_reset_res = `0`;
6564	} else {
6565	dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
6566	atomic_read(&tmp_adev->gpu_reset_counter));
6567	if (amdgpu_acpi_smart_shift_update(adev: tmp_adev,
6568	ss_state: AMDGPU_SS_DEV_D0))
6569	dev_warn(tmp_adev->dev,
6570	"smart shift update failed\n");
6571	}
6572	}
6573
6574	return r;
6575	}
6576
6577	static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
6578	struct list_head *device_list,
6579	bool need_emergency_restart)
6580	{
6581	struct amdgpu_device *tmp_adev = NULL;
6582
6583	list_for_each_entry(tmp_adev, device_list, reset_list) {
6584	/ unlock kfd: SRIOV would do it separately /
6585	if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
6586	amdgpu_amdkfd_post_reset(adev: tmp_adev);
6587
6588	/ kfd_post_reset will do nothing if kfd device is not initialized,*
6589	* need to bring up kfd here if it's not be initialized before
6590	*/
6591	if (!adev->kfd.init_complete)
6592	amdgpu_amdkfd_device_init(adev);
6593
6594	if (tmp_adev->pcie_reset_ctx.audio_suspended)
6595	amdgpu_device_resume_display_audio(adev: tmp_adev);
6596
6597	amdgpu_device_unset_mp1_state(adev: tmp_adev);
6598
6599	amdgpu_ras_set_error_query_ready(adev: tmp_adev, ready: true);
6600
6601	}
6602	}
6603
6604
/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the hang, may be NULL
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic.
 *
 * The result is also stored in the reset domain's reset_res, and a DRM
 * wedged event with DRM_WEDGE_RECOVERY_NONE is sent when the function
 * succeeds.
 *
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	int r = 0;
	bool need_emergency_restart = false;
	/* save the pasid here as the job may be freed before the end of the reset */
	int pasid = job ? job->pasid : -EINVAL;

	/*
	 * If it reaches here because of hang/timeout and a RAS error is
	 * detected at the same time, let RAS recovery take care of it.
	 */
	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
	    !amdgpu_sriov_vf(adev) &&
	    reset_context->src != AMDGPU_RESET_SRC_RAS) {
		dev_dbg(adev->dev,
			"Gpu recovery from source: %d yielding to RAS error recovery handling",
			reset_context->src);
		return 0;
	}

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
	    amdgpu_ras_get_context(adev)->reboot) {
		dev_warn(adev->dev, "Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!. Source: %d\n",
		 need_emergency_restart ? "jobs stop" : "reset",
		 reset_context->src);

	/* The hive lock, when there is a hive, is held until end_reset. */
	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	INIT_LIST_HEAD(&device_list);

	amdgpu_device_recovery_prepare(adev, &device_list, hive);

	/* On bare metal, give up before touching anything if a device fails the check. */
	if (!amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_health_check(&device_list);
		if (r)
			goto end_reset;
	}

	/* Cannot be called after locking reset domain */
	amdgpu_ras_pre_reset(adev, &device_list);

	/* We need to lock reset domain only once both for XGMI and single device */
	amdgpu_device_recovery_get_reset_lock(adev, &device_list);

	amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
				      hive, need_emergency_restart);
	/* Emergency restart only stops jobs: no HW reset, no scheduler restart. */
	if (need_emergency_restart)
		goto skip_sched_resume;
	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

	/* On failure the scheduler restart and the resume step are both skipped. */
	r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
	if (r)
		goto reset_unlock;
skip_hw_reset:
	r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
	if (r)
		goto reset_unlock;
skip_sched_resume:
	amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
reset_unlock:
	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
	amdgpu_ras_post_reset(adev, &device_list);
end_reset:
	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	/* Publish the outcome through the reset domain. */
	atomic_set(&adev->reset_domain->reset_res, r);

	if (!r) {
		struct amdgpu_task_info *ti = NULL;

		/*
		 * The job may already be freed at this point via the sched tdr workqueue so
		 * use the cached pasid.
		 */
		if (pasid >= 0)
			ti = amdgpu_vm_get_task_info_pasid(adev, pasid);

		drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
				     ti ? &ti->task : NULL);

		amdgpu_vm_put_task_info(ti);
	}

	return r;
}
6743
6744	/**
6745	* amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
6746	*
6747	* @adev: amdgpu_device pointer
6748	* @speed: pointer to the speed of the link
6749	* @width: pointer to the width of the link
6750	*
6751	* Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6752	* first physical partner to an AMD dGPU.
6753	* This will exclude any virtual switches and links.
6754	*/
6755	static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
6756	enum pci_bus_speed *speed,
6757	enum pcie_link_width *width)
6758	{
6759	struct pci_dev *parent = adev->pdev;
6760
6761	if (!speed \|\| !width)
6762	return;
6763
6764	*speed = PCI_SPEED_UNKNOWN;
6765	*width = PCIE_LNK_WIDTH_UNKNOWN;
6766
6767	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
6768	while ((parent = pci_upstream_bridge(dev: parent))) {
6769	/ skip upstream/downstream switches internal to dGPU/
6770	if (parent->vendor == PCI_VENDOR_ID_ATI)
6771	continue;
6772	*speed = pcie_get_speed_cap(dev: parent);
6773	*width = pcie_get_width_cap(dev: parent);
6774	break;
6775	}
6776	} else {
6777	/ use the current speeds rather than max if switching is not supported /
6778	pcie_bandwidth_available(dev: adev->pdev, NULL, speed, width);
6779	}
6780	}
6781
6782	/**
6783	* amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
6784	*
6785	* @adev: amdgpu_device pointer
6786	* @speed: pointer to the speed of the link
6787	* @width: pointer to the width of the link
6788	*
6789	* Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6790	* AMD dGPU which may be a virtual upstream bridge.
6791	*/
6792	static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
6793	enum pci_bus_speed *speed,
6794	enum pcie_link_width *width)
6795	{
6796	struct pci_dev *parent = adev->pdev;
6797
6798	if (!speed \|\| !width)
6799	return;
6800
6801	parent = pci_upstream_bridge(dev: parent);
6802	if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
6803	/ use the upstream/downstream switches internal to dGPU /
6804	*speed = pcie_get_speed_cap(dev: parent);
6805	*width = pcie_get_width_cap(dev: parent);
6806	while ((parent = pci_upstream_bridge(dev: parent))) {
6807	if (parent->vendor == PCI_VENDOR_ID_ATI) {
6808	/ use the upstream/downstream switches internal to dGPU /
6809	*speed = pcie_get_speed_cap(dev: parent);
6810	*width = pcie_get_width_cap(dev: parent);
6811	}
6812	}
6813	} else {
6814	/ use the device itself /
6815	*speed = pcie_get_speed_cap(dev: adev->pdev);
6816	*width = pcie_get_width_cap(dev: adev->pdev);
6817	}
6818	}
6819
6820	/**
6821	* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
6822	*
6823	* @adev: amdgpu_device pointer
6824	*
6825	* Fetches and stores in the driver the PCIE capabilities (gen speed
6826	* and lanes) of the slot the device is in. Handles APUs and
6827	* virtualized environments where PCIE config space may not be available.
6828	*/
6829	static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6830	{
6831	enum pci_bus_speed speed_cap, platform_speed_cap;
6832	enum pcie_link_width platform_link_width, link_width;
6833
6834	if (amdgpu_pcie_gen_cap)
6835	adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6836
6837	if (amdgpu_pcie_lane_cap)
6838	adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6839
6840	/ covers APUs as well /
6841	if (pci_is_root_bus(pbus: adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6842	if (adev->pm.pcie_gen_mask == `0`)
6843	adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6844	if (adev->pm.pcie_mlw_mask == `0`)
6845	adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6846	return;
6847	}
6848
6849	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6850	return;
6851
6852	amdgpu_device_partner_bandwidth(adev, speed: &platform_speed_cap,
6853	width: &platform_link_width);
6854	amdgpu_device_gpu_bandwidth(adev, speed: &speed_cap, width: &link_width);
6855
6856	if (adev->pm.pcie_gen_mask == `0`) {
6857	/ asic caps /
6858	if (speed_cap == PCI_SPEED_UNKNOWN) {
6859	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6860	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6861	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6862	} else {
6863	if (speed_cap == PCIE_SPEED_32_0GT)
6864	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6865	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6866	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 \|
6867	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 \|
6868	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6869	else if (speed_cap == PCIE_SPEED_16_0GT)
6870	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6871	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6872	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 \|
6873	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6874	else if (speed_cap == PCIE_SPEED_8_0GT)
6875	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6876	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6877	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6878	else if (speed_cap == PCIE_SPEED_5_0GT)
6879	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6880	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6881	else
6882	adev->pm.pcie_gen_mask \|= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6883	}
6884	/ platform caps /
6885	if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6886	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6887	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6888	} else {
6889	if (platform_speed_cap == PCIE_SPEED_32_0GT)
6890	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6891	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6892	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 \|
6893	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 \|
6894	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6895	else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6896	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6897	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6898	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 \|
6899	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6900	else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6901	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6902	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6903	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6904	else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6905	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6906	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6907	else
6908	adev->pm.pcie_gen_mask \|= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6909
6910	}
6911	}
6912	if (adev->pm.pcie_mlw_mask == `0`) {
6913	/ asic caps /
6914	if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6915	adev->pm.pcie_mlw_mask \|= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6916	} else {
6917	switch (link_width) {
6918	case PCIE_LNK_X32:
6919	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 \|
6920	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 \|
6921	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6922	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6923	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6924	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6925	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6926	break;
6927	case PCIE_LNK_X16:
6928	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 \|
6929	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6930	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6931	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6932	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6933	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6934	break;
6935	case PCIE_LNK_X12:
6936	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6937	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6938	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6939	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6940	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6941	break;
6942	case PCIE_LNK_X8:
6943	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6944	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6945	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6946	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6947	break;
6948	case PCIE_LNK_X4:
6949	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6950	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6951	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6952	break;
6953	case PCIE_LNK_X2:
6954	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6955	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6956	break;
6957	case PCIE_LNK_X1:
6958	adev->pm.pcie_mlw_mask \|= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6959	break;
6960	default:
6961	break;
6962	}
6963	}
6964	/ platform caps /
6965	if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6966	adev->pm.pcie_mlw_mask \|= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6967	} else {
6968	switch (platform_link_width) {
6969	case PCIE_LNK_X32:
6970	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 \|
6971	CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 \|
6972	CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6973	CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6974	CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6975	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6976	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6977	break;
6978	case PCIE_LNK_X16:
6979	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 \|
6980	CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6981	CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6982	CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6983	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6984	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6985	break;
6986	case PCIE_LNK_X12:
6987	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6988	CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6989	CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6990	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6991	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6992	break;
6993	case PCIE_LNK_X8:
6994	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6995	CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6996	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6997	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6998	break;
6999	case PCIE_LNK_X4:
7000	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
7001	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
7002	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
7003	break;
7004	case PCIE_LNK_X2:
7005	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
7006	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
7007	break;
7008	case PCIE_LNK_X1:
7009	adev->pm.pcie_mlw_mask \|= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
7010	break;
7011	default:
7012	break;
7013	}
7014	}
7015	}
7016	}
7017
7018	/**
7019	* amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
7020	*
7021	* @adev: amdgpu_device pointer
7022	* @peer_adev: amdgpu_device pointer for peer device trying to access @adev
7023	*
7024	* Return true if @peer_adev can access (DMA) @adev through the PCIe
7025	* BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
7026	* @peer_adev.
7027	*/
7028	bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
7029	struct amdgpu_device *peer_adev)
7030	{
7031	#ifdef CONFIG_HSA_AMD_P2P
7032	bool p2p_access =
7033	!adev->gmc.xgmi.connected_to_cpu &&
7034	!(pci_p2pdma_distance(provider: adev->pdev, client: peer_adev->dev, verbose: false) < `0`);
7035	if (!p2p_access)
7036	dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
7037	pci_name(peer_adev->pdev));
7038
7039	bool is_large_bar = adev->gmc.visible_vram_size &&
7040	adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
7041	bool p2p_addressable = amdgpu_device_check_iommu_remap(adev: peer_adev);
7042
7043	if (!p2p_addressable) {
7044	uint64_t address_mask = peer_adev->dev->dma_mask ?
7045	~*peer_adev->dev->dma_mask : ~((`1ULL` << `32`) - `1`);
7046	resource_size_t aper_limit =
7047	adev->gmc.aper_base + adev->gmc.aper_size - `1`;
7048
7049	p2p_addressable = !(adev->gmc.aper_base & address_mask \|\|
7050	aper_limit & address_mask);
7051	}
7052	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
7053	#else
7054	return false;
7055	#endif
7056	}
7057
7058	int amdgpu_device_baco_enter(struct amdgpu_device *adev)
7059	{
7060	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
7061
7062	if (!amdgpu_device_supports_baco(adev))
7063	return -ENOTSUPP;
7064
7065	if (ras && adev->ras_enabled &&
7066	adev->nbio.funcs->enable_doorbell_interrupt)
7067	adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
7068
7069	return amdgpu_dpm_baco_enter(adev);
7070	}
7071
7072	int amdgpu_device_baco_exit(struct amdgpu_device *adev)
7073	{
7074	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
7075	int ret = `0`;
7076
7077	if (!amdgpu_device_supports_baco(adev))
7078	return -ENOTSUPP;
7079
7080	ret = amdgpu_dpm_baco_exit(adev);
7081	if (ret)
7082	return ret;
7083
7084	if (ras && adev->ras_enabled &&
7085	adev->nbio.funcs->enable_doorbell_interrupt)
7086	adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
7087
7088	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
7089	adev->nbio.funcs->clear_doorbell_interrupt)
7090	adev->nbio.funcs->clear_doorbell_interrupt(adev);
7091
7092	return `0`;
7093	}
7094
7095	/**
7096	* amdgpu_pci_error_detected - Called when a PCI error is detected.
7097	* @pdev: PCI device struct
7098	* @state: PCI channel state
7099	*
7100	* Description: Called when a PCI error is detected.
7101	*
7102	* Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
7103	*/
7104	pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
7105	{
7106	struct drm_device *dev = pci_get_drvdata(pdev);
7107	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
7108	struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
7109	amdgpu_get_xgmi_hive(adev);
7110	struct amdgpu_reset_context reset_context;
7111	struct list_head device_list;
7112
7113	dev_info(adev->dev, "PCI error: detected callback!!\n");
7114
7115	adev->pci_channel_state = state;
7116
7117	switch (state) {
7118	case pci_channel_io_normal:
7119	dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
7120	return PCI_ERS_RESULT_CAN_RECOVER;
7121	case pci_channel_io_frozen:
7122	/ Fatal error, prepare for slot reset /
7123	dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
7124	if (hive) {
7125	/ Hive devices should be able to support FW based*
7126	* link reset on other devices, if not return.
7127	*/
7128	if (!amdgpu_dpm_is_link_reset_supported(adev)) {
7129	dev_warn(adev->dev,
7130	"No support for XGMI hive yet...\n");
7131	return PCI_ERS_RESULT_DISCONNECT;
7132	}
7133	/ Set dpc status only if device is part of hive*
7134	* Non-hive devices should be able to recover after
7135	* link reset.
7136	*/
7137	amdgpu_reset_set_dpc_status(adev, status: true);
7138
7139	mutex_lock(&hive->hive_lock);
7140	}
7141	memset(&reset_context, `0`, sizeof(reset_context));
7142	INIT_LIST_HEAD(list: &device_list);
7143
7144	amdgpu_device_recovery_prepare(adev, device_list: &device_list, hive);
7145	amdgpu_device_recovery_get_reset_lock(adev, device_list: &device_list);
7146	amdgpu_device_halt_activities(adev, NULL, reset_context: &reset_context, device_list: &device_list,
7147	hive, need_emergency_restart: false);
7148	if (hive)
7149	mutex_unlock(lock: &hive->hive_lock);
7150	return PCI_ERS_RESULT_NEED_RESET;
7151	case pci_channel_io_perm_failure:
7152	/ Permanent error, prepare for device removal /
7153	dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
7154	return PCI_ERS_RESULT_DISCONNECT;
7155	}
7156
7157	return PCI_ERS_RESULT_NEED_RESET;
7158	}
7159
7160	/**
7161	* amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
7162	* @pdev: pointer to PCI device
7163	*/
7164	pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
7165	{
7166	struct drm_device *dev = pci_get_drvdata(pdev);
7167	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
7168
7169	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
7170
7171	/ TODO - dump whatever for debugging purposes /
7172
7173	/ This called only if amdgpu_pci_error_detected returns*
7174	* PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
7175	* works, no need to reset slot.
7176	*/
7177
7178	return PCI_ERS_RESULT_RECOVERED;
7179	}
7180
7181	/**
7182	* amdgpu_pci_slot_reset - Called when PCI slot has been reset.
7183	* @pdev: PCI device struct
7184	*
7185	* Description: This routine is called by the pci error recovery
7186	* code after the PCI slot has been reset, just before we
7187	* should resume normal operations.
7188	*/
7189	pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
7190	{
7191	struct drm_device *dev = pci_get_drvdata(pdev);
7192	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
7193	struct amdgpu_reset_context reset_context;
7194	struct amdgpu_device *tmp_adev;
7195	struct amdgpu_hive_info *hive;
7196	struct list_head device_list;
7197	struct pci_dev *link_dev;
7198	int r = `0`, i, timeout;
7199	u32 memsize;
7200	u16 status;
7201
7202	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
7203
7204	memset(&reset_context, `0`, sizeof(reset_context));
7205
7206	if (adev->pcie_reset_ctx.swus)
7207	link_dev = adev->pcie_reset_ctx.swus;
7208	else
7209	link_dev = adev->pdev;
7210	/ wait for asic to come out of reset, timeout = 10s /
7211	timeout = `10000`;
7212	do {
7213	usleep_range(min: `10000`, max: `10500`);
7214	r = pci_read_config_word(dev: link_dev, PCI_VENDOR_ID, val: &status);
7215	timeout -= `10`;
7216	} while (timeout > `0` && (status != PCI_VENDOR_ID_ATI) &&
7217	(status != PCI_VENDOR_ID_AMD));
7218
7219	if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
7220	r = -ETIME;
7221	goto out;
7222	}
7223
7224	amdgpu_device_load_switch_state(adev);
7225	/ Restore PCI confspace /
7226	amdgpu_device_load_pci_state(pdev);
7227
7228	/ confirm ASIC came out of reset /
7229	for (i = `0`; i < adev->usec_timeout; i++) {
7230	memsize = amdgpu_asic_get_config_memsize(adev);
7231
7232	if (memsize != `0xffffffff`)
7233	break;
7234	udelay(usec: `1`);
7235	}
7236	if (memsize == `0xffffffff`) {
7237	r = -ETIME;
7238	goto out;
7239	}
7240
7241	reset_context.method = AMD_RESET_METHOD_NONE;
7242	reset_context.reset_req_dev = adev;
7243	set_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context.flags);
7244	set_bit(nr: AMDGPU_SKIP_COREDUMP, addr: &reset_context.flags);
7245	INIT_LIST_HEAD(list: &device_list);
7246
7247	hive = amdgpu_get_xgmi_hive(adev);
7248	if (hive) {
7249	mutex_lock(&hive->hive_lock);
7250	reset_context.hive = hive;
7251	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
7252	tmp_adev->pcie_reset_ctx.in_link_reset = true;
7253	list_add_tail(new: &tmp_adev->reset_list, head: &device_list);
7254	}
7255	} else {
7256	set_bit(nr: AMDGPU_SKIP_HW_RESET, addr: &reset_context.flags);
7257	list_add_tail(new: &adev->reset_list, head: &device_list);
7258	}
7259
7260	r = amdgpu_device_asic_reset(adev, device_list: &device_list, reset_context: &reset_context);
7261	out:
7262	if (!r) {
7263	if (amdgpu_device_cache_pci_state(pdev: adev->pdev))
7264	pci_restore_state(dev: adev->pdev);
7265	dev_info(adev->dev, "PCIe error recovery succeeded\n");
7266	} else {
7267	dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
7268	if (hive) {
7269	list_for_each_entry(tmp_adev, &device_list, reset_list)
7270	amdgpu_device_unset_mp1_state(adev: tmp_adev);
7271	}
7272	amdgpu_device_recovery_put_reset_lock(adev, device_list: &device_list);
7273	}
7274
7275	if (hive) {
7276	mutex_unlock(lock: &hive->hive_lock);
7277	amdgpu_put_xgmi_hive(hive);
7278	}
7279
7280	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
7281	}
7282
7283	/**
7284	* amdgpu_pci_resume() - resume normal ops after PCI reset
7285	* @pdev: pointer to PCI device
7286	*
7287	* Called when the error recovery driver tells us that its
7288	* OK to resume normal operation.
7289	*/
7290	void amdgpu_pci_resume(struct pci_dev *pdev)
7291	{
7292	struct drm_device *dev = pci_get_drvdata(pdev);
7293	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
7294	struct list_head device_list;
7295	struct amdgpu_hive_info *hive = NULL;
7296	struct amdgpu_device *tmp_adev = NULL;
7297
7298	dev_info(adev->dev, "PCI error: resume callback!!\n");
7299
7300	/ Only continue execution for the case of pci_channel_io_frozen /
7301	if (adev->pci_channel_state != pci_channel_io_frozen)
7302	return;
7303
7304	INIT_LIST_HEAD(list: &device_list);
7305
7306	hive = amdgpu_get_xgmi_hive(adev);
7307	if (hive) {
7308	mutex_lock(&hive->hive_lock);
7309	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
7310	tmp_adev->pcie_reset_ctx.in_link_reset = false;
7311	list_add_tail(new: &tmp_adev->reset_list, head: &device_list);
7312	}
7313	} else
7314	list_add_tail(new: &adev->reset_list, head: &device_list);
7315
7316	amdgpu_device_sched_resume(device_list: &device_list, NULL, NULL);
7317	amdgpu_device_gpu_resume(adev, device_list: &device_list, need_emergency_restart: false);
7318	amdgpu_device_recovery_put_reset_lock(adev, device_list: &device_list);
7319
7320	if (hive) {
7321	mutex_unlock(lock: &hive->hive_lock);
7322	amdgpu_put_xgmi_hive(hive);
7323	}
7324	}
7325
7326	static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
7327	{
7328	struct pci_dev swus, swds;
7329	int r;
7330
7331	swds = pci_upstream_bridge(dev: adev->pdev);
7332	if (!swds \|\| swds->vendor != PCI_VENDOR_ID_ATI \|\|
7333	pci_pcie_type(dev: swds) != PCI_EXP_TYPE_DOWNSTREAM)
7334	return;
7335	swus = pci_upstream_bridge(dev: swds);
7336	if (!swus \|\|
7337	(swus->vendor != PCI_VENDOR_ID_ATI &&
7338	swus->vendor != PCI_VENDOR_ID_AMD) \|\|
7339	pci_pcie_type(dev: swus) != PCI_EXP_TYPE_UPSTREAM)
7340	return;
7341
7342	/ If already saved, return /
7343	if (adev->pcie_reset_ctx.swus)
7344	return;
7345	/ Upstream bridge is ATI, assume it's SWUS/DS architecture /
7346	r = pci_save_state(dev: swds);
7347	if (r)
7348	return;
7349	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(dev: swds);
7350
7351	r = pci_save_state(dev: swus);
7352	if (r)
7353	return;
7354	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(dev: swus);
7355
7356	adev->pcie_reset_ctx.swus = swus;
7357	}
7358
7359	static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
7360	{
7361	struct pci_dev *pdev;
7362	int r;
7363
7364	if (!adev->pcie_reset_ctx.swds_pcistate \|\|
7365	!adev->pcie_reset_ctx.swus_pcistate)
7366	return;
7367
7368	pdev = adev->pcie_reset_ctx.swus;
7369	r = pci_load_saved_state(dev: pdev, state: adev->pcie_reset_ctx.swus_pcistate);
7370	if (!r) {
7371	pci_restore_state(dev: pdev);
7372	} else {
7373	dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
7374	return;
7375	}
7376
7377	pdev = pci_upstream_bridge(dev: adev->pdev);
7378	r = pci_load_saved_state(dev: pdev, state: adev->pcie_reset_ctx.swds_pcistate);
7379	if (!r)
7380	pci_restore_state(dev: pdev);
7381	else
7382	dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
7383	}
7384
7385	bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
7386	{
7387	struct drm_device *dev = pci_get_drvdata(pdev);
7388	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
7389	int r;
7390
7391	if (amdgpu_sriov_vf(adev))
7392	return false;
7393
7394	r = pci_save_state(dev: pdev);
7395	if (!r) {
7396	kfree(objp: adev->pci_state);
7397
7398	adev->pci_state = pci_store_saved_state(dev: pdev);
7399
7400	if (!adev->pci_state) {
7401	dev_err(adev->dev, "Failed to store PCI saved state");
7402	return false;
7403	}
7404	} else {
7405	dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
7406	return false;
7407	}
7408
7409	amdgpu_device_cache_switch_state(adev);
7410
7411	return true;
7412	}
7413
7414	bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
7415	{
7416	struct drm_device *dev = pci_get_drvdata(pdev);
7417	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
7418	int r;
7419
7420	if (!adev->pci_state)
7421	return false;
7422
7423	r = pci_load_saved_state(dev: pdev, state: adev->pci_state);
7424
7425	if (!r) {
7426	pci_restore_state(dev: pdev);
7427	} else {
7428	dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
7429	return false;
7430	}
7431
7432	return true;
7433	}
7434
7435	void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
7436	struct amdgpu_ring *ring)
7437	{
7438	#ifdef CONFIG_X86_64
7439	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
7440	return;
7441	#endif
7442	if (adev->gmc.xgmi.connected_to_cpu)
7443	return;
7444
7445	if (ring && ring->funcs->emit_hdp_flush) {
7446	amdgpu_ring_emit_hdp_flush(ring);
7447	return;
7448	}
7449
7450	if (!ring && amdgpu_sriov_runtime(adev)) {
7451	if (!amdgpu_kiq_hdp_flush(adev))
7452	return;
7453	}
7454
7455	amdgpu_hdp_flush(adev, ring);
7456	}
7457
7458	void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
7459	struct amdgpu_ring *ring)
7460	{
7461	#ifdef CONFIG_X86_64
7462	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
7463	return;
7464	#endif
7465	if (adev->gmc.xgmi.connected_to_cpu)
7466	return;
7467
7468	amdgpu_hdp_invalidate(adev, ring);
7469	}
7470
7471	int amdgpu_in_reset(struct amdgpu_device *adev)
7472	{
7473	return atomic_read(v: &adev->reset_domain->in_gpu_reset);
7474	}
7475
7476	/**
7477	* amdgpu_device_halt() - bring hardware to some kind of halt state
7478	*
7479	* @adev: amdgpu_device pointer
7480	*
7481	* Bring hardware to some kind of halt state so that no one can touch it
7482	* any more. It will help to maintain error context when error occurred.
7483	* Compare to a simple hang, the system will keep stable at least for SSH
7484	* access. Then it should be trivial to inspect the hardware state and
7485	* see what's going on. Implemented as following:
7486	*
7487	* 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
7488	* clears all CPU mappings to device, disallows remappings through page faults
7489	* 2. amdgpu_irq_disable_all() disables all interrupts
7490	* 3. amdgpu_fence_driver_hw_fini() signals all HW fences
7491	* 4. set adev->no_hw_access to avoid potential crashes after setp 5
7492	* 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
7493	* 6. pci_disable_device() and pci_wait_for_pending_transaction()
7494	* flush any in flight DMA operations
7495	*/
7496	void amdgpu_device_halt(struct amdgpu_device *adev)
7497	{
7498	struct pci_dev *pdev = adev->pdev;
7499	struct drm_device *ddev = adev_to_drm(adev);
7500
7501	amdgpu_xcp_dev_unplug(adev);
7502	drm_dev_unplug(dev: ddev);
7503
7504	amdgpu_irq_disable_all(adev);
7505
7506	amdgpu_fence_driver_hw_fini(adev);
7507
7508	adev->no_hw_access = true;
7509
7510	amdgpu_device_unmap_mmio(adev);
7511
7512	pci_disable_device(dev: pdev);
7513	pci_wait_for_pending_transaction(dev: pdev);
7514	}
7515
7516	u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
7517	u32 reg)
7518	{
7519	unsigned long flags, address, data;
7520	u32 r;
7521
7522	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
7523	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
7524
7525	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
7526	WREG32(address, reg * `4`);
7527	(void)RREG32(address);
7528	r = RREG32(data);
7529	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
7530	return r;
7531	}
7532
7533	void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
7534	u32 reg, u32 v)
7535	{
7536	unsigned long flags, address, data;
7537
7538	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
7539	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
7540
7541	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
7542	WREG32(address, reg * `4`);
7543	(void)RREG32(address);
7544	WREG32(data, v);
7545	(void)RREG32(data);
7546	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
7547	}
7548
7549	/**
7550	* amdgpu_device_get_gang - return a reference to the current gang
7551	* @adev: amdgpu_device pointer
7552	*
7553	* Returns: A new reference to the current gang leader.
7554	*/
7555	struct dma_fence amdgpu_device_get_gang(struct* amdgpu_device *adev)
7556	{
7557	struct dma_fence *fence;
7558
7559	rcu_read_lock();
7560	fence = dma_fence_get_rcu_safe(fencep: &adev->gang_submit);
7561	rcu_read_unlock();
7562	return fence;
7563	}
7564
7565	/**
7566	* amdgpu_device_switch_gang - switch to a new gang
7567	* @adev: amdgpu_device pointer
7568	* @gang: the gang to switch to
7569	*
7570	* Try to switch to a new gang.
7571	* Returns: NULL if we switched to the new gang or a reference to the current
7572	* gang leader.
7573	*/
7574	struct dma_fence amdgpu_device_switch_gang(struct* amdgpu_device *adev,
7575	struct dma_fence *gang)
7576	{
7577	struct dma_fence *old = NULL;
7578
7579	dma_fence_get(fence: gang);
7580	do {
7581	dma_fence_put(fence: old);
7582	old = amdgpu_device_get_gang(adev);
7583	if (old == gang)
7584	break;
7585
7586	if (!dma_fence_is_signaled(fence: old)) {
7587	dma_fence_put(fence: gang);
7588	return old;
7589	}
7590
7591	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
7592	old, gang) != old);
7593
7594	/*
7595	* Drop it once for the exchanged reference in adev and once for the
7596	* thread local reference acquired in amdgpu_device_get_gang().
7597	*/
7598	dma_fence_put(fence: old);
7599	dma_fence_put(fence: old);
7600	return NULL;
7601	}
7602
7603	/**
7604	* amdgpu_device_enforce_isolation - enforce HW isolation
7605	* @adev: the amdgpu device pointer
7606	* @ring: the HW ring the job is supposed to run on
7607	* @job: the job which is about to be pushed to the HW ring
7608	*
7609	* Makes sure that only one client at a time can use the GFX block.
7610	* Returns: The dependency to wait on before the job can be pushed to the HW.
7611	* The function is called multiple times until NULL is returned.
7612	*/
7613	struct dma_fence amdgpu_device_enforce_isolation(struct* amdgpu_device *adev,
7614	struct amdgpu_ring *ring,
7615	struct amdgpu_job *job)
7616	{
7617	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
7618	struct drm_sched_fence *f = job->base.s_fence;
7619	struct dma_fence *dep;
7620	void *owner;
7621	int r;
7622
7623	/*
7624	* For now enforce isolation only for the GFX block since we only need
7625	* the cleaner shader on those rings.
7626	*/
7627	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
7628	ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
7629	return NULL;
7630
7631	/*
7632	* All submissions where enforce isolation is false are handled as if
7633	* they come from a single client. Use ~0l as the owner to distinct it
7634	* from kernel submissions where the owner is NULL.
7635	*/
7636	owner = job->enforce_isolation ? f->owner : (void *)~`0l`;
7637
7638	mutex_lock(&adev->enforce_isolation_mutex);
7639
7640	/*
7641	* The "spearhead" submission is the first one which changes the
7642	* ownership to its client. We always need to wait for it to be
7643	* pushed to the HW before proceeding with anything.
7644	*/
7645	if (&f->scheduled != isolation->spearhead &&
7646	!dma_fence_is_signaled(fence: isolation->spearhead)) {
7647	dep = isolation->spearhead;
7648	goto out_grab_ref;
7649	}
7650
7651	if (isolation->owner != owner) {
7652
7653	/*
7654	* Wait for any gang to be assembled before switching to a
7655	* different owner or otherwise we could deadlock the
7656	* submissions.
7657	*/
7658	if (!job->gang_submit) {
7659	dep = amdgpu_device_get_gang(adev);
7660	if (!dma_fence_is_signaled(fence: dep))
7661	goto out_return_dep;
7662	dma_fence_put(fence: dep);
7663	}
7664
7665	dma_fence_put(fence: isolation->spearhead);
7666	isolation->spearhead = dma_fence_get(fence: &f->scheduled);
7667	amdgpu_sync_move(src: &isolation->active, dst: &isolation->prev);
7668	trace_amdgpu_isolation(prev: isolation->owner, next: owner);
7669	isolation->owner = owner;
7670	}
7671
7672	/*
7673	* Specifying the ring here helps to pipeline submissions even when
7674	* isolation is enabled. If that is not desired for testing NULL can be
7675	* used instead of the ring to enforce a CPU round trip while switching
7676	* between clients.
7677	*/
7678	dep = amdgpu_sync_peek_fence(sync: &isolation->prev, ring);
7679	r = amdgpu_sync_fence(sync: &isolation->active, f: &f->finished, GFP_NOWAIT);
7680	if (r)
7681	dev_warn(adev->dev, "OOM tracking isolation\n");
7682
7683	out_grab_ref:
7684	dma_fence_get(fence: dep);
7685	out_return_dep:
7686	mutex_unlock(lock: &adev->enforce_isolation_mutex);
7687	return dep;
7688	}
7689
7690	bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
7691	{
7692	switch (adev->asic_type) {
7693	#ifdef CONFIG_DRM_AMDGPU_SI
7694	case CHIP_HAINAN:
7695	#endif
7696	case CHIP_TOPAZ:
7697	/ chips with no display hardware /
7698	return false;
7699	#ifdef CONFIG_DRM_AMDGPU_SI
7700	case CHIP_TAHITI:
7701	case CHIP_PITCAIRN:
7702	case CHIP_VERDE:
7703	case CHIP_OLAND:
7704	#endif
7705	#ifdef CONFIG_DRM_AMDGPU_CIK
7706	case CHIP_BONAIRE:
7707	case CHIP_HAWAII:
7708	case CHIP_KAVERI:
7709	case CHIP_KABINI:
7710	case CHIP_MULLINS:
7711	#endif
7712	case CHIP_TONGA:
7713	case CHIP_FIJI:
7714	case CHIP_POLARIS10:
7715	case CHIP_POLARIS11:
7716	case CHIP_POLARIS12:
7717	case CHIP_VEGAM:
7718	case CHIP_CARRIZO:
7719	case CHIP_STONEY:
7720	/ chips with display hardware /
7721	return true;
7722	default:
7723	/ IP discovery /
7724	if (!amdgpu_ip_version(adev, ip: DCE_HWIP, inst: `0`) \|\|
7725	(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
7726	return false;
7727	return true;
7728	}
7729	}
7730
7731	uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
7732	uint32_t inst, uint32_t reg_addr, char reg_name[],
7733	uint32_t expected_value, uint32_t mask)
7734	{
7735	uint32_t ret = `0`;
7736	uint32_t old_ = `0`;
7737	uint32_t tmp_ = RREG32(reg_addr);
7738	uint32_t loop = adev->usec_timeout;
7739
7740	while ((tmp_ & (mask)) != (expected_value)) {
7741	if (old_ != tmp_) {
7742	loop = adev->usec_timeout;
7743	old_ = tmp_;
7744	} else
7745	udelay(usec: `1`);
7746	tmp_ = RREG32(reg_addr);
7747	loop--;
7748	if (!loop) {
7749	dev_warn(
7750	adev->dev,
7751	"Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
7752	inst, reg_name, (uint32_t)expected_value,
7753	(uint32_t)(tmp_ & (mask)));
7754	ret = -ETIMEDOUT;
7755	break;
7756	}
7757	}
7758	return ret;
7759	}
7760
7761	ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
7762	{
7763	ssize_t size = `0`;
7764
7765	if (!ring \|\| !ring->adev)
7766	return size;
7767
7768	if (amdgpu_device_should_recover_gpu(adev: ring->adev))
7769	size \|= AMDGPU_RESET_TYPE_FULL;
7770
7771	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
7772	!amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
7773	size \|= AMDGPU_RESET_TYPE_SOFT_RESET;
7774
7775	return size;
7776	}
7777
7778	ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
7779	{
7780	ssize_t size = `0`;
7781
7782	if (supported_reset == `0`) {
7783	size += sysfs_emit_at(buf, at: size, fmt: "unsupported");
7784	size += sysfs_emit_at(buf, at: size, fmt: "\n");
7785	return size;
7786
7787	}
7788
7789	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
7790	size += sysfs_emit_at(buf, at: size, fmt: "soft ");
7791
7792	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
7793	size += sysfs_emit_at(buf, at: size, fmt: "queue ");
7794
7795	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
7796	size += sysfs_emit_at(buf, at: size, fmt: "pipe ");
7797
7798	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
7799	size += sysfs_emit_at(buf, at: size, fmt: "full ");
7800
7801	size += sysfs_emit_at(buf, at: size, fmt: "\n");
7802	return size;
7803	}
7804
7805	void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
7806	enum amdgpu_uid_type type, uint8_t inst,
7807	uint64_t uid)
7808	{
7809	if (!uid_info)
7810	return;
7811
7812	if (type >= AMDGPU_UID_TYPE_MAX) {
7813	dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
7814	type);
7815	return;
7816	}
7817
7818	if (inst >= AMDGPU_UID_INST_MAX) {
7819	dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
7820	inst);
7821	return;
7822	}
7823
7824	if (uid_info->uid[type][inst] != `0`) {
7825	dev_warn_once(
7826	uid_info->adev->dev,
7827	"Overwriting existing UID %llu for type %d instance %d\n",
7828	uid_info->uid[type][inst], type, inst);
7829	}
7830
7831	uid_info->uid[type][inst] = uid;
7832	}
7833
7834	u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
7835	enum amdgpu_uid_type type, uint8_t inst)
7836	{
7837	if (!uid_info)
7838	return `0`;
7839
7840	if (type >= AMDGPU_UID_TYPE_MAX) {
7841	dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
7842	type);
7843	return `0`;
7844	}
7845
7846	if (inst >= AMDGPU_UID_INST_MAX) {
7847	dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
7848	inst);
7849	return `0`;
7850	}
7851
7852	return uid_info->uid[type][inst];
7853	}
7854

source code of linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c