author     Dave Airlie <airlied@redhat.com>    2016-02-18 20:13:01 -0500
committer  Dave Airlie <airlied@redhat.com>    2016-02-18 20:13:01 -0500
commit     5263925c092d137a0830ca4afe692366127dca4e
tree       49ce726b058d36f5b5d21156716ab0153f443243
parent     08244c00859f25036417ea7b790cfa73e43443fc
parent     390be2824fa4211c2e973c69b72e04000559bba3
Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next

First radeon and amdgpu pull request for 4.6. Highlights:
- ACP support for APUs with i2s audio
- CS ioctl optimizations
- GPU scheduler optimizations
- GPUVM optimizations
- Initial GPU reset support (not enabled yet)
- New powerplay sysfs interface for manually selecting clocks
- Powerplay fixes
- Virtualization fixes
- Removal of hw semaphore support
- Lots of other misc fixes and cleanups

* 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (118 commits)
  drm/amdgpu: Don't call interval_tree_remove in amdgpu_mn_destroy
  drm/amdgpu: Fix race condition in amdgpu_mn_unregister
  drm/amdgpu: cleanup gem init/finit
  drm/amdgpu: rework GEM info printing
  drm/amdgpu: print the GPU offset as well in gem_info
  drm/amdgpu: optionally print the pin count in gem_info as well
  drm/amdgpu: print the BO size only once in amdgpu_gem_info
  drm/amdgpu: print pid as integer
  drm/amdgpu: remove page flip work queue v3
  drm/amdgpu: stop blocking for page filp fences
  drm/amdgpu: stop calling amdgpu_gpu_reset from the flip code
  drm/amdgpu: remove fence reset detection leftovers
  drm/amdgpu: Fix race condition in MMU notifier release
  drm/radeon: Fix WARN_ON if DRM_DP_AUX_CHARDEV is enabled
  drm/amdgpu/vi: move uvd tiling config setup into uvd code
  drm/amdgpu/vi: move sdma tiling config setup into sdma code
  drm/amdgpu/cik: move uvd tiling config setup into uvd code
  drm/amdgpu/cik: move sdma tiling config setup into sdma code
  drm/amdgpu/gfx7: rework gpu_init()
  drm/amdgpu/gfx: clean up harvest configuration (v2)
  ...
-rw-r--r--  drivers/gpu/drm/Kconfig | 2
-rw-r--r--  drivers/gpu/drm/amd/acp/Kconfig | 11
-rw-r--r--  drivers/gpu/drm/amd/acp/Makefile | 8
-rw-r--r--  drivers/gpu/drm/amd/acp/acp_hw.c | 50
-rw-r--r--  drivers/gpu/drm/amd/acp/include/acp_gfx_if.h | 34
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 238
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 502
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 63
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 385
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 87
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 52
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 53
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 173
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 135
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 60
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 159
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 37
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 355
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 174
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 108
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c | 102
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 154
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 237
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 55
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 81
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 96
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 139
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 607
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c | 308
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 67
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/fiji_smc.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1941
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 228
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/iceland_smc.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 70
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 71
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/tonga_smc.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 49
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 49
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 397
-rw-r--r--  drivers/gpu/drm/amd/include/amd_shared.h | 1
-rw-r--r--  drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h | 1
-rw-r--r--  drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h | 1117
-rw-r--r--  drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h | 12
-rw-r--r--  drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h | 102
-rw-r--r--  drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 206
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c | 206
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c | 123
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 93
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h | 4
-rw-r--r--  drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c | 123
-rw-r--r--  drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h | 78
-rw-r--r--  drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h | 44
-rw-r--r--  drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 12
-rw-r--r--  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 10
-rw-r--r--  drivers/gpu/drm/radeon/cik.c | 11
-rw-r--r--  drivers/gpu/drm/radeon/cik_sdma.c | 9
-rw-r--r--  drivers/gpu/drm/radeon/r100.c | 10
-rw-r--r--  drivers/gpu/drm/radeon/r600.c | 10
-rw-r--r--  drivers/gpu/drm/radeon/r600_dma.c | 9
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h | 2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_display.c | 5
-rw-r--r--  drivers/gpu/drm/radeon/radeon_fence.c | 40
-rw-r--r--  drivers/gpu/drm/radeon/radeon_vce.c | 11
-rw-r--r--  drivers/gpu/drm/radeon/uvd_v1_0.c | 10
86 files changed, 5486 insertions, 4365 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 08706f064e6e..f2a74d0b68ae 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -172,6 +172,8 @@ config DRM_AMDGPU
 source "drivers/gpu/drm/amd/amdgpu/Kconfig"
 source "drivers/gpu/drm/amd/powerplay/Kconfig"
 
+source "drivers/gpu/drm/amd/acp/Kconfig"
+
 source "drivers/gpu/drm/nouveau/Kconfig"
 
 config DRM_I810
diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
new file mode 100644
index 000000000000..2b07813bceed
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Kconfig
@@ -0,0 +1,11 @@
+menu "ACP Configuration"
+
+config DRM_AMD_ACP
+	bool "Enable ACP IP support"
+	default y
+	select MFD_CORE
+	select PM_GENERIC_DOMAINS if PM
+	help
+	Choose this option to enable ACP IP support for AMD SOCs.
+
+endmenu
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
new file mode 100644
index 000000000000..8363cb57915b
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the ACP, which is a sub-component
+# of AMDSOC/AMDGPU drm driver.
+# It provides the HW control for ACP related functionalities.
+
+subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
+
+AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
diff --git a/drivers/gpu/drm/amd/acp/acp_hw.c b/drivers/gpu/drm/amd/acp/acp_hw.c
new file mode 100644
index 000000000000..7af83f142b4b
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/acp_hw.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+
+#include "acp_gfx_if.h"
+
+#define ACP_MODE_I2S	0
+#define ACP_MODE_AZ	1
+
+#define mmACP_AZALIA_I2S_SELECT	0x51d4
+
+int amd_acp_hw_init(void *cgs_device,
+		    unsigned acp_version_major, unsigned acp_version_minor)
+{
+	unsigned int acp_mode = ACP_MODE_I2S;
+
+	if ((acp_version_major == 2) && (acp_version_minor == 2))
+		acp_mode = cgs_read_register(cgs_device,
+					mmACP_AZALIA_I2S_SELECT);
+
+	if (acp_mode != ACP_MODE_I2S)
+		return -ENODEV;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
new file mode 100644
index 000000000000..bccf47b63899
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#ifndef _ACP_GFX_IF_H
+#define _ACP_GFX_IF_H
+
+#include <linux/types.h>
+#include "cgs_linux.h"
+#include "cgs_common.h"
+
+int amd_acp_hw_init(void *cgs_device,
+		    unsigned acp_version_major, unsigned acp_version_minor);
+
+#endif /* _ACP_GFX_IF_H */
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 20c9539abc36..c7fcdcedaadb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -8,7 +8,8 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
-	-I$(FULL_AMD_PATH)/powerplay/inc
+	-I$(FULL_AMD_PATH)/powerplay/inc \
+	-I$(FULL_AMD_PATH)/acp/include
 
 amdgpu-y := amdgpu_drv.o
 
@@ -20,7 +21,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
 	amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
-	atombios_encoders.o amdgpu_semaphore.o amdgpu_sa.o atombios_i2c.o \
+	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
 
@@ -92,7 +93,17 @@ amdgpu-y += amdgpu_cgs.o
 amdgpu-y += \
 	../scheduler/gpu_scheduler.o \
 	../scheduler/sched_fence.o \
-	amdgpu_sched.o
+	amdgpu_job.o
+
+# ACP componet
+ifneq ($(CONFIG_DRM_AMD_ACP),)
+amdgpu-y += amdgpu_acp.o
+
+AMDACPPATH := ../acp
+include $(FULL_AMD_PATH)/acp/Makefile
+
+amdgpu-y += $(AMD_ACP_FILES)
+endif
 
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 82edf95b7740..f5bac97a438b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -53,6 +53,7 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_gds.h"
 #include "amd_powerplay.h"
+#include "amdgpu_acp.h"
 
 #include "gpu_scheduler.h"
 
@@ -74,7 +75,6 @@ extern int amdgpu_dpm;
 extern int amdgpu_smc_load_fw;
 extern int amdgpu_aspm;
 extern int amdgpu_runtime_pm;
-extern int amdgpu_hard_reset;
 extern unsigned amdgpu_ip_block_mask;
 extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
@@ -82,10 +82,8 @@ extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
-extern int amdgpu_enable_scheduler;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
-extern int amdgpu_enable_semaphores;
 extern int amdgpu_powerplay;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS		3000
@@ -106,9 +104,6 @@ extern int amdgpu_powerplay;
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES		2
 
-/* number of hw syncs before falling back on blocking */
-#define AMDGPU_NUM_SYNCS			4
-
 /* hardcode that limit for now */
 #define AMDGPU_VA_RESERVED_SIZE			(8 << 20)
 
@@ -189,7 +184,6 @@ struct amdgpu_fence;
 struct amdgpu_ib;
 struct amdgpu_vm;
 struct amdgpu_ring;
-struct amdgpu_semaphore;
 struct amdgpu_cs_parser;
 struct amdgpu_job;
 struct amdgpu_irq_src;
@@ -287,7 +281,7 @@ struct amdgpu_vm_pte_funcs {
 			 unsigned count);
 	/* write pte one entry at a time with addr mapping */
 	void (*write_pte)(struct amdgpu_ib *ib,
-			  uint64_t pe,
+			  const dma_addr_t *pages_addr, uint64_t pe,
 			  uint64_t addr, unsigned count,
 			  uint32_t incr, uint32_t flags);
 	/* for linear pte/pde updates without addr mapping */
@@ -295,8 +289,6 @@ struct amdgpu_vm_pte_funcs {
 			    uint64_t pe,
 			    uint64_t addr, unsigned count,
 			    uint32_t incr, uint32_t flags);
-	/* pad the indirect buffer to the necessary number of dw */
-	void (*pad_ib)(struct amdgpu_ib *ib);
 };
 
 /* provided by the gmc block */
@@ -334,9 +326,6 @@ struct amdgpu_ring_funcs {
 			struct amdgpu_ib *ib);
 	void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
 			   uint64_t seq, unsigned flags);
-	bool (*emit_semaphore)(struct amdgpu_ring *ring,
-			       struct amdgpu_semaphore *semaphore,
-			       bool emit_wait);
 	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
 			      uint64_t pd_addr);
 	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
@@ -349,6 +338,8 @@ struct amdgpu_ring_funcs {
 	int (*test_ib)(struct amdgpu_ring *ring);
 	/* insert NOP packets */
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+	/* pad the indirect buffer to the necessary number of dw */
+	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 };
 
 /*
@@ -394,7 +385,7 @@ struct amdgpu_fence_driver {
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
 	/* sync_seq is protected by ring emission lock */
-	uint64_t			sync_seq[AMDGPU_MAX_RINGS];
+	uint64_t			sync_seq;
 	atomic64_t			last_seq;
 	bool				initialized;
 	struct amdgpu_irq_src		*irq_src;
@@ -447,11 +438,6 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
-bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *ring);
-void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *ring);
-
 /*
  * TTM.
  */
@@ -470,6 +456,8 @@ struct amdgpu_mman {
 	/* buffer handling */
 	const struct amdgpu_buffer_funcs	*buffer_funcs;
 	struct amdgpu_ring			*buffer_funcs_ring;
+	/* Scheduler entity for buffer moves */
+	struct amd_sched_entity			entity;
 };
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring,
@@ -484,8 +472,6 @@ struct amdgpu_bo_list_entry {
 	struct amdgpu_bo		*robj;
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
-	unsigned			prefered_domains;
-	unsigned			allowed_domains;
 	uint32_t			priority;
 };
 
@@ -522,7 +508,8 @@ struct amdgpu_bo {
 	/* Protected by gem.mutex */
 	struct list_head		list;
 	/* Protected by tbo.reserved */
-	u32				initial_domain;
+	u32				prefered_domains;
+	u32				allowed_domains;
 	struct ttm_place		placements[AMDGPU_GEM_DOMAIN_MAX + 1];
 	struct ttm_placement		placement;
 	struct ttm_buffer_object	tbo;
@@ -544,7 +531,6 @@ struct amdgpu_bo {
 	struct amdgpu_bo		*parent;
 
 	struct ttm_bo_kmap_obj		dma_buf_vmap;
-	pid_t				pid;
 	struct amdgpu_mn		*mn;
 	struct list_head		mn_list;
 };
@@ -621,13 +607,7 @@
 /*
  * GEM objects.
  */
-struct amdgpu_gem {
-	struct mutex		mutex;
-	struct list_head	objects;
-};
-
-int amdgpu_gem_init(struct amdgpu_device *adev);
-void amdgpu_gem_fini(struct amdgpu_device *adev);
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 				int alignment, u32 initial_domain,
 				u64 flags, bool kernel,
@@ -639,32 +619,10 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 int amdgpu_mode_dumb_mmap(struct drm_file *filp,
 			  struct drm_device *dev,
 			  uint32_t handle, uint64_t *offset_p);
-
-/*
- * Semaphores.
- */
-struct amdgpu_semaphore {
-	struct amdgpu_sa_bo	*sa_bo;
-	signed			waiters;
-	uint64_t		gpu_addr;
-};
-
-int amdgpu_semaphore_create(struct amdgpu_device *adev,
-			    struct amdgpu_semaphore **semaphore);
-bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
-				  struct amdgpu_semaphore *semaphore);
-bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
-				struct amdgpu_semaphore *semaphore);
-void amdgpu_semaphore_free(struct amdgpu_device *adev,
-			   struct amdgpu_semaphore **semaphore,
-			   struct fence *fence);
-
 /*
  * Synchronization
  */
 struct amdgpu_sync {
-	struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
-	struct fence		*sync_to[AMDGPU_MAX_RINGS];
 	DECLARE_HASHTABLE(fences, 4);
 	struct fence	        *last_vm_update;
 };
@@ -676,12 +634,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-int amdgpu_sync_rings(struct amdgpu_sync *sync,
-		      struct amdgpu_ring *ring);
struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_wait(struct amdgpu_sync *sync);
-void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct fence *fence);
+void amdgpu_sync_free(struct amdgpu_sync *sync);
 
 /*
  * GART structures, functions & helpers
@@ -799,6 +754,7 @@ struct amdgpu_flip_work {
 	struct fence			*excl;
 	unsigned			shared_count;
 	struct fence			**shared;
+	struct fence_cb			cb;
 };
 
 
@@ -811,12 +767,11 @@ struct amdgpu_ib {
 	uint32_t			length_dw;
 	uint64_t			gpu_addr;
 	uint32_t			*ptr;
-	struct amdgpu_ring		*ring;
 	struct amdgpu_fence		*fence;
 	struct amdgpu_user_fence        *user;
+	bool				grabbed_vmid;
 	struct amdgpu_vm		*vm;
 	struct amdgpu_ctx		*ctx;
-	struct amdgpu_sync		sync;
 	uint32_t			gds_base, gds_size;
 	uint32_t			gws_base, gws_size;
 	uint32_t			oa_base, oa_size;
@@ -835,13 +790,14 @@ enum amdgpu_ring_type {
 
 extern struct amd_sched_backend_ops amdgpu_sched_ops;
 
-int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
-					 struct amdgpu_ring *ring,
-					 struct amdgpu_ib *ibs,
-					 unsigned num_ibs,
-					 int (*free_job)(struct amdgpu_job *),
-					 void *owner,
-					 struct fence **fence);
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+		     struct amdgpu_job **job);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+			     struct amdgpu_job **job);
+void amdgpu_job_free(struct amdgpu_job *job);
+int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+		      struct amd_sched_entity *entity, void *owner,
+		      struct fence **f);
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
@@ -850,7 +806,6 @@ struct amdgpu_ring {
 	struct amd_gpu_scheduler 	sched;
 
 	spinlock_t		fence_lock;
-	struct mutex		*ring_lock;
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
@@ -859,7 +814,7 @@ struct amdgpu_ring {
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		ring_size;
-	unsigned		ring_free_dw;
+	unsigned		max_dw;
 	int			count_dw;
 	uint64_t		gpu_addr;
 	uint32_t		align_mask;
@@ -867,8 +822,6 @@ struct amdgpu_ring {
 	bool			ready;
 	u32			nop;
 	u32			idx;
-	u64			last_semaphore_signal_addr;
-	u64			last_semaphore_wait_addr;
 	u32			me;
 	u32			pipe;
 	u32			queue;
@@ -881,7 +834,6 @@ struct amdgpu_ring {
 	struct amdgpu_ctx	*current_ctx;
 	enum amdgpu_ring_type	type;
 	char			name[16];
-	bool			is_pte_ring;
 };
 
 /*
@@ -932,6 +884,8 @@ struct amdgpu_vm_id {
 };
 
 struct amdgpu_vm {
+	/* tree of virtual addresses mapped */
+	spinlock_t		it_lock;
 	struct rb_root		va;
 
 	/* protecting invalidated */
@@ -956,30 +910,40 @@ struct amdgpu_vm {
 
 	/* for id and flush management per ring */
 	struct amdgpu_vm_id	ids[AMDGPU_MAX_RINGS];
-	/* for interval tree */
-	spinlock_t		it_lock;
+
 	/* protecting freed */
 	spinlock_t		freed_lock;
+
+	/* Scheduler entity for page table updates */
+	struct amd_sched_entity	entity;
+};
+
+struct amdgpu_vm_manager_id {
+	struct list_head	list;
+	struct fence		*active;
+	atomic_long_t		owner;
 };
 
 struct amdgpu_vm_manager {
-	struct {
-		struct fence	*active;
-		atomic_long_t	owner;
-	} ids[AMDGPU_NUM_VM];
+	/* Handling of VMIDs */
+	struct mutex				lock;
+	unsigned				num_ids;
+	struct list_head			ids_lru;
+	struct amdgpu_vm_manager_id		ids[AMDGPU_NUM_VM];
 
 	uint32_t				max_pfn;
-	/* number of VMIDs */
-	unsigned				nvm;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
 	/* is vm enabled? */
 	bool					enabled;
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
-	struct amdgpu_ring                      *vm_pte_funcs_ring;
+	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
+	unsigned				vm_pte_num_rings;
+	atomic_t				vm_pte_next_ring;
 };
 
+void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -990,14 +954,11 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync);
+		      struct amdgpu_sync *sync, struct fence *fence);
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
		     struct amdgpu_vm *vm,
		     struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
-		     struct amdgpu_vm *vm,
-		     struct fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 				    struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@ -1023,7 +984,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 		       uint64_t addr);
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
 
 /*
  * context related structures
@@ -1051,10 +1011,6 @@ struct amdgpu_ctx_mgr {
 	struct idr		ctx_handles;
 };
 
-int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
-		    struct amdgpu_ctx *ctx);
-void amdgpu_ctx_fini(struct amdgpu_ctx *ctx);
-
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
@@ -1096,6 +1052,8 @@ struct amdgpu_bo_list {
 
 struct amdgpu_bo_list *
 amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
+void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
+			     struct list_head *validated);
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
 
@@ -1169,6 +1127,7 @@ struct amdgpu_gca_config {
 	unsigned multi_gpu_tile_size;
 	unsigned mc_arb_ramcfg;
 	unsigned gb_addr_config;
+	unsigned num_rbs;
 
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
@@ -1211,23 +1170,21 @@ struct amdgpu_gfx {
 	unsigned ce_ram_size;
 };
 
-int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib);
-int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
-		       struct amdgpu_ib *ib, void *owner);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		       struct amdgpu_ib *ib, void *owner,
+		       struct fence *last_vm_update,
+		       struct fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-/* Ring access between begin & end cannot sleep */
-void amdgpu_ring_free_size(struct amdgpu_ring *ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
-int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
-void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring);
 unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
			    uint32_t **data);
 int amdgpu_ring_restore(struct amdgpu_ring *ring,
@@ -1246,47 +1203,57 @@ struct amdgpu_cs_chunk {
 	uint32_t		chunk_id;
 	uint32_t		length_dw;
 	uint32_t		*kdata;
-	void __user		*user_ptr;
 };
 
 struct amdgpu_cs_parser {
 	struct amdgpu_device	*adev;
 	struct drm_file		*filp;
 	struct amdgpu_ctx	*ctx;
-	struct amdgpu_bo_list	*bo_list;
+
 	/* chunks */
 	unsigned		nchunks;
 	struct amdgpu_cs_chunk	*chunks;
-	/* relocations */
-	struct amdgpu_bo_list_entry	vm_pd;
-	struct list_head	validated;
-	struct fence		*fence;
 
-	struct amdgpu_ib	*ibs;
-	uint32_t		num_ibs;
+	/* scheduler job object */
+	struct amdgpu_job	*job;
 
-	struct ww_acquire_ctx	ticket;
+	/* buffer objects */
+	struct ww_acquire_ctx		ticket;
+	struct amdgpu_bo_list		*bo_list;
+	struct amdgpu_bo_list_entry	vm_pd;
+	struct list_head		validated;
+	struct fence			*fence;
+	uint64_t			bytes_moved_threshold;
+	uint64_t			bytes_moved;
 
 	/* user fence */
-	struct amdgpu_user_fence	uf;
 	struct amdgpu_bo_list_entry	uf_entry;
 };
 
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amdgpu_device	*adev;
+	struct amdgpu_ring	*ring;
+	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
 	uint32_t		num_ibs;
 	void			*owner;
 	struct amdgpu_user_fence uf;
-	int (*free_job)(struct amdgpu_job *job);
 };
 #define to_amdgpu_job(sched_job)		\
		container_of((sched_job), struct amdgpu_job, base)
 
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
+static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
+				      uint32_t ib_idx, int idx)
 {
-	return p->ibs[ib_idx].ptr[idx];
+	return p->job->ibs[ib_idx].ptr[idx];
+}
+
+static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
+				       uint32_t ib_idx, int idx,
+				       uint32_t value)
+{
+	p->job->ibs[ib_idx].ptr[idx] = value;
 }
 
 /*
@@ -1538,6 +1505,7 @@ enum amdgpu_dpm_forced_level {
 	AMDGPU_DPM_FORCED_LEVEL_AUTO = 0,
 	AMDGPU_DPM_FORCED_LEVEL_LOW = 1,
 	AMDGPU_DPM_FORCED_LEVEL_HIGH = 2,
+	AMDGPU_DPM_FORCED_LEVEL_MANUAL = 3,
 };
 
 struct amdgpu_vce_state {
@@ -1667,6 +1635,7 @@ struct amdgpu_uvd {
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 	bool			address_64_bit;
+	struct amd_sched_entity entity;
 };
 
 /*
@@ -1691,6 +1660,7 @@ struct amdgpu_vce {
 	struct amdgpu_ring	ring[AMDGPU_MAX_VCE_RINGS];
 	struct amdgpu_irq_src	irq;
 	unsigned		harvest_config;
+	struct amd_sched_entity	entity;
 };
 
 /*
@@ -1925,6 +1895,18 @@ void amdgpu_cgs_destroy_device(void *cgs_device);
 
 
 /*
+ * CGS
+ */
+void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+void amdgpu_cgs_destroy_device(void *cgs_device);
+
+
+/* GPU virtualization */
+struct amdgpu_virtualization {
+	bool supports_sr_iov;
+};
+
+/*
  * Core structure, functions and helpers.
  */
 typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
@@ -1944,6 +1926,10 @@ struct amdgpu_device {
 	struct drm_device		*ddev;
 	struct pci_dev			*pdev;
 
+#ifdef CONFIG_DRM_AMD_ACP
+	struct amdgpu_acp		acp;
+#endif
+
 	/* ASIC */
 	enum amd_asic_type		asic_type;
 	uint32_t			family;
@@ -2020,7 +2006,6 @@ struct amdgpu_device {
 
 	/* memory management */
 	struct amdgpu_mman		mman;
-	struct amdgpu_gem		gem;
 	struct amdgpu_vram_scratch	vram_scratch;
 	struct amdgpu_wb		wb;
 	atomic64_t			vram_usage;
@@ -2038,7 +2023,6 @@ struct amdgpu_device {
 
 	/* rings */
 	unsigned			fence_context;
-	struct mutex			ring_lock;
 	unsigned			num_rings;
 	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
 	bool				ib_pool_ready;
@@ -2050,6 +2034,7 @@ struct amdgpu_device {
 	/* powerplay */
 	struct amd_powerplay		powerplay;
 	bool				pp_enabled;
+	bool				pp_force_state_enabled;
 
 	/* dpm */
 	struct amdgpu_pm		pm;
@@ -2091,8 +2076,7 @@ struct amdgpu_device {
 	/* amdkfd interface */
 	struct kfd_dev          *kfd;
 
-	/* kernel conext for IB submission */
-	struct amdgpu_ctx	kernel_ctx;
+	struct amdgpu_virtualization virtualization;
 };
 
 bool amdgpu_device_is_px(struct drm_device *dev);
@@ -2197,7 +2181,6 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 	ring->ring[ring->wptr++] = v;
 	ring->wptr &= ring->ptr_mask;
 	ring->count_dw--;
-	ring->ring_free_dw--;
 }
 
 static inline struct amdgpu_sdma_instance *
@@ -2233,9 +2216,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
-#define amdgpu_vm_write_pte(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (addr), (count), (incr), (flags)))
+#define amdgpu_vm_write_pte(adev, ib, pa, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pa), (pe), (addr), (count), (incr), (flags)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_vm_pad_ib(adev, ib) ((adev)->vm_manager.vm_pte_funcs->pad_ib((ib)))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r))
@@ -2245,9 +2227,9 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib))
 #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
-#define amdgpu_ring_emit_semaphore(r, semaphore, emit_wait) (r)->funcs->emit_semaphore((r), (semaphore), (emit_wait))
 #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
@@ -2339,6 +2321,21 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_get_performance_level(adev) \
 	(adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle)
 
+#define amdgpu_dpm_get_pp_num_states(adev, data) \
+	(adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)
+
+#define amdgpu_dpm_get_pp_table(adev, table) \
+	(adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table)
+
+#define amdgpu_dpm_set_pp_table(adev, buf, size) \
+	(adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size)
+
+#define amdgpu_dpm_print_clock_levels(adev, type, buf) \
+	(adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf)
+
+#define amdgpu_dpm_force_clock_level(adev, type, level) \
+	(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
+
 #define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \
 	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
 
@@ -2349,7 +2346,6 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev);
 void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_card_posted(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
-bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2359,7 +2355,9 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 			      uint32_t flags);
-bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+				  unsigned long end);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
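
The amdgpu.h hunks above replace the old amdgpu_sched_ib_submit_kernel_helper() with the amdgpu_job_alloc()/amdgpu_job_submit() interface. As a rough, hedged sketch only (not taken from this series): a kernel-internal submission through the new API would look roughly like the function below. The IB size of 64, the use of the new mman.entity scheduler entity and the AMDGPU_FENCE_OWNER_UNDEFINED owner constant are illustrative assumptions, and the IB contents are elided.

static int example_kernel_submit(struct amdgpu_device *adev,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_job *job;
	struct fence *f;
	int r;

	/* allocate a job carrying a single IB (the size is an assumed value) */
	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		return r;

	/* ... write packets into job->ibs[0].ptr and set job->ibs[0].length_dw ... */

	/* hand the job to a scheduler entity; the returned fence signals
	 * completion.  On submit failure the caller still owns the job. */
	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
	if (r) {
		amdgpu_job_free(job);
		return r;
	}

	fence_put(f);
	return 0;
}
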
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
new file mode 100644
index 000000000000..9f8cfaab3004
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/pm_domain.h>
+#include <linux/platform_device.h>
+#include <sound/designware_i2s.h>
+#include <sound/pcm.h>
+
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_acp.h"
+
+#include "acp_gfx_if.h"
+
+#define ACP_TILE_ON_MASK			0x03
+#define ACP_TILE_OFF_MASK			0x02
+#define ACP_TILE_ON_RETAIN_REG_MASK		0x1f
+#define ACP_TILE_OFF_RETAIN_REG_MASK		0x20
+
+#define ACP_TILE_P1_MASK			0x3e
+#define ACP_TILE_P2_MASK			0x3d
+#define ACP_TILE_DSP0_MASK			0x3b
+#define ACP_TILE_DSP1_MASK			0x37
+
+#define ACP_TILE_DSP2_MASK			0x2f
+
+#define ACP_DMA_REGS_END			0x146c0
+#define ACP_I2S_PLAY_REGS_START			0x14840
+#define ACP_I2S_PLAY_REGS_END			0x148b4
+#define ACP_I2S_CAP_REGS_START			0x148b8
+#define ACP_I2S_CAP_REGS_END			0x1496c
+
+#define ACP_I2S_COMP1_CAP_REG_OFFSET		0xac
+#define ACP_I2S_COMP2_CAP_REG_OFFSET		0xa8
+#define ACP_I2S_COMP1_PLAY_REG_OFFSET		0x6c
+#define ACP_I2S_COMP2_PLAY_REG_OFFSET		0x68
+
+#define mmACP_PGFSM_RETAIN_REG			0x51c9
+#define mmACP_PGFSM_CONFIG_REG			0x51ca
+#define mmACP_PGFSM_READ_REG_0			0x51cc
+
+#define mmACP_MEM_SHUT_DOWN_REQ_LO		0x51f8
+#define mmACP_MEM_SHUT_DOWN_REQ_HI		0x51f9
+#define mmACP_MEM_SHUT_DOWN_STS_LO		0x51fa
+#define mmACP_MEM_SHUT_DOWN_STS_HI		0x51fb
+
+#define ACP_TIMEOUT_LOOP			0x000000FF
+#define ACP_DEVS				3
+#define ACP_SRC_ID				162
+
+enum {
+	ACP_TILE_P1 = 0,
+	ACP_TILE_P2,
+	ACP_TILE_DSP0,
+	ACP_TILE_DSP1,
+	ACP_TILE_DSP2,
+};
+
+static int acp_sw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->acp.parent = adev->dev;
+
+	adev->acp.cgs_device =
+		amdgpu_cgs_create_device(adev);
+	if (!adev->acp.cgs_device)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int acp_sw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (adev->acp.cgs_device)
+		amdgpu_cgs_destroy_device(adev->acp.cgs_device);
+
+	return 0;
+}
+
+/* power off a tile/block within ACP */
+static int acp_suspend_tile(void *cgs_dev, int tile)
+{
+	u32 val = 0;
+	u32 count = 0;
+
+	if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
+		pr_err("Invalid ACP tile : %d to suspend\n", tile);
+		return -1;
+	}
+
+	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
+	val &= ACP_TILE_ON_MASK;
+
+	if (val == 0x0) {
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+		val = val | (1 << tile);
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+		cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
+					0x500 + tile);
+
+		count = ACP_TIMEOUT_LOOP;
+		while (true) {
+			val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+								+ tile);
+			val = val & ACP_TILE_ON_MASK;
+			if (val == ACP_TILE_OFF_MASK)
+				break;
+			if (--count == 0) {
+				pr_err("Timeout reading ACP PGFSM status\n");
+				return -ETIMEDOUT;
+			}
+			udelay(100);
+		}
+
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+
+		val |= ACP_TILE_OFF_RETAIN_REG_MASK;
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+	}
+	return 0;
+}
+
+/* power on a tile/block within ACP */
+static int acp_resume_tile(void *cgs_dev, int tile)
+{
+	u32 val = 0;
+	u32 count = 0;
+
+	if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
+		pr_err("Invalid ACP tile to resume\n");
+		return -1;
+	}
+
+	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
+	val = val & ACP_TILE_ON_MASK;
+
+	if (val != 0x0) {
+		cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
+					0x600 + tile);
+		count = ACP_TIMEOUT_LOOP;
+		while (true) {
+			val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+								+ tile);
+			val = val & ACP_TILE_ON_MASK;
+			if (val == 0x0)
+				break;
+			if (--count == 0) {
+				pr_err("Timeout reading ACP PGFSM status\n");
+				return -ETIMEDOUT;
+			}
+			udelay(100);
+		}
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+		if (tile == ACP_TILE_P1)
+			val = val & (ACP_TILE_P1_MASK);
+		else if (tile == ACP_TILE_P2)
+			val = val & (ACP_TILE_P2_MASK);
+
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+	}
+	return 0;
+}
+
+struct acp_pm_domain {
+	void *cgs_dev;
+	struct generic_pm_domain gpd;
+};
+
+static int acp_poweroff(struct generic_pm_domain *genpd)
+{
+	int i, ret;
+	struct acp_pm_domain *apd;
+
+	apd = container_of(genpd, struct acp_pm_domain, gpd);
+	if (apd != NULL) {
+		/* Donot return abruptly if any of power tile fails to suspend.
+		 * Log it and continue powering off other tile
+		 */
+		for (i = 4; i >= 0 ; i--) {
+			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
+			if (ret)
+				pr_err("ACP tile %d tile suspend failed\n", i);
+		}
+	}
+	return 0;
+}
+
+static int acp_poweron(struct generic_pm_domain *genpd)
+{
+	int i, ret;
+	struct acp_pm_domain *apd;
+
+	apd = container_of(genpd, struct acp_pm_domain, gpd);
+	if (apd != NULL) {
+		for (i = 0; i < 2; i++) {
+			ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i);
+			if (ret) {
+				pr_err("ACP tile %d resume failed\n", i);
+				break;
+			}
+		}
+
+		/* Disable DSPs which are not going to be used */
+		for (i = 0; i < 3; i++) {
+			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i);
+			/* Continue suspending other DSP, even if one fails */
+			if (ret)
+				pr_err("ACP DSP %d suspend failed\n", i);
+		}
+	}
+	return 0;
+}
+
+static struct device *get_mfd_cell_dev(const char *device_name, int r)
+{
+	char auto_dev_name[25];
+	char buf[8];
+	struct device *dev;
+
+	sprintf(buf, ".%d.auto", r);
+	strcpy(auto_dev_name, device_name);
+	strcat(auto_dev_name, buf);
+	dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name);
+	dev_info(dev, "device %s added to pm domain\n", auto_dev_name);
+
+	return dev;
+}
+
+/**
+ * acp_hw_init - start and test ACP block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+static int acp_hw_init(void *handle)
+{
+	int r, i;
+	uint64_t acp_base;
+	struct device *dev;
+	struct i2s_platform_data *i2s_pdata;
+
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	const struct amdgpu_ip_block_version *ip_version =
+		amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
+
+	if (!ip_version)
+		return -EINVAL;
+
+	r = amd_acp_hw_init(adev->acp.cgs_device,
+			    ip_version->major, ip_version->minor);
+	/* -ENODEV means board uses AZ rather than ACP */
+	if (r == -ENODEV)
+		return 0;
+	else if (r)
+		return r;
+
+	r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
+			0x5289, 0, &acp_base);
+	if (r == -ENODEV)
+		return 0;
+	else if (r)
+		return r;
+
+	adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
+	if (adev->acp.acp_genpd == NULL)
+		return -ENOMEM;
+
+	adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
+	adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
+	adev->acp.acp_genpd->gpd.power_on = acp_poweron;
+
+
+	adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device;
+
+	pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
+
+	adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS,
+							GFP_KERNEL);
+
+	if (adev->acp.acp_cell == NULL)
+		return -ENOMEM;
+
+	adev->acp.acp_res = kzalloc(sizeof(struct resource) * 4, GFP_KERNEL);
+
+	if (adev->acp.acp_res == NULL) {
+		kfree(adev->acp.acp_cell);
+		return -ENOMEM;
+	}
+
+	i2s_pdata = kzalloc(sizeof(struct i2s_platform_data) * 2, GFP_KERNEL);
+	if (i2s_pdata == NULL) {
+		kfree(adev->acp.acp_res);
+		kfree(adev->acp.acp_cell);
+		return -ENOMEM;
+	}
+
+	i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+	i2s_pdata[0].cap = DWC_I2S_PLAY;
+	i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+	i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
+	i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
+
+	i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+				DW_I2S_QUIRK_COMP_PARAM1;
+	i2s_pdata[1].cap = DWC_I2S_RECORD;
+	i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
+	i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+	i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+	adev->acp.acp_res[0].name = "acp2x_dma";
+	adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+	adev->acp.acp_res[0].start = acp_base;
+	adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+	adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
+	adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+	adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
+	adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
+
+	adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
+	adev->acp.acp_res[2].flags = IORESOURCE_MEM;
+	adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
+	adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+	adev->acp.acp_res[3].name = "acp2x_dma_irq";
+	adev->acp.acp_res[3].flags = IORESOURCE_IRQ;
+	adev->acp.acp_res[3].start = amdgpu_irq_create_mapping(adev, 162);
+	adev->acp.acp_res[3].end = adev->acp.acp_res[3].start;
+
+	adev->acp.acp_cell[0].name = "acp_audio_dma";
+	adev->acp.acp_cell[0].num_resources = 4;
+	adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+
+	adev->acp.acp_cell[1].name = "designware-i2s";
+	adev->acp.acp_cell[1].num_resources = 1;
+	adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+	adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+	adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+
+	adev->acp.acp_cell[2].name = "designware-i2s";
+	adev->acp.acp_cell[2].num_resources = 1;
+	adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
+	adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
+	adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+
+	r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
+								ACP_DEVS);
+	if (r)
+		return r;
+
+	for (i = 0; i < ACP_DEVS ; i++) {
+		dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+		r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
+		if (r) {
+			dev_err(dev, "Failed to add dev to genpd\n");
+			return r;
384 }
385 }
386
387 return 0;
388}
389
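The ".%d.auto" suffix built by get_mfd_cell_dev() above follows from how the cells are registered: mfd_add_hotplug_devices() creates auto-ID platform devices, so each child ends up on the platform bus as "<cell name>.<id>.auto". A reduced sketch of that registration, with hypothetical names (my_dma_res, my_cells, my_register_cells) and the resource addresses elided:

#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/mfd/core.h>

/* One cell with a single MMIO resource; start/end would be filled from the
 * ACP MMIO BAR exactly as acp_hw_init() does above. */
static struct resource my_dma_res = {
	.name  = "acp2x_dma",
	.flags = IORESOURCE_MEM,
};

static const struct mfd_cell my_cells[] = {
	{
		.name          = "acp_audio_dma",
		.num_resources = 1,
		.resources     = &my_dma_res,
	},
};

static int my_register_cells(struct device *parent)
{
	/* Registers the cell as an auto-ID platform device, so it appears as
	 * "acp_audio_dma.<id>.auto" -- the name get_mfd_cell_dev() rebuilds,
	 * assuming the IDs are handed out in cell order. */
	return mfd_add_hotplug_devices(parent, my_cells, ARRAY_SIZE(my_cells));
}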
390/**
391 * acp_hw_fini - stop the hardware block
392 *
393 * @handle: amdgpu_device pointer (passed as void *)
394 *
395 */
396static int acp_hw_fini(void *handle)
397{
398 int i, ret;
399 struct device *dev;
400
401 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
402
403 for (i = 0; i < ACP_DEVS ; i++) {
404 dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
405 ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
406 /* If removal fails, don't give up; try the rest */
407 if (ret)
408 dev_err(dev, "remove dev from genpd failed\n");
409 }
410
411 mfd_remove_devices(adev->acp.parent);
412 kfree(adev->acp.acp_res);
413 kfree(adev->acp.acp_genpd);
414 kfree(adev->acp.acp_cell);
415
416 return 0;
417}
418
419static int acp_suspend(void *handle)
420{
421 return 0;
422}
423
424static int acp_resume(void *handle)
425{
426 int i, ret;
427 struct acp_pm_domain *apd;
428 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
429
430 /* The SMU block powers on ACP irrespective of the ACP runtime status.
431 * Power it off explicitly based on the genpd runtime status so that the
432 * ACP hardware and the ACP genpd state stay in sync.
433 * 'suspend_power_off' reflects the power status before system suspend.
434 */
435 if (adev->acp.acp_genpd->gpd.suspend_power_off == true) {
436 apd = container_of(&adev->acp.acp_genpd->gpd,
437 struct acp_pm_domain, gpd);
438
439 for (i = 4; i >= 0 ; i--) {
440 ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
441 if (ret)
442 pr_err("ACP tile %d suspend failed\n", i);
443 }
444 }
445 return 0;
446}
447
448static int acp_early_init(void *handle)
449{
450 return 0;
451}
452
453static bool acp_is_idle(void *handle)
454{
455 return true;
456}
457
458static int acp_wait_for_idle(void *handle)
459{
460 return 0;
461}
462
463static int acp_soft_reset(void *handle)
464{
465 return 0;
466}
467
468static void acp_print_status(void *handle)
469{
470 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
471
472 dev_info(adev->dev, "ACP STATUS\n");
473}
474
475static int acp_set_clockgating_state(void *handle,
476 enum amd_clockgating_state state)
477{
478 return 0;
479}
480
481static int acp_set_powergating_state(void *handle,
482 enum amd_powergating_state state)
483{
484 return 0;
485}
486
487const struct amd_ip_funcs acp_ip_funcs = {
488 .early_init = acp_early_init,
489 .late_init = NULL,
490 .sw_init = acp_sw_init,
491 .sw_fini = acp_sw_fini,
492 .hw_init = acp_hw_init,
493 .hw_fini = acp_hw_fini,
494 .suspend = acp_suspend,
495 .resume = acp_resume,
496 .is_idle = acp_is_idle,
497 .wait_for_idle = acp_wait_for_idle,
498 .soft_reset = acp_soft_reset,
499 .print_status = acp_print_status,
500 .set_clockgating_state = acp_set_clockgating_state,
501 .set_powergating_state = acp_set_powergating_state,
502};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
new file mode 100644
index 000000000000..f6e32a639107
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26#ifndef __AMDGPU_ACP_H__
27#define __AMDGPU_ACP_H__
28
29#include <linux/mfd/core.h>
30
31struct amdgpu_acp {
32 struct device *parent;
33 void *cgs_device;
34 struct amd_acp_private *private;
35 struct mfd_cell *acp_cell;
36 struct resource *acp_res;
37 struct acp_pm_domain *acp_genpd;
38};
39
40extern const struct amd_ip_funcs acp_ip_funcs;
41
42#endif /* __AMDGPU_ACP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 9416e0f5c1db..84b0ce39ee14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1514,6 +1514,19 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
1514 return -EINVAL; 1514 return -EINVAL;
1515} 1515}
1516 1516
1517bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
1518{
1519 int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo);
1520 u8 frev, crev;
1521 u16 data_offset, size;
1522
1523 if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
1524 &frev, &crev, &data_offset))
1525 return true;
1526
1527 return false;
1528}
1529
1517void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) 1530void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
1518{ 1531{
1519 uint32_t bios_6_scratch; 1532 uint32_t bios_6_scratch;
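The helper added here only reports whether the VBIOS carries a GPUVirtualizationInfo data table; the interesting part is how it gets used. The sketch below mirrors the amdgpu_device.c hunk later in this pull: when the table is present the device is treated as SR-IOV capable and the card is posted unconditionally, presumably because a virtual function cannot be assumed to have been posted by the VBIOS. All names come from the hunks in this series except the hypothetical wrapper my_post_if_needed().

#include "amdgpu.h"

static void my_post_if_needed(struct amdgpu_device *adev)
{
	adev->virtualization.supports_sr_iov =
		amdgpu_atombios_has_gpu_virtualization_table(adev);

	if (!amdgpu_card_posted(adev) || adev->virtualization.supports_sr_iov) {
		/* No usable POST (or an SR-IOV VF): run the ASIC init table. */
		amdgpu_atom_asic_init(adev->mode_info.atom_context);
	}
}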
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 0ebb959ea435..9e1442053fe4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -196,6 +196,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
196 u8 module_index, 196 u8 module_index,
197 struct atom_mc_reg_table *reg_table); 197 struct atom_mc_reg_table *reg_table);
198 198
199bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
200
199void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock); 201void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
200void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev); 202void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
201void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev); 203void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index f82a2dd83874..90d6fc1618aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -32,6 +32,9 @@
32#include "amdgpu.h" 32#include "amdgpu.h"
33#include "amdgpu_trace.h" 33#include "amdgpu_trace.h"
34 34
35#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
36#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
37
35static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, 38static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
36 struct amdgpu_bo_list **result, 39 struct amdgpu_bo_list **result,
37 int *id) 40 int *id)
@@ -90,6 +93,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
90 93
91 bool has_userptr = false; 94 bool has_userptr = false;
92 unsigned i; 95 unsigned i;
96 int r;
93 97
94 array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry)); 98 array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
95 if (!array) 99 if (!array)
@@ -99,31 +103,34 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
99 for (i = 0; i < num_entries; ++i) { 103 for (i = 0; i < num_entries; ++i) {
100 struct amdgpu_bo_list_entry *entry = &array[i]; 104 struct amdgpu_bo_list_entry *entry = &array[i];
101 struct drm_gem_object *gobj; 105 struct drm_gem_object *gobj;
106 struct mm_struct *usermm;
102 107
103 gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle); 108 gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle);
104 if (!gobj) 109 if (!gobj) {
110 r = -ENOENT;
105 goto error_free; 111 goto error_free;
112 }
106 113
107 entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); 114 entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
108 drm_gem_object_unreference_unlocked(gobj); 115 drm_gem_object_unreference_unlocked(gobj);
109 entry->priority = info[i].bo_priority; 116 entry->priority = min(info[i].bo_priority,
110 entry->prefered_domains = entry->robj->initial_domain; 117 AMDGPU_BO_LIST_MAX_PRIORITY);
111 entry->allowed_domains = entry->prefered_domains; 118 usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm);
112 if (entry->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) 119 if (usermm) {
113 entry->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; 120 if (usermm != current->mm) {
114 if (amdgpu_ttm_tt_has_userptr(entry->robj->tbo.ttm)) { 121 r = -EPERM;
122 goto error_free;
123 }
115 has_userptr = true; 124 has_userptr = true;
116 entry->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
117 entry->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
118 } 125 }
119 entry->tv.bo = &entry->robj->tbo; 126 entry->tv.bo = &entry->robj->tbo;
120 entry->tv.shared = true; 127 entry->tv.shared = true;
121 128
122 if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) 129 if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
123 gds_obj = entry->robj; 130 gds_obj = entry->robj;
124 if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) 131 if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
125 gws_obj = entry->robj; 132 gws_obj = entry->robj;
126 if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA) 133 if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
127 oa_obj = entry->robj; 134 oa_obj = entry->robj;
128 135
129 trace_amdgpu_bo_list_set(list, entry->robj); 136 trace_amdgpu_bo_list_set(list, entry->robj);
@@ -145,7 +152,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
145 152
146error_free: 153error_free:
147 drm_free_large(array); 154 drm_free_large(array);
148 return -ENOENT; 155 return r;
149} 156}
150 157
151struct amdgpu_bo_list * 158struct amdgpu_bo_list *
@@ -161,6 +168,36 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
161 return result; 168 return result;
162} 169}
163 170
171void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
172 struct list_head *validated)
173{
174 /* This is based on the bucket sort with O(n) time complexity.
175 * An item with priority "i" is added to bucket[i]. The lists are then
176 * concatenated in descending order.
177 */
178 struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
179 unsigned i;
180
181 for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
182 INIT_LIST_HEAD(&bucket[i]);
183
184 /* Since buffers which appear sooner in the relocation list are
185 * likely to be used more often than buffers which appear later
186 * in the list, the sort mustn't change the ordering of buffers
187 * with the same priority, i.e. it must be stable.
188 */
189 for (i = 0; i < list->num_entries; i++) {
190 unsigned priority = list->array[i].priority;
191
192 list_add_tail(&list->array[i].tv.head,
193 &bucket[priority]);
194 }
195
196 /* Connect the sorted buckets in the output list. */
197 for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
198 list_splice(&bucket[i], validated);
199}
200
164void amdgpu_bo_list_put(struct amdgpu_bo_list *list) 201void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
165{ 202{
166 mutex_unlock(&list->lock); 203 mutex_unlock(&list->lock);
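The comment in amdgpu_bo_list_get_list() above describes a stable O(n) bucket sort over the 33 possible priorities, with the splice loop arranging the buckets so that higher priorities come first. The following self-contained toy version of the same idea, outside the kernel list API, makes the stability property concrete; struct entry, sort_by_priority() and the fixed-capacity buckets are illustrative only.

#include <stdio.h>

#define MAX_PRIORITY 32u
#define NUM_BUCKETS  (MAX_PRIORITY + 1)

struct entry {
	unsigned int priority;
	int id;				/* stands in for the buffer object */
};

/*
 * Stable O(n) bucket sort by priority, highest priority first, mirroring
 * amdgpu_bo_list_get_list(): entries sharing a priority keep their
 * submission order.
 */
static void sort_by_priority(const struct entry *in, unsigned int n,
			     const struct entry **out)
{
	const struct entry *bucket[NUM_BUCKETS][16];	/* toy, fixed capacity */
	unsigned int count[NUM_BUCKETS] = { 0 };
	unsigned int i, o = 0;
	int p;

	for (i = 0; i < n; i++) {
		unsigned int prio = in[i].priority;

		if (prio > MAX_PRIORITY)
			prio = MAX_PRIORITY;	/* same clamp as amdgpu_bo_list_set() */
		bucket[prio][count[prio]++] = &in[i];
	}

	for (p = MAX_PRIORITY; p >= 0; p--)	/* descending, like the splice loop */
		for (i = 0; i < count[p]; i++)
			out[o++] = bucket[p][i];
}

int main(void)
{
	const struct entry bos[] = { { 2, 0 }, { 5, 1 }, { 2, 2 }, { 0, 3 } };
	const struct entry *sorted[4];
	unsigned int i;

	sort_by_priority(bos, 4, sorted);
	for (i = 0; i < 4; i++)
		printf("prio %u id %d\n", sorted[i]->priority, sorted[i]->id);
	return 0;
}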
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b882e8175615..52c3eb96b199 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -30,47 +30,6 @@
30#include "amdgpu.h" 30#include "amdgpu.h"
31#include "amdgpu_trace.h" 31#include "amdgpu_trace.h"
32 32
33#define AMDGPU_CS_MAX_PRIORITY 32u
34#define AMDGPU_CS_NUM_BUCKETS (AMDGPU_CS_MAX_PRIORITY + 1)
35
36/* This is based on the bucket sort with O(n) time complexity.
37 * An item with priority "i" is added to bucket[i]. The lists are then
38 * concatenated in descending order.
39 */
40struct amdgpu_cs_buckets {
41 struct list_head bucket[AMDGPU_CS_NUM_BUCKETS];
42};
43
44static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b)
45{
46 unsigned i;
47
48 for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++)
49 INIT_LIST_HEAD(&b->bucket[i]);
50}
51
52static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b,
53 struct list_head *item, unsigned priority)
54{
55 /* Since buffers which appear sooner in the relocation list are
56 * likely to be used more often than buffers which appear later
57 * in the list, the sort mustn't change the ordering of buffers
58 * with the same priority, i.e. it must be stable.
59 */
60 list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]);
61}
62
63static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b,
64 struct list_head *out_list)
65{
66 unsigned i;
67
68 /* Connect the sorted buckets in the output list. */
69 for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) {
70 list_splice(&b->bucket[i], out_list);
71 }
72}
73
74int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, 33int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
75 u32 ip_instance, u32 ring, 34 u32 ip_instance, u32 ring,
76 struct amdgpu_ring **out_ring) 35 struct amdgpu_ring **out_ring)
@@ -128,6 +87,7 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
128} 87}
129 88
130static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, 89static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
90 struct amdgpu_user_fence *uf,
131 struct drm_amdgpu_cs_chunk_fence *fence_data) 91 struct drm_amdgpu_cs_chunk_fence *fence_data)
132{ 92{
133 struct drm_gem_object *gobj; 93 struct drm_gem_object *gobj;
@@ -139,17 +99,15 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
139 if (gobj == NULL) 99 if (gobj == NULL)
140 return -EINVAL; 100 return -EINVAL;
141 101
142 p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); 102 uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
143 p->uf.offset = fence_data->offset; 103 uf->offset = fence_data->offset;
144 104
145 if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) { 105 if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
146 drm_gem_object_unreference_unlocked(gobj); 106 drm_gem_object_unreference_unlocked(gobj);
147 return -EINVAL; 107 return -EINVAL;
148 } 108 }
149 109
150 p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo); 110 p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
151 p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
152 p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
153 p->uf_entry.priority = 0; 111 p->uf_entry.priority = 0;
154 p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; 112 p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
155 p->uf_entry.tv.shared = true; 113 p->uf_entry.tv.shared = true;
@@ -160,11 +118,12 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
160 118
161int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) 119int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
162{ 120{
121 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
163 union drm_amdgpu_cs *cs = data; 122 union drm_amdgpu_cs *cs = data;
164 uint64_t *chunk_array_user; 123 uint64_t *chunk_array_user;
165 uint64_t *chunk_array; 124 uint64_t *chunk_array;
166 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 125 struct amdgpu_user_fence uf = {};
167 unsigned size; 126 unsigned size, num_ibs = 0;
168 int i; 127 int i;
169 int ret; 128 int ret;
170 129
@@ -181,15 +140,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
181 goto free_chunk; 140 goto free_chunk;
182 } 141 }
183 142
184 p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
185
186 /* get chunks */ 143 /* get chunks */
187 INIT_LIST_HEAD(&p->validated);
188 chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks); 144 chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
189 if (copy_from_user(chunk_array, chunk_array_user, 145 if (copy_from_user(chunk_array, chunk_array_user,
190 sizeof(uint64_t)*cs->in.num_chunks)) { 146 sizeof(uint64_t)*cs->in.num_chunks)) {
191 ret = -EFAULT; 147 ret = -EFAULT;
192 goto put_bo_list; 148 goto put_ctx;
193 } 149 }
194 150
195 p->nchunks = cs->in.num_chunks; 151 p->nchunks = cs->in.num_chunks;
@@ -197,7 +153,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
197 GFP_KERNEL); 153 GFP_KERNEL);
198 if (!p->chunks) { 154 if (!p->chunks) {
199 ret = -ENOMEM; 155 ret = -ENOMEM;
200 goto put_bo_list; 156 goto put_ctx;
201 } 157 }
202 158
203 for (i = 0; i < p->nchunks; i++) { 159 for (i = 0; i < p->nchunks; i++) {
@@ -217,7 +173,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
217 173
218 size = p->chunks[i].length_dw; 174 size = p->chunks[i].length_dw;
219 cdata = (void __user *)(unsigned long)user_chunk.chunk_data; 175 cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
220 p->chunks[i].user_ptr = cdata;
221 176
222 p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); 177 p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
223 if (p->chunks[i].kdata == NULL) { 178 if (p->chunks[i].kdata == NULL) {
@@ -233,7 +188,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
233 188
234 switch (p->chunks[i].chunk_id) { 189 switch (p->chunks[i].chunk_id) {
235 case AMDGPU_CHUNK_ID_IB: 190 case AMDGPU_CHUNK_ID_IB:
236 p->num_ibs++; 191 ++num_ibs;
237 break; 192 break;
238 193
239 case AMDGPU_CHUNK_ID_FENCE: 194 case AMDGPU_CHUNK_ID_FENCE:
@@ -243,7 +198,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
243 goto free_partial_kdata; 198 goto free_partial_kdata;
244 } 199 }
245 200
246 ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata); 201 ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
247 if (ret) 202 if (ret)
248 goto free_partial_kdata; 203 goto free_partial_kdata;
249 204
@@ -258,12 +213,11 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
258 } 213 }
259 } 214 }
260 215
261 216 ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
262 p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); 217 if (ret)
263 if (!p->ibs) {
264 ret = -ENOMEM;
265 goto free_all_kdata; 218 goto free_all_kdata;
266 } 219
220 p->job->uf = uf;
267 221
268 kfree(chunk_array); 222 kfree(chunk_array);
269 return 0; 223 return 0;
@@ -274,9 +228,7 @@ free_partial_kdata:
274 for (; i >= 0; i--) 228 for (; i >= 0; i--)
275 drm_free_large(p->chunks[i].kdata); 229 drm_free_large(p->chunks[i].kdata);
276 kfree(p->chunks); 230 kfree(p->chunks);
277put_bo_list: 231put_ctx:
278 if (p->bo_list)
279 amdgpu_bo_list_put(p->bo_list);
280 amdgpu_ctx_put(p->ctx); 232 amdgpu_ctx_put(p->ctx);
281free_chunk: 233free_chunk:
282 kfree(chunk_array); 234 kfree(chunk_array);
@@ -336,80 +288,76 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
336 return max(bytes_moved_threshold, 1024*1024ull); 288 return max(bytes_moved_threshold, 1024*1024ull);
337} 289}
338 290
339int amdgpu_cs_list_validate(struct amdgpu_device *adev, 291int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
340 struct amdgpu_vm *vm,
341 struct list_head *validated) 292 struct list_head *validated)
342{ 293{
343 struct amdgpu_bo_list_entry *lobj; 294 struct amdgpu_bo_list_entry *lobj;
344 struct amdgpu_bo *bo; 295 u64 initial_bytes_moved;
345 u64 bytes_moved = 0, initial_bytes_moved;
346 u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
347 int r; 296 int r;
348 297
349 list_for_each_entry(lobj, validated, tv.head) { 298 list_for_each_entry(lobj, validated, tv.head) {
350 bo = lobj->robj; 299 struct amdgpu_bo *bo = lobj->robj;
351 if (!bo->pin_count) { 300 struct mm_struct *usermm;
352 u32 domain = lobj->prefered_domains; 301 uint32_t domain;
353 u32 current_domain =
354 amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
355
356 /* Check if this buffer will be moved and don't move it
357 * if we have moved too many buffers for this IB already.
358 *
359 * Note that this allows moving at least one buffer of
360 * any size, because it doesn't take the current "bo"
361 * into account. We don't want to disallow buffer moves
362 * completely.
363 */
364 if ((lobj->allowed_domains & current_domain) != 0 &&
365 (domain & current_domain) == 0 && /* will be moved */
366 bytes_moved > bytes_moved_threshold) {
367 /* don't move it */
368 domain = current_domain;
369 }
370 302
371 retry: 303 usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
372 amdgpu_ttm_placement_from_domain(bo, domain); 304 if (usermm && usermm != current->mm)
373 initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); 305 return -EPERM;
374 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); 306
375 bytes_moved += atomic64_read(&adev->num_bytes_moved) - 307 if (bo->pin_count)
376 initial_bytes_moved; 308 continue;
377 309
378 if (unlikely(r)) { 310 /* Avoid moving this one if we have moved too many buffers
379 if (r != -ERESTARTSYS && domain != lobj->allowed_domains) { 311 * for this IB already.
380 domain = lobj->allowed_domains; 312 *
381 goto retry; 313 * Note that this allows moving at least one buffer of
382 } 314 * any size, because it doesn't take the current "bo"
383 return r; 315 * into account. We don't want to disallow buffer moves
316 * completely.
317 */
318 if (p->bytes_moved <= p->bytes_moved_threshold)
319 domain = bo->prefered_domains;
320 else
321 domain = bo->allowed_domains;
322
323 retry:
324 amdgpu_ttm_placement_from_domain(bo, domain);
325 initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
326 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
327 p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
328 initial_bytes_moved;
329
330 if (unlikely(r)) {
331 if (r != -ERESTARTSYS && domain != bo->allowed_domains) {
332 domain = bo->allowed_domains;
333 goto retry;
384 } 334 }
335 return r;
385 } 336 }
386 lobj->bo_va = amdgpu_vm_bo_find(vm, bo);
387 } 337 }
388 return 0; 338 return 0;
389} 339}
390 340
391static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) 341static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
342 union drm_amdgpu_cs *cs)
392{ 343{
393 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 344 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
394 struct amdgpu_cs_buckets buckets;
395 struct list_head duplicates; 345 struct list_head duplicates;
396 bool need_mmap_lock = false; 346 bool need_mmap_lock = false;
397 int i, r; 347 int r;
398 348
349 INIT_LIST_HEAD(&p->validated);
350
351 p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
399 if (p->bo_list) { 352 if (p->bo_list) {
400 need_mmap_lock = p->bo_list->has_userptr; 353 need_mmap_lock = p->bo_list->has_userptr;
401 amdgpu_cs_buckets_init(&buckets); 354 amdgpu_bo_list_get_list(p->bo_list, &p->validated);
402 for (i = 0; i < p->bo_list->num_entries; i++)
403 amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head,
404 p->bo_list->array[i].priority);
405
406 amdgpu_cs_buckets_get_list(&buckets, &p->validated);
407 } 355 }
408 356
409 INIT_LIST_HEAD(&duplicates); 357 INIT_LIST_HEAD(&duplicates);
410 amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); 358 amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
411 359
412 if (p->uf.bo) 360 if (p->job->uf.bo)
413 list_add(&p->uf_entry.tv.head, &p->validated); 361 list_add(&p->uf_entry.tv.head, &p->validated);
414 362
415 if (need_mmap_lock) 363 if (need_mmap_lock)
@@ -421,11 +369,27 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
421 369
422 amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); 370 amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
423 371
424 r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates); 372 p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
373 p->bytes_moved = 0;
374
375 r = amdgpu_cs_list_validate(p, &duplicates);
376 if (r)
377 goto error_validate;
378
379 r = amdgpu_cs_list_validate(p, &p->validated);
425 if (r) 380 if (r)
426 goto error_validate; 381 goto error_validate;
427 382
428 r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated); 383 if (p->bo_list) {
384 struct amdgpu_vm *vm = &fpriv->vm;
385 unsigned i;
386
387 for (i = 0; i < p->bo_list->num_entries; i++) {
388 struct amdgpu_bo *bo = p->bo_list->array[i].robj;
389
390 p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
391 }
392 }
429 393
430error_validate: 394error_validate:
431 if (r) { 395 if (r) {
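The reworked validation loop above applies a per-submission byte budget: a buffer keeps its preferred placement only while the budget for this IB is unspent, after which any allowed placement is accepted so validation stops triggering further moves. The decision itself reduces to a one-liner; here is a hedged restatement with a hypothetical helper name, where the parameters mirror p->bytes_moved, p->bytes_moved_threshold, bo->prefered_domains and bo->allowed_domains.

#include <linux/types.h>

static u32 pick_validation_domain(u64 bytes_moved, u64 threshold,
				  u32 prefered_domains, u32 allowed_domains)
{
	/* Prefer the ideal placement until the move budget is exhausted. */
	return (bytes_moved <= threshold) ? prefered_domains : allowed_domains;
}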
@@ -447,7 +411,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
447 411
448 list_for_each_entry(e, &p->validated, tv.head) { 412 list_for_each_entry(e, &p->validated, tv.head) {
449 struct reservation_object *resv = e->robj->tbo.resv; 413 struct reservation_object *resv = e->robj->tbo.resv;
450 r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp); 414 r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);
451 415
452 if (r) 416 if (r)
453 return r; 417 return r;
@@ -510,11 +474,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
510 for (i = 0; i < parser->nchunks; i++) 474 for (i = 0; i < parser->nchunks; i++)
511 drm_free_large(parser->chunks[i].kdata); 475 drm_free_large(parser->chunks[i].kdata);
512 kfree(parser->chunks); 476 kfree(parser->chunks);
513 if (parser->ibs) 477 if (parser->job)
514 for (i = 0; i < parser->num_ibs; i++) 478 amdgpu_job_free(parser->job);
515 amdgpu_ib_free(parser->adev, &parser->ibs[i]);
516 kfree(parser->ibs);
517 amdgpu_bo_unref(&parser->uf.bo);
518 amdgpu_bo_unref(&parser->uf_entry.robj); 479 amdgpu_bo_unref(&parser->uf_entry.robj);
519} 480}
520 481
@@ -530,7 +491,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
530 if (r) 491 if (r)
531 return r; 492 return r;
532 493
533 r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence); 494 r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
534 if (r) 495 if (r)
535 return r; 496 return r;
536 497
@@ -556,14 +517,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
556 return r; 517 return r;
557 518
558 f = bo_va->last_pt_update; 519 f = bo_va->last_pt_update;
559 r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f); 520 r = amdgpu_sync_fence(adev, &p->job->sync, f);
560 if (r) 521 if (r)
561 return r; 522 return r;
562 } 523 }
563 524
564 } 525 }
565 526
566 r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync); 527 r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);
567 528
568 if (amdgpu_vm_debug && p->bo_list) { 529 if (amdgpu_vm_debug && p->bo_list) {
569 /* Invalidate all BOs to test for userspace bugs */ 530 /* Invalidate all BOs to test for userspace bugs */
@@ -581,29 +542,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
581} 542}
582 543
583static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, 544static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
584 struct amdgpu_cs_parser *parser) 545 struct amdgpu_cs_parser *p)
585{ 546{
586 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; 547 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
587 struct amdgpu_vm *vm = &fpriv->vm; 548 struct amdgpu_vm *vm = &fpriv->vm;
588 struct amdgpu_ring *ring; 549 struct amdgpu_ring *ring = p->job->ring;
589 int i, r; 550 int i, r;
590 551
591 if (parser->num_ibs == 0)
592 return 0;
593
594 /* Only for UVD/VCE VM emulation */ 552 /* Only for UVD/VCE VM emulation */
595 for (i = 0; i < parser->num_ibs; i++) { 553 if (ring->funcs->parse_cs) {
596 ring = parser->ibs[i].ring; 554 for (i = 0; i < p->job->num_ibs; i++) {
597 if (ring->funcs->parse_cs) { 555 r = amdgpu_ring_parse_cs(ring, p, i);
598 r = amdgpu_ring_parse_cs(ring, parser, i);
599 if (r) 556 if (r)
600 return r; 557 return r;
601 } 558 }
602 } 559 }
603 560
604 r = amdgpu_bo_vm_update_pte(parser, vm); 561 r = amdgpu_bo_vm_update_pte(p, vm);
605 if (!r) 562 if (!r)
606 amdgpu_cs_sync_rings(parser); 563 amdgpu_cs_sync_rings(p);
607 564
608 return r; 565 return r;
609} 566}
@@ -626,14 +583,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
626 int i, j; 583 int i, j;
627 int r; 584 int r;
628 585
629 for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) { 586 for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
630 struct amdgpu_cs_chunk *chunk; 587 struct amdgpu_cs_chunk *chunk;
631 struct amdgpu_ib *ib; 588 struct amdgpu_ib *ib;
632 struct drm_amdgpu_cs_chunk_ib *chunk_ib; 589 struct drm_amdgpu_cs_chunk_ib *chunk_ib;
633 struct amdgpu_ring *ring; 590 struct amdgpu_ring *ring;
634 591
635 chunk = &parser->chunks[i]; 592 chunk = &parser->chunks[i];
636 ib = &parser->ibs[j]; 593 ib = &parser->job->ibs[j];
637 chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; 594 chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
638 595
639 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) 596 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
@@ -645,6 +602,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
645 if (r) 602 if (r)
646 return r; 603 return r;
647 604
605 if (parser->job->ring && parser->job->ring != ring)
606 return -EINVAL;
607
608 parser->job->ring = ring;
609
648 if (ring->funcs->parse_cs) { 610 if (ring->funcs->parse_cs) {
649 struct amdgpu_bo_va_mapping *m; 611 struct amdgpu_bo_va_mapping *m;
650 struct amdgpu_bo *aobj = NULL; 612 struct amdgpu_bo *aobj = NULL;
@@ -673,7 +635,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
673 offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE; 635 offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
674 kptr += chunk_ib->va_start - offset; 636 kptr += chunk_ib->va_start - offset;
675 637
676 r = amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib); 638 r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
677 if (r) { 639 if (r) {
678 DRM_ERROR("Failed to get ib !\n"); 640 DRM_ERROR("Failed to get ib !\n");
679 return r; 641 return r;
@@ -682,7 +644,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
682 memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); 644 memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
683 amdgpu_bo_kunmap(aobj); 645 amdgpu_bo_kunmap(aobj);
684 } else { 646 } else {
685 r = amdgpu_ib_get(ring, vm, 0, ib); 647 r = amdgpu_ib_get(adev, vm, 0, ib);
686 if (r) { 648 if (r) {
687 DRM_ERROR("Failed to get ib !\n"); 649 DRM_ERROR("Failed to get ib !\n");
688 return r; 650 return r;
@@ -697,15 +659,12 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
697 j++; 659 j++;
698 } 660 }
699 661
700 if (!parser->num_ibs)
701 return 0;
702
703 /* add GDS resources to first IB */ 662 /* add GDS resources to first IB */
704 if (parser->bo_list) { 663 if (parser->bo_list) {
705 struct amdgpu_bo *gds = parser->bo_list->gds_obj; 664 struct amdgpu_bo *gds = parser->bo_list->gds_obj;
706 struct amdgpu_bo *gws = parser->bo_list->gws_obj; 665 struct amdgpu_bo *gws = parser->bo_list->gws_obj;
707 struct amdgpu_bo *oa = parser->bo_list->oa_obj; 666 struct amdgpu_bo *oa = parser->bo_list->oa_obj;
708 struct amdgpu_ib *ib = &parser->ibs[0]; 667 struct amdgpu_ib *ib = &parser->job->ibs[0];
709 668
710 if (gds) { 669 if (gds) {
711 ib->gds_base = amdgpu_bo_gpu_offset(gds); 670 ib->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -721,15 +680,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
721 } 680 }
722 } 681 }
723 /* wrap the last IB with user fence */ 682 /* wrap the last IB with user fence */
724 if (parser->uf.bo) { 683 if (parser->job->uf.bo) {
725 struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1]; 684 struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];
726 685
727 /* UVD & VCE fw doesn't support user fences */ 686 /* UVD & VCE fw doesn't support user fences */
728 if (ib->ring->type == AMDGPU_RING_TYPE_UVD || 687 if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
729 ib->ring->type == AMDGPU_RING_TYPE_VCE) 688 parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
730 return -EINVAL; 689 return -EINVAL;
731 690
732 ib->user = &parser->uf; 691 ib->user = &parser->job->uf;
733 } 692 }
734 693
735 return 0; 694 return 0;
@@ -739,14 +698,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
739 struct amdgpu_cs_parser *p) 698 struct amdgpu_cs_parser *p)
740{ 699{
741 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 700 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
742 struct amdgpu_ib *ib;
743 int i, j, r; 701 int i, j, r;
744 702
745 if (!p->num_ibs)
746 return 0;
747
748 /* Add dependencies to first IB */
749 ib = &p->ibs[0];
750 for (i = 0; i < p->nchunks; ++i) { 703 for (i = 0; i < p->nchunks; ++i) {
751 struct drm_amdgpu_cs_chunk_dep *deps; 704 struct drm_amdgpu_cs_chunk_dep *deps;
752 struct amdgpu_cs_chunk *chunk; 705 struct amdgpu_cs_chunk *chunk;
@@ -784,7 +737,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
784 return r; 737 return r;
785 738
786 } else if (fence) { 739 } else if (fence) {
787 r = amdgpu_sync_fence(adev, &ib->sync, fence); 740 r = amdgpu_sync_fence(adev, &p->job->sync,
741 fence);
788 fence_put(fence); 742 fence_put(fence);
789 amdgpu_ctx_put(ctx); 743 amdgpu_ctx_put(ctx);
790 if (r) 744 if (r)
@@ -796,15 +750,36 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
796 return 0; 750 return 0;
797} 751}
798 752
799static int amdgpu_cs_free_job(struct amdgpu_job *job) 753static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
754 union drm_amdgpu_cs *cs)
800{ 755{
801 int i; 756 struct amdgpu_ring *ring = p->job->ring;
802 if (job->ibs) 757 struct amd_sched_fence *fence;
803 for (i = 0; i < job->num_ibs; i++) 758 struct amdgpu_job *job;
804 amdgpu_ib_free(job->adev, &job->ibs[i]); 759
805 kfree(job->ibs); 760 job = p->job;
806 if (job->uf.bo) 761 p->job = NULL;
807 amdgpu_bo_unref(&job->uf.bo); 762
763 job->base.sched = &ring->sched;
764 job->base.s_entity = &p->ctx->rings[ring->idx].entity;
765 job->owner = p->filp;
766
767 fence = amd_sched_fence_create(job->base.s_entity, p->filp);
768 if (!fence) {
769 amdgpu_job_free(job);
770 return -ENOMEM;
771 }
772
773 job->base.s_fence = fence;
774 p->fence = fence_get(&fence->base);
775
776 cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring,
777 &fence->base);
778 job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
779
780 trace_amdgpu_cs_ioctl(job);
781 amd_sched_entity_push_job(&job->base);
782
808 return 0; 783 return 0;
809} 784}
810 785
@@ -829,7 +804,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
829 r = amdgpu_cs_handle_lockup(adev, r); 804 r = amdgpu_cs_handle_lockup(adev, r);
830 return r; 805 return r;
831 } 806 }
832 r = amdgpu_cs_parser_relocs(&parser); 807 r = amdgpu_cs_parser_bos(&parser, data);
833 if (r == -ENOMEM) 808 if (r == -ENOMEM)
834 DRM_ERROR("Not enough memory for command submission!\n"); 809 DRM_ERROR("Not enough memory for command submission!\n");
835 else if (r && r != -ERESTARTSYS) 810 else if (r && r != -ERESTARTSYS)
@@ -848,68 +823,14 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
848 if (r) 823 if (r)
849 goto out; 824 goto out;
850 825
851 for (i = 0; i < parser.num_ibs; i++) 826 for (i = 0; i < parser.job->num_ibs; i++)
852 trace_amdgpu_cs(&parser, i); 827 trace_amdgpu_cs(&parser, i);
853 828
854 r = amdgpu_cs_ib_vm_chunk(adev, &parser); 829 r = amdgpu_cs_ib_vm_chunk(adev, &parser);
855 if (r) 830 if (r)
856 goto out; 831 goto out;
857 832
858 if (amdgpu_enable_scheduler && parser.num_ibs) { 833 r = amdgpu_cs_submit(&parser, cs);
859 struct amdgpu_ring * ring = parser.ibs->ring;
860 struct amd_sched_fence *fence;
861 struct amdgpu_job *job;
862
863 job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
864 if (!job) {
865 r = -ENOMEM;
866 goto out;
867 }
868
869 job->base.sched = &ring->sched;
870 job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
871 job->adev = parser.adev;
872 job->owner = parser.filp;
873 job->free_job = amdgpu_cs_free_job;
874
875 job->ibs = parser.ibs;
876 job->num_ibs = parser.num_ibs;
877 parser.ibs = NULL;
878 parser.num_ibs = 0;
879
880 if (job->ibs[job->num_ibs - 1].user) {
881 job->uf = parser.uf;
882 job->ibs[job->num_ibs - 1].user = &job->uf;
883 parser.uf.bo = NULL;
884 }
885
886 fence = amd_sched_fence_create(job->base.s_entity,
887 parser.filp);
888 if (!fence) {
889 r = -ENOMEM;
890 amdgpu_cs_free_job(job);
891 kfree(job);
892 goto out;
893 }
894 job->base.s_fence = fence;
895 parser.fence = fence_get(&fence->base);
896
897 cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
898 &fence->base);
899 job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
900
901 trace_amdgpu_cs_ioctl(job);
902 amd_sched_entity_push_job(&job->base);
903
904 } else {
905 struct amdgpu_fence *fence;
906
907 r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
908 parser.filp);
909 fence = parser.ibs[parser.num_ibs - 1].fence;
910 parser.fence = fence_get(&fence->base);
911 cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
912 }
913 834
914out: 835out:
915 amdgpu_cs_parser_fini(&parser, r, reserved_buffers); 836 amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
@@ -980,30 +901,36 @@ struct amdgpu_bo_va_mapping *
980amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, 901amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
981 uint64_t addr, struct amdgpu_bo **bo) 902 uint64_t addr, struct amdgpu_bo **bo)
982{ 903{
983 struct amdgpu_bo_list_entry *reloc;
984 struct amdgpu_bo_va_mapping *mapping; 904 struct amdgpu_bo_va_mapping *mapping;
905 unsigned i;
906
907 if (!parser->bo_list)
908 return NULL;
985 909
986 addr /= AMDGPU_GPU_PAGE_SIZE; 910 addr /= AMDGPU_GPU_PAGE_SIZE;
987 911
988 list_for_each_entry(reloc, &parser->validated, tv.head) { 912 for (i = 0; i < parser->bo_list->num_entries; i++) {
989 if (!reloc->bo_va) 913 struct amdgpu_bo_list_entry *lobj;
914
915 lobj = &parser->bo_list->array[i];
916 if (!lobj->bo_va)
990 continue; 917 continue;
991 918
992 list_for_each_entry(mapping, &reloc->bo_va->valids, list) { 919 list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
993 if (mapping->it.start > addr || 920 if (mapping->it.start > addr ||
994 addr > mapping->it.last) 921 addr > mapping->it.last)
995 continue; 922 continue;
996 923
997 *bo = reloc->bo_va->bo; 924 *bo = lobj->bo_va->bo;
998 return mapping; 925 return mapping;
999 } 926 }
1000 927
1001 list_for_each_entry(mapping, &reloc->bo_va->invalids, list) { 928 list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
1002 if (mapping->it.start > addr || 929 if (mapping->it.start > addr ||
1003 addr > mapping->it.last) 930 addr > mapping->it.last)
1004 continue; 931 continue;
1005 932
1006 *bo = reloc->bo_va->bo; 933 *bo = lobj->bo_va->bo;
1007 return mapping; 934 return mapping;
1008 } 935 }
1009 } 936 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 17d1fb12128a..17e13621fae9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -25,8 +25,7 @@
25#include <drm/drmP.h> 25#include <drm/drmP.h>
26#include "amdgpu.h" 26#include "amdgpu.h"
27 27
28int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri, 28static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
29 struct amdgpu_ctx *ctx)
30{ 29{
31 unsigned i, j; 30 unsigned i, j;
32 int r; 31 int r;
@@ -35,44 +34,38 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
35 ctx->adev = adev; 34 ctx->adev = adev;
36 kref_init(&ctx->refcount); 35 kref_init(&ctx->refcount);
37 spin_lock_init(&ctx->ring_lock); 36 spin_lock_init(&ctx->ring_lock);
38 ctx->fences = kzalloc(sizeof(struct fence *) * amdgpu_sched_jobs * 37 ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
39 AMDGPU_MAX_RINGS, GFP_KERNEL); 38 sizeof(struct fence*), GFP_KERNEL);
40 if (!ctx->fences) 39 if (!ctx->fences)
41 return -ENOMEM; 40 return -ENOMEM;
42 41
43 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 42 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
44 ctx->rings[i].sequence = 1; 43 ctx->rings[i].sequence = 1;
45 ctx->rings[i].fences = (void *)ctx->fences + sizeof(struct fence *) * 44 ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
46 amdgpu_sched_jobs * i;
47 } 45 }
48 if (amdgpu_enable_scheduler) { 46 /* create context entity for each ring */
49 /* create context entity for each ring */ 47 for (i = 0; i < adev->num_rings; i++) {
50 for (i = 0; i < adev->num_rings; i++) { 48 struct amdgpu_ring *ring = adev->rings[i];
51 struct amd_sched_rq *rq; 49 struct amd_sched_rq *rq;
52 if (pri >= AMD_SCHED_MAX_PRIORITY) { 50
53 kfree(ctx->fences); 51 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
54 return -EINVAL; 52 r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
55 } 53 rq, amdgpu_sched_jobs);
56 rq = &adev->rings[i]->sched.sched_rq[pri]; 54 if (r)
57 r = amd_sched_entity_init(&adev->rings[i]->sched, 55 break;
58 &ctx->rings[i].entity, 56 }
59 rq, amdgpu_sched_jobs); 57
60 if (r) 58 if (i < adev->num_rings) {
61 break; 59 for (j = 0; j < i; j++)
62 } 60 amd_sched_entity_fini(&adev->rings[j]->sched,
63 61 &ctx->rings[j].entity);
64 if (i < adev->num_rings) { 62 kfree(ctx->fences);
65 for (j = 0; j < i; j++) 63 return r;
66 amd_sched_entity_fini(&adev->rings[j]->sched,
67 &ctx->rings[j].entity);
68 kfree(ctx->fences);
69 return r;
70 }
71 } 64 }
72 return 0; 65 return 0;
73} 66}
74 67
75void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) 68static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
76{ 69{
77 struct amdgpu_device *adev = ctx->adev; 70 struct amdgpu_device *adev = ctx->adev;
78 unsigned i, j; 71 unsigned i, j;
@@ -85,11 +78,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
85 fence_put(ctx->rings[i].fences[j]); 78 fence_put(ctx->rings[i].fences[j]);
86 kfree(ctx->fences); 79 kfree(ctx->fences);
87 80
88 if (amdgpu_enable_scheduler) { 81 for (i = 0; i < adev->num_rings; i++)
89 for (i = 0; i < adev->num_rings; i++) 82 amd_sched_entity_fini(&adev->rings[i]->sched,
90 amd_sched_entity_fini(&adev->rings[i]->sched, 83 &ctx->rings[i].entity);
91 &ctx->rings[i].entity);
92 }
93} 84}
94 85
95static int amdgpu_ctx_alloc(struct amdgpu_device *adev, 86static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -112,7 +103,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
112 return r; 103 return r;
113 } 104 }
114 *id = (uint32_t)r; 105 *id = (uint32_t)r;
115 r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_NORMAL, ctx); 106 r = amdgpu_ctx_init(adev, ctx);
116 if (r) { 107 if (r) {
117 idr_remove(&mgr->ctx_handles, *id); 108 idr_remove(&mgr->ctx_handles, *id);
118 *id = 0; 109 *id = 0;
@@ -200,18 +191,18 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
200 id = args->in.ctx_id; 191 id = args->in.ctx_id;
201 192
202 switch (args->in.op) { 193 switch (args->in.op) {
203 case AMDGPU_CTX_OP_ALLOC_CTX: 194 case AMDGPU_CTX_OP_ALLOC_CTX:
204 r = amdgpu_ctx_alloc(adev, fpriv, &id); 195 r = amdgpu_ctx_alloc(adev, fpriv, &id);
205 args->out.alloc.ctx_id = id; 196 args->out.alloc.ctx_id = id;
206 break; 197 break;
207 case AMDGPU_CTX_OP_FREE_CTX: 198 case AMDGPU_CTX_OP_FREE_CTX:
208 r = amdgpu_ctx_free(fpriv, id); 199 r = amdgpu_ctx_free(fpriv, id);
209 break; 200 break;
210 case AMDGPU_CTX_OP_QUERY_STATE: 201 case AMDGPU_CTX_OP_QUERY_STATE:
211 r = amdgpu_ctx_query(adev, fpriv, id, &args->out); 202 r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
212 break; 203 break;
213 default: 204 default:
214 return -EINVAL; 205 return -EINVAL;
215 } 206 }
216 207
217 return r; 208 return r;
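One easy-to-miss detail in the amdgpu_ctx_init() rework above: the fence array allocation moves from an open-coded kzalloc(a * b * size) to kcalloc(n, size), which checks the count times element-size multiplication for overflow, and the per-ring slice is now taken by indexing the typed array instead of void-pointer arithmetic. A sketch of the resulting pattern; alloc_fence_slots(), jobs and rings are stand-ins for the real names.

#include <linux/slab.h>

struct fence;	/* opaque here */

static struct fence **alloc_fence_slots(unsigned int jobs, unsigned int rings,
					struct fence ***per_ring)
{
	struct fence **fences;
	unsigned int i;

	/* kcalloc() returns NULL rather than a too-small buffer on overflow. */
	fences = kcalloc(jobs * rings, sizeof(struct fence *), GFP_KERNEL);
	if (!fences)
		return NULL;

	for (i = 0; i < rings; i++)
		per_ring[i] = &fences[jobs * i];	/* ring i owns 'jobs' slots */

	return fences;
}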
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 65531463f88e..db20d2783def 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -636,31 +636,6 @@ bool amdgpu_card_posted(struct amdgpu_device *adev)
636} 636}
637 637
638/** 638/**
639 * amdgpu_boot_test_post_card - check and possibly initialize the hw
640 *
641 * @adev: amdgpu_device pointer
642 *
643 * Check if the asic is initialized and if not, attempt to initialize
644 * it (all asics).
645 * Returns true if initialized or false if not.
646 */
647bool amdgpu_boot_test_post_card(struct amdgpu_device *adev)
648{
649 if (amdgpu_card_posted(adev))
650 return true;
651
652 if (adev->bios) {
653 DRM_INFO("GPU not posted. posting now...\n");
654 if (adev->is_atom_bios)
655 amdgpu_atom_asic_init(adev->mode_info.atom_context);
656 return true;
657 } else {
658 dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
659 return false;
660 }
661}
662
663/**
664 * amdgpu_dummy_page_init - init dummy page used by the driver 639 * amdgpu_dummy_page_init - init dummy page used by the driver
665 * 640 *
666 * @adev: amdgpu_device pointer 641 * @adev: amdgpu_device pointer
@@ -959,12 +934,6 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
959 amdgpu_sched_jobs); 934 amdgpu_sched_jobs);
960 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); 935 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
961 } 936 }
962 /* vramlimit must be a power of two */
963 if (!amdgpu_check_pot_argument(amdgpu_vram_limit)) {
964 dev_warn(adev->dev, "vram limit (%d) must be a power of 2\n",
965 amdgpu_vram_limit);
966 amdgpu_vram_limit = 0;
967 }
968 937
969 if (amdgpu_gart_size != -1) { 938 if (amdgpu_gart_size != -1) {
970 /* gtt size must be power of two and greater or equal to 32M */ 939 /* gtt size must be power of two and greater or equal to 32M */
@@ -1434,7 +1403,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1434 adev->mman.buffer_funcs = NULL; 1403 adev->mman.buffer_funcs = NULL;
1435 adev->mman.buffer_funcs_ring = NULL; 1404 adev->mman.buffer_funcs_ring = NULL;
1436 adev->vm_manager.vm_pte_funcs = NULL; 1405 adev->vm_manager.vm_pte_funcs = NULL;
1437 adev->vm_manager.vm_pte_funcs_ring = NULL; 1406 adev->vm_manager.vm_pte_num_rings = 0;
1438 adev->gart.gart_funcs = NULL; 1407 adev->gart.gart_funcs = NULL;
1439 adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS); 1408 adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
1440 1409
@@ -1455,9 +1424,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1455 1424
1456 /* mutex initialization are all done here so we 1425 /* mutex initialization are all done here so we
1457 * can recall function without having locking issues */ 1426 * can recall function without having locking issues */
1458 mutex_init(&adev->ring_lock); 1427 mutex_init(&adev->vm_manager.lock);
1459 atomic_set(&adev->irq.ih.lock, 0); 1428 atomic_set(&adev->irq.ih.lock, 0);
1460 mutex_init(&adev->gem.mutex);
1461 mutex_init(&adev->pm.mutex); 1429 mutex_init(&adev->pm.mutex);
1462 mutex_init(&adev->gfx.gpu_clock_mutex); 1430 mutex_init(&adev->gfx.gpu_clock_mutex);
1463 mutex_init(&adev->srbm_mutex); 1431 mutex_init(&adev->srbm_mutex);
@@ -1531,8 +1499,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1531 return r; 1499 return r;
1532 } 1500 }
1533 1501
1502 /* See if the asic supports SR-IOV */
1503 adev->virtualization.supports_sr_iov =
1504 amdgpu_atombios_has_gpu_virtualization_table(adev);
1505
1534 /* Post card if necessary */ 1506 /* Post card if necessary */
1535 if (!amdgpu_card_posted(adev)) { 1507 if (!amdgpu_card_posted(adev) ||
1508 adev->virtualization.supports_sr_iov) {
1536 if (!adev->bios) { 1509 if (!adev->bios) {
1537 dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); 1510 dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
1538 return -EINVAL; 1511 return -EINVAL;
@@ -1577,11 +1550,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1577 return r; 1550 return r;
1578 } 1551 }
1579 1552
1580 r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_KERNEL, &adev->kernel_ctx);
1581 if (r) {
1582 dev_err(adev->dev, "failed to create kernel context (%d).\n", r);
1583 return r;
1584 }
1585 r = amdgpu_ib_ring_tests(adev); 1553 r = amdgpu_ib_ring_tests(adev);
1586 if (r) 1554 if (r)
1587 DRM_ERROR("ib ring test failed (%d).\n", r); 1555 DRM_ERROR("ib ring test failed (%d).\n", r);
@@ -1645,7 +1613,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
1645 adev->shutdown = true; 1613 adev->shutdown = true;
1646 /* evict vram memory */ 1614 /* evict vram memory */
1647 amdgpu_bo_evict_vram(adev); 1615 amdgpu_bo_evict_vram(adev);
1648 amdgpu_ctx_fini(&adev->kernel_ctx);
1649 amdgpu_ib_pool_fini(adev); 1616 amdgpu_ib_pool_fini(adev);
1650 amdgpu_fence_driver_fini(adev); 1617 amdgpu_fence_driver_fini(adev);
1651 amdgpu_fbdev_fini(adev); 1618 amdgpu_fbdev_fini(adev);
@@ -1889,6 +1856,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
1889 1856
1890retry: 1857retry:
1891 r = amdgpu_asic_reset(adev); 1858 r = amdgpu_asic_reset(adev);
1859 /* post card */
1860 amdgpu_atom_asic_init(adev->mode_info.atom_context);
1861
1892 if (!r) { 1862 if (!r) {
1893 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); 1863 dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
1894 r = amdgpu_resume(adev); 1864 r = amdgpu_resume(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index acd066d0a805..2cb53c24dec0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,32 +35,30 @@
35#include <drm/drm_crtc_helper.h> 35#include <drm/drm_crtc_helper.h>
36#include <drm/drm_edid.h> 36#include <drm/drm_edid.h>
37 37
38static void amdgpu_flip_wait_fence(struct amdgpu_device *adev, 38static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb)
39 struct fence **f)
40{ 39{
41 struct amdgpu_fence *fence; 40 struct amdgpu_flip_work *work =
42 long r; 41 container_of(cb, struct amdgpu_flip_work, cb);
43 42
44 if (*f == NULL) 43 fence_put(f);
45 return; 44 schedule_work(&work->flip_work);
45}
46 46
47 fence = to_amdgpu_fence(*f); 47static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
48 if (fence) { 48 struct fence **f)
49 r = fence_wait(&fence->base, false); 49{
50 if (r == -EDEADLK) 50 struct fence *fence = *f;
51 r = amdgpu_gpu_reset(adev);
52 } else
53 r = fence_wait(*f, false);
54 51
55 if (r) 52 if (fence == NULL)
56 DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r); 53 return false;
57 54
58 /* We continue with the page flip even if we failed to wait on
59 * the fence, otherwise the DRM core and userspace will be
60 * confused about which BO the CRTC is scanning out
61 */
62 fence_put(*f);
63 *f = NULL; 55 *f = NULL;
56
57 if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
58 return true;
59
60 fence_put(fence);
61 return false;
64} 62}
65 63
66static void amdgpu_flip_work_func(struct work_struct *__work) 64static void amdgpu_flip_work_func(struct work_struct *__work)
@@ -76,9 +74,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
76 int vpos, hpos, stat, min_udelay; 74 int vpos, hpos, stat, min_udelay;
77 struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; 75 struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
78 76
79 amdgpu_flip_wait_fence(adev, &work->excl); 77 if (amdgpu_flip_handle_fence(work, &work->excl))
78 return;
79
80 for (i = 0; i < work->shared_count; ++i) 80 for (i = 0; i < work->shared_count; ++i)
81 amdgpu_flip_wait_fence(adev, &work->shared[i]); 81 if (amdgpu_flip_handle_fence(work, &work->shared[i]))
82 return;
82 83
83 /* We borrow the event spin lock for protecting flip_status */ 84 /* We borrow the event spin lock for protecting flip_status */
84 spin_lock_irqsave(&crtc->dev->event_lock, flags); 85 spin_lock_irqsave(&crtc->dev->event_lock, flags);
@@ -118,12 +119,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
118 spin_lock_irqsave(&crtc->dev->event_lock, flags); 119 spin_lock_irqsave(&crtc->dev->event_lock, flags);
119 }; 120 };
120 121
121 /* do the flip (mmio) */
122 adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
123 /* set the flip status */ 122 /* set the flip status */
124 amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED; 123 amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
125
126 spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 124 spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
125
126 /* Do the flip (mmio) */
127 adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
127} 128}
128 129
129/* 130/*
@@ -242,7 +243,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
242 /* update crtc fb */ 243 /* update crtc fb */
243 crtc->primary->fb = fb; 244 crtc->primary->fb = fb;
244 spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 245 spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
245 queue_work(amdgpu_crtc->pflip_queue, &work->flip_work); 246 amdgpu_flip_work_func(&work->flip_work);
246 return 0; 247 return 0;
247 248
248vblank_cleanup: 249vblank_cleanup:
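The display rework above stops blocking in the flip worker: instead of fence_wait(), amdgpu_flip_handle_fence() tries to arm a fence callback and returns early if the fence is still pending, and the callback simply reschedules the work once the fence signals. fence_add_callback() returns 0 when the callback was armed and an error (-ENOENT) when the fence has already signalled, which is what the '!fence_add_callback(...)' test relies on. A minimal sketch of the same pattern with hypothetical names (my_flip_work, my_flip_callback, my_handle_fence):

#include <linux/kernel.h>
#include <linux/fence.h>
#include <linux/workqueue.h>

struct my_flip_work {
	struct work_struct work;
	struct fence_cb cb;
};

static void my_flip_callback(struct fence *f, struct fence_cb *cb)
{
	struct my_flip_work *w = container_of(cb, struct my_flip_work, cb);

	fence_put(f);			/* drop the reference handed to us */
	schedule_work(&w->work);	/* re-run the flip worker, now unblocked */
}

/* Returns true if the worker must bail out and wait for the callback. */
static bool my_handle_fence(struct my_flip_work *w, struct fence **f)
{
	struct fence *fence = *f;

	if (!fence)
		return false;
	*f = NULL;

	if (!fence_add_callback(fence, &w->cb, my_flip_callback))
		return true;		/* callback armed; it will reschedule us */

	fence_put(fence);		/* already signalled, keep going */
	return false;
}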
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 9c1af8976bef..ce79a8b605a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,7 +69,6 @@ int amdgpu_dpm = -1;
69int amdgpu_smc_load_fw = 1; 69int amdgpu_smc_load_fw = 1;
70int amdgpu_aspm = -1; 70int amdgpu_aspm = -1;
71int amdgpu_runtime_pm = -1; 71int amdgpu_runtime_pm = -1;
72int amdgpu_hard_reset = 0;
73unsigned amdgpu_ip_block_mask = 0xffffffff; 72unsigned amdgpu_ip_block_mask = 0xffffffff;
74int amdgpu_bapm = -1; 73int amdgpu_bapm = -1;
75int amdgpu_deep_color = 0; 74int amdgpu_deep_color = 0;
@@ -78,10 +77,8 @@ int amdgpu_vm_block_size = -1;
78int amdgpu_vm_fault_stop = 0; 77int amdgpu_vm_fault_stop = 0;
79int amdgpu_vm_debug = 0; 78int amdgpu_vm_debug = 0;
80int amdgpu_exp_hw_support = 0; 79int amdgpu_exp_hw_support = 0;
81int amdgpu_enable_scheduler = 1;
82int amdgpu_sched_jobs = 32; 80int amdgpu_sched_jobs = 32;
83int amdgpu_sched_hw_submission = 2; 81int amdgpu_sched_hw_submission = 2;
84int amdgpu_enable_semaphores = 0;
85int amdgpu_powerplay = -1; 82int amdgpu_powerplay = -1;
86 83
87MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); 84MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
@@ -126,9 +123,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
126MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); 123MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
127module_param_named(runpm, amdgpu_runtime_pm, int, 0444); 124module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
128 125
129MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))");
130module_param_named(hard_reset, amdgpu_hard_reset, int, 0444);
131
132MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); 126MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
133module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); 127module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
134 128
@@ -153,18 +147,12 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
153MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); 147MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
154module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); 148module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
155 149
156MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)");
157module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
158
159MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)"); 150MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
160module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); 151module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
161 152
162MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); 153MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
163module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); 154module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
164 155
165MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))");
166module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
167
168#ifdef CONFIG_DRM_AMD_POWERPLAY 156#ifdef CONFIG_DRM_AMD_POWERPLAY
169MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))"); 157MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
170module_param_named(powerplay, amdgpu_powerplay, int, 0444); 158module_param_named(powerplay, amdgpu_powerplay, int, 0444);
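
The driver options removed above (hard_reset, enable_scheduler, enable_semaphores) all follow the usual module-parameter pattern, so dropping one is just deleting its variable plus the MODULE_PARM_DESC/module_param_named pair. For orientation, a sketch of that pattern with a hypothetical knob (not an actual amdgpu parameter); it only builds as part of a kernel module:

#include <linux/module.h>

/* hypothetical knob for illustration, not an existing amdgpu parameter */
static int example_knob = -1;

MODULE_PARM_DESC(example_knob, "example knob (1 = enable, 0 = disable, -1 = auto (default))");
module_param_named(example_knob, example_knob, int, 0444);
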
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3671f9f220bd..97db196dc6f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -107,7 +107,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
107 if ((*fence) == NULL) { 107 if ((*fence) == NULL) {
108 return -ENOMEM; 108 return -ENOMEM;
109 } 109 }
110 (*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx]; 110 (*fence)->seq = ++ring->fence_drv.sync_seq;
111 (*fence)->ring = ring; 111 (*fence)->ring = ring;
112 (*fence)->owner = owner; 112 (*fence)->owner = owner;
113 fence_init(&(*fence)->base, &amdgpu_fence_ops, 113 fence_init(&(*fence)->base, &amdgpu_fence_ops,
@@ -171,7 +171,7 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
171 */ 171 */
172 last_seq = atomic64_read(&ring->fence_drv.last_seq); 172 last_seq = atomic64_read(&ring->fence_drv.last_seq);
173 do { 173 do {
174 last_emitted = ring->fence_drv.sync_seq[ring->idx]; 174 last_emitted = ring->fence_drv.sync_seq;
175 seq = amdgpu_fence_read(ring); 175 seq = amdgpu_fence_read(ring);
176 seq |= last_seq & 0xffffffff00000000LL; 176 seq |= last_seq & 0xffffffff00000000LL;
177 if (seq < last_seq) { 177 if (seq < last_seq) {
@@ -260,34 +260,28 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
260} 260}
261 261
262/* 262/*
263 * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal 263 * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
264 * @ring: ring to wait on for the seq number 264 * @ring: ring to wait on for the seq number
265 * @seq: seq number wait for 265 * @seq: seq number wait for
266 * 266 *
267 * return value: 267 * return value:
268 * 0: seq signaled, and gpu not hang 268 * 0: seq signaled, and gpu not hang
269 * -EDEADL: GPU hang detected
270 * -EINVAL: some paramter is not valid 269 * -EINVAL: some paramter is not valid
271 */ 270 */
272static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq) 271static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
273{ 272{
274 bool signaled = false;
275
276 BUG_ON(!ring); 273 BUG_ON(!ring);
277 if (seq > ring->fence_drv.sync_seq[ring->idx]) 274 if (seq > ring->fence_drv.sync_seq)
278 return -EINVAL; 275 return -EINVAL;
279 276
280 if (atomic64_read(&ring->fence_drv.last_seq) >= seq) 277 if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
281 return 0; 278 return 0;
282 279
283 amdgpu_fence_schedule_fallback(ring); 280 amdgpu_fence_schedule_fallback(ring);
284 wait_event(ring->fence_drv.fence_queue, ( 281 wait_event(ring->fence_drv.fence_queue,
285 (signaled = amdgpu_fence_seq_signaled(ring, seq)))); 282 amdgpu_fence_seq_signaled(ring, seq));
286 283
287 if (signaled) 284 return 0;
288 return 0;
289 else
290 return -EDEADLK;
291} 285}
292 286
293/** 287/**
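
With the per-ring sync_seq array gone and the -EDEADLK hang path removed, waiting for a sequence number reduces to a plain wait_event() on fence_queue until the signaled counter catches up. The sketch below is a compilable userspace model of that predicate-based wait (a pthread condition variable instead of wait_event, plain variables instead of the atomic last_seq); names and values are invented for illustration. Build with -pthread.

/* Userspace model of the simplified wait: block until the "last signaled"
 * sequence reaches the target; no hang detection in the wait path. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static uint64_t last_seq;	/* kernel: fence_drv.last_seq (atomic64) */
static uint64_t sync_seq = 4;	/* kernel: fence_drv.sync_seq (last emitted) */

static int wait_seq(uint64_t seq)
{
	if (seq > sync_seq)
		return -1;		/* -EINVAL: sequence was never emitted */
	pthread_mutex_lock(&lock);
	while (last_seq < seq)		/* kernel: wait_event(fence_queue, ...) */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	return 0;
}

static void *signaler(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	last_seq = 4;			/* "fence interrupt" catches up */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, signaler, NULL);
	printf("wait_seq(3) -> %d\n", wait_seq(3));
	pthread_join(t, NULL);
	return 0;
}
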
@@ -304,7 +298,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
304{ 298{
305 uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL; 299 uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
306 300
307 if (seq >= ring->fence_drv.sync_seq[ring->idx]) 301 if (seq >= ring->fence_drv.sync_seq)
308 return -ENOENT; 302 return -ENOENT;
309 303
310 return amdgpu_fence_ring_wait_seq(ring, seq); 304 return amdgpu_fence_ring_wait_seq(ring, seq);
@@ -322,7 +316,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
322 */ 316 */
323int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) 317int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
324{ 318{
325 uint64_t seq = ring->fence_drv.sync_seq[ring->idx]; 319 uint64_t seq = ring->fence_drv.sync_seq;
326 320
327 if (!seq) 321 if (!seq)
328 return 0; 322 return 0;
@@ -347,7 +341,7 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
347 * but it's ok to report slightly wrong fence count here. 341 * but it's ok to report slightly wrong fence count here.
348 */ 342 */
349 amdgpu_fence_process(ring); 343 amdgpu_fence_process(ring);
350 emitted = ring->fence_drv.sync_seq[ring->idx] 344 emitted = ring->fence_drv.sync_seq
351 - atomic64_read(&ring->fence_drv.last_seq); 345 - atomic64_read(&ring->fence_drv.last_seq);
352 /* to avoid 32bits warp around */ 346 /* to avoid 32bits warp around */
353 if (emitted > 0x10000000) 347 if (emitted > 0x10000000)
@@ -357,68 +351,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
357} 351}
358 352
359/** 353/**
360 * amdgpu_fence_need_sync - do we need a semaphore
361 *
362 * @fence: amdgpu fence object
363 * @dst_ring: which ring to check against
364 *
365 * Check if the fence needs to be synced against another ring
366 * (all asics). If so, we need to emit a semaphore.
367 * Returns true if we need to sync with another ring, false if
368 * not.
369 */
370bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
371 struct amdgpu_ring *dst_ring)
372{
373 struct amdgpu_fence_driver *fdrv;
374
375 if (!fence)
376 return false;
377
378 if (fence->ring == dst_ring)
379 return false;
380
381 /* we are protected by the ring mutex */
382 fdrv = &dst_ring->fence_drv;
383 if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
384 return false;
385
386 return true;
387}
388
389/**
390 * amdgpu_fence_note_sync - record the sync point
391 *
392 * @fence: amdgpu fence object
393 * @dst_ring: which ring to check against
394 *
395 * Note the sequence number at which point the fence will
396 * be synced with the requested ring (all asics).
397 */
398void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
399 struct amdgpu_ring *dst_ring)
400{
401 struct amdgpu_fence_driver *dst, *src;
402 unsigned i;
403
404 if (!fence)
405 return;
406
407 if (fence->ring == dst_ring)
408 return;
409
410 /* we are protected by the ring mutex */
411 src = &fence->ring->fence_drv;
412 dst = &dst_ring->fence_drv;
413 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
414 if (i == dst_ring->idx)
415 continue;
416
417 dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
418 }
419}
420
421/**
422 * amdgpu_fence_driver_start_ring - make the fence driver 354 * amdgpu_fence_driver_start_ring - make the fence driver
423 * ready for use on the requested ring. 355 * ready for use on the requested ring.
424 * 356 *
@@ -471,13 +403,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
471 */ 403 */
472int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) 404int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
473{ 405{
474 int i, r; 406 long timeout;
407 int r;
475 408
476 ring->fence_drv.cpu_addr = NULL; 409 ring->fence_drv.cpu_addr = NULL;
477 ring->fence_drv.gpu_addr = 0; 410 ring->fence_drv.gpu_addr = 0;
478 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 411 ring->fence_drv.sync_seq = 0;
479 ring->fence_drv.sync_seq[i] = 0;
480
481 atomic64_set(&ring->fence_drv.last_seq, 0); 412 atomic64_set(&ring->fence_drv.last_seq, 0);
482 ring->fence_drv.initialized = false; 413 ring->fence_drv.initialized = false;
483 414
@@ -486,26 +417,24 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
486 417
487 init_waitqueue_head(&ring->fence_drv.fence_queue); 418 init_waitqueue_head(&ring->fence_drv.fence_queue);
488 419
489 if (amdgpu_enable_scheduler) { 420 timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
490 long timeout = msecs_to_jiffies(amdgpu_lockup_timeout); 421 if (timeout == 0) {
491 if (timeout == 0) { 422 /*
492 /* 423 * FIXME:
493 * FIXME: 424 * Delayed workqueue cannot use it directly,
494 * Delayed workqueue cannot use it directly, 425 * so the scheduler will not use delayed workqueue if
495 * so the scheduler will not use delayed workqueue if 426 * MAX_SCHEDULE_TIMEOUT is set.
496 * MAX_SCHEDULE_TIMEOUT is set. 427 * Currently keep it simple and silly.
497 * Currently keep it simple and silly. 428 */
498 */ 429 timeout = MAX_SCHEDULE_TIMEOUT;
499 timeout = MAX_SCHEDULE_TIMEOUT; 430 }
500 } 431 r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
501 r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, 432 amdgpu_sched_hw_submission,
502 amdgpu_sched_hw_submission, 433 timeout, ring->name);
503 timeout, ring->name); 434 if (r) {
504 if (r) { 435 DRM_ERROR("Failed to create scheduler on ring %s.\n",
505 DRM_ERROR("Failed to create scheduler on ring %s.\n", 436 ring->name);
506 ring->name); 437 return r;
507 return r;
508 }
509 } 438 }
510 439
511 return 0; 440 return 0;
@@ -552,7 +481,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
552 481
553 if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) 482 if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
554 kmem_cache_destroy(amdgpu_fence_slab); 483 kmem_cache_destroy(amdgpu_fence_slab);
555 mutex_lock(&adev->ring_lock);
556 for (i = 0; i < AMDGPU_MAX_RINGS; i++) { 484 for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
557 struct amdgpu_ring *ring = adev->rings[i]; 485 struct amdgpu_ring *ring = adev->rings[i];
558 486
@@ -570,7 +498,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
570 del_timer_sync(&ring->fence_drv.fallback_timer); 498 del_timer_sync(&ring->fence_drv.fallback_timer);
571 ring->fence_drv.initialized = false; 499 ring->fence_drv.initialized = false;
572 } 500 }
573 mutex_unlock(&adev->ring_lock);
574} 501}
575 502
576/** 503/**
@@ -585,7 +512,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
585{ 512{
586 int i, r; 513 int i, r;
587 514
588 mutex_lock(&adev->ring_lock);
589 for (i = 0; i < AMDGPU_MAX_RINGS; i++) { 515 for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
590 struct amdgpu_ring *ring = adev->rings[i]; 516 struct amdgpu_ring *ring = adev->rings[i];
591 if (!ring || !ring->fence_drv.initialized) 517 if (!ring || !ring->fence_drv.initialized)
@@ -602,7 +528,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
602 amdgpu_irq_put(adev, ring->fence_drv.irq_src, 528 amdgpu_irq_put(adev, ring->fence_drv.irq_src,
603 ring->fence_drv.irq_type); 529 ring->fence_drv.irq_type);
604 } 530 }
605 mutex_unlock(&adev->ring_lock);
606} 531}
607 532
608/** 533/**
@@ -621,7 +546,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
621{ 546{
622 int i; 547 int i;
623 548
624 mutex_lock(&adev->ring_lock);
625 for (i = 0; i < AMDGPU_MAX_RINGS; i++) { 549 for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
626 struct amdgpu_ring *ring = adev->rings[i]; 550 struct amdgpu_ring *ring = adev->rings[i];
627 if (!ring || !ring->fence_drv.initialized) 551 if (!ring || !ring->fence_drv.initialized)
@@ -631,7 +555,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
631 amdgpu_irq_get(adev, ring->fence_drv.irq_src, 555 amdgpu_irq_get(adev, ring->fence_drv.irq_src,
632 ring->fence_drv.irq_type); 556 ring->fence_drv.irq_type);
633 } 557 }
634 mutex_unlock(&adev->ring_lock);
635} 558}
636 559
637/** 560/**
@@ -651,7 +574,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
651 if (!ring || !ring->fence_drv.initialized) 574 if (!ring || !ring->fence_drv.initialized)
652 continue; 575 continue;
653 576
654 amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]); 577 amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
655 } 578 }
656} 579}
657 580
@@ -781,7 +704,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
781 struct drm_info_node *node = (struct drm_info_node *)m->private; 704 struct drm_info_node *node = (struct drm_info_node *)m->private;
782 struct drm_device *dev = node->minor->dev; 705 struct drm_device *dev = node->minor->dev;
783 struct amdgpu_device *adev = dev->dev_private; 706 struct amdgpu_device *adev = dev->dev_private;
784 int i, j; 707 int i;
785 708
786 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 709 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
787 struct amdgpu_ring *ring = adev->rings[i]; 710 struct amdgpu_ring *ring = adev->rings[i];
@@ -794,28 +717,38 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
794 seq_printf(m, "Last signaled fence 0x%016llx\n", 717 seq_printf(m, "Last signaled fence 0x%016llx\n",
795 (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); 718 (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
796 seq_printf(m, "Last emitted 0x%016llx\n", 719 seq_printf(m, "Last emitted 0x%016llx\n",
797 ring->fence_drv.sync_seq[i]); 720 ring->fence_drv.sync_seq);
798
799 for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
800 struct amdgpu_ring *other = adev->rings[j];
801 if (i != j && other && other->fence_drv.initialized &&
802 ring->fence_drv.sync_seq[j])
803 seq_printf(m, "Last sync to ring %d 0x%016llx\n",
804 j, ring->fence_drv.sync_seq[j]);
805 }
806 } 721 }
807 return 0; 722 return 0;
808} 723}
809 724
725/**
726 * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset
727 *
728 * Manually trigger a gpu reset at the next fence wait.
729 */
730static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data)
731{
732 struct drm_info_node *node = (struct drm_info_node *) m->private;
733 struct drm_device *dev = node->minor->dev;
734 struct amdgpu_device *adev = dev->dev_private;
735
736 seq_printf(m, "gpu reset\n");
737 amdgpu_gpu_reset(adev);
738
739 return 0;
740}
741
810static struct drm_info_list amdgpu_debugfs_fence_list[] = { 742static struct drm_info_list amdgpu_debugfs_fence_list[] = {
811 {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, 743 {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
744 {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
812}; 745};
813#endif 746#endif
814 747
815int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) 748int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
816{ 749{
817#if defined(CONFIG_DEBUG_FS) 750#if defined(CONFIG_DEBUG_FS)
818 return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1); 751 return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
819#else 752#else
820 return 0; 753 return 0;
821#endif 754#endif
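
The debugfs list above gains an "amdgpu_gpu_reset" entry, registered through DRM's drm_info_list/amdgpu_debugfs_add_files helpers (hence the count passed to amdgpu_debugfs_add_files going from 1 to 2). The generic debugfs equivalent of the same seq_file show-function pattern, with hypothetical names and without the DRM wrappers, would look roughly like this (kernel-only code, shown for orientation):

#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/module.h>

static int example_reset_show(struct seq_file *m, void *unused)
{
	seq_printf(m, "gpu reset\n");
	/* the real entry calls amdgpu_gpu_reset(adev) at this point */
	return 0;
}

static int example_reset_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_reset_show, inode->i_private);
}

static const struct file_operations example_reset_fops = {
	.owner   = THIS_MODULE,
	.open    = example_reset_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/* a caller would then register it with something like:
 * debugfs_create_file("example_reset", 0444, parent, NULL, &example_reset_fops);
 */
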
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7380f782cd14..2e26a517f2d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -83,24 +83,32 @@ retry:
83 return r; 83 return r;
84 } 84 }
85 *obj = &robj->gem_base; 85 *obj = &robj->gem_base;
86 robj->pid = task_pid_nr(current);
87
88 mutex_lock(&adev->gem.mutex);
89 list_add_tail(&robj->list, &adev->gem.objects);
90 mutex_unlock(&adev->gem.mutex);
91 86
92 return 0; 87 return 0;
93} 88}
94 89
95int amdgpu_gem_init(struct amdgpu_device *adev) 90void amdgpu_gem_force_release(struct amdgpu_device *adev)
96{ 91{
97 INIT_LIST_HEAD(&adev->gem.objects); 92 struct drm_device *ddev = adev->ddev;
98 return 0; 93 struct drm_file *file;
99}
100 94
101void amdgpu_gem_fini(struct amdgpu_device *adev) 95 mutex_lock(&ddev->struct_mutex);
102{ 96
103 amdgpu_bo_force_delete(adev); 97 list_for_each_entry(file, &ddev->filelist, lhead) {
98 struct drm_gem_object *gobj;
99 int handle;
100
101 WARN_ONCE(1, "Still active user space clients!\n");
102 spin_lock(&file->table_lock);
103 idr_for_each_entry(&file->object_idr, gobj, handle) {
104 WARN_ONCE(1, "And also active allocations!\n");
105 drm_gem_object_unreference(gobj);
106 }
107 idr_destroy(&file->object_idr);
108 spin_unlock(&file->table_lock);
109 }
110
111 mutex_unlock(&ddev->struct_mutex);
104} 112}
105 113
106/* 114/*
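
amdgpu_gem_force_release() above replaces the old per-device gem.objects bookkeeping: at teardown it walks every DRM file's handle table with idr_for_each_entry(), warns about leftovers, drops the references and destroys the idr. A stripped-down, kernel-only sketch of that idr walk, with hypothetical names and none of the DRM specifics:

#include <linux/idr.h>
#include <linux/printk.h>

static void drop_all_handles(struct idr *object_idr)
{
	void *obj;
	int handle;

	/* visit every (handle, object) pair still registered by the client */
	idr_for_each_entry(object_idr, obj, handle) {
		/* amdgpu_gem_force_release() warns and unreferences here */
		pr_warn("leaking handle %d, releasing object %p\n", handle, obj);
	}
	idr_destroy(object_idr);
}
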
@@ -252,6 +260,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
252 goto handle_lockup; 260 goto handle_lockup;
253 261
254 bo = gem_to_amdgpu_bo(gobj); 262 bo = gem_to_amdgpu_bo(gobj);
263 bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
264 bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
255 r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); 265 r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags);
256 if (r) 266 if (r)
257 goto release_object; 267 goto release_object;
@@ -308,7 +318,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
308 return -ENOENT; 318 return -ENOENT;
309 } 319 }
310 robj = gem_to_amdgpu_bo(gobj); 320 robj = gem_to_amdgpu_bo(gobj);
311 if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm) || 321 if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
312 (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { 322 (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
313 drm_gem_object_unreference_unlocked(gobj); 323 drm_gem_object_unreference_unlocked(gobj);
314 return -EPERM; 324 return -EPERM;
@@ -628,7 +638,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
628 638
629 info.bo_size = robj->gem_base.size; 639 info.bo_size = robj->gem_base.size;
630 info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; 640 info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
631 info.domains = robj->initial_domain; 641 info.domains = robj->prefered_domains;
632 info.domain_flags = robj->flags; 642 info.domain_flags = robj->flags;
633 amdgpu_bo_unreserve(robj); 643 amdgpu_bo_unreserve(robj);
634 if (copy_to_user(out, &info, sizeof(info))) 644 if (copy_to_user(out, &info, sizeof(info)))
@@ -636,14 +646,18 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
636 break; 646 break;
637 } 647 }
638 case AMDGPU_GEM_OP_SET_PLACEMENT: 648 case AMDGPU_GEM_OP_SET_PLACEMENT:
639 if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) { 649 if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
640 r = -EPERM; 650 r = -EPERM;
641 amdgpu_bo_unreserve(robj); 651 amdgpu_bo_unreserve(robj);
642 break; 652 break;
643 } 653 }
644 robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM | 654 robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
645 AMDGPU_GEM_DOMAIN_GTT | 655 AMDGPU_GEM_DOMAIN_GTT |
646 AMDGPU_GEM_DOMAIN_CPU); 656 AMDGPU_GEM_DOMAIN_CPU);
657 robj->allowed_domains = robj->prefered_domains;
658 if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
659 robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
660
647 amdgpu_bo_unreserve(robj); 661 amdgpu_bo_unreserve(robj);
648 break; 662 break;
649 default: 663 default:
@@ -688,38 +702,73 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
688} 702}
689 703
690#if defined(CONFIG_DEBUG_FS) 704#if defined(CONFIG_DEBUG_FS)
705static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
706{
707 struct drm_gem_object *gobj = ptr;
708 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
709 struct seq_file *m = data;
710
711 unsigned domain;
712 const char *placement;
713 unsigned pin_count;
714
715 domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
716 switch (domain) {
717 case AMDGPU_GEM_DOMAIN_VRAM:
718 placement = "VRAM";
719 break;
720 case AMDGPU_GEM_DOMAIN_GTT:
721 placement = " GTT";
722 break;
723 case AMDGPU_GEM_DOMAIN_CPU:
724 default:
725 placement = " CPU";
726 break;
727 }
728 seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx",
729 id, amdgpu_bo_size(bo), placement,
730 amdgpu_bo_gpu_offset(bo));
731
732 pin_count = ACCESS_ONCE(bo->pin_count);
733 if (pin_count)
734 seq_printf(m, " pin count %d", pin_count);
735 seq_printf(m, "\n");
736
737 return 0;
738}
739
691static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) 740static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
692{ 741{
693 struct drm_info_node *node = (struct drm_info_node *)m->private; 742 struct drm_info_node *node = (struct drm_info_node *)m->private;
694 struct drm_device *dev = node->minor->dev; 743 struct drm_device *dev = node->minor->dev;
695 struct amdgpu_device *adev = dev->dev_private; 744 struct drm_file *file;
696 struct amdgpu_bo *rbo; 745 int r;
697 unsigned i = 0;
698 746
699 mutex_lock(&adev->gem.mutex); 747 r = mutex_lock_interruptible(&dev->struct_mutex);
700 list_for_each_entry(rbo, &adev->gem.objects, list) { 748 if (r)
701 unsigned domain; 749 return r;
702 const char *placement;
703 750
704 domain = amdgpu_mem_type_to_domain(rbo->tbo.mem.mem_type); 751 list_for_each_entry(file, &dev->filelist, lhead) {
705 switch (domain) { 752 struct task_struct *task;
706 case AMDGPU_GEM_DOMAIN_VRAM: 753
707 placement = "VRAM"; 754 /*
708 break; 755 * Although we have a valid reference on file->pid, that does
709 case AMDGPU_GEM_DOMAIN_GTT: 756 * not guarantee that the task_struct who called get_pid() is
710 placement = " GTT"; 757 * still alive (e.g. get_pid(current) => fork() => exit()).
711 break; 758 * Therefore, we need to protect this ->comm access using RCU.
712 case AMDGPU_GEM_DOMAIN_CPU: 759 */
713 default: 760 rcu_read_lock();
714 placement = " CPU"; 761 task = pid_task(file->pid, PIDTYPE_PID);
715 break; 762 seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
716 } 763 task ? task->comm : "<unknown>");
717 seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n", 764 rcu_read_unlock();
718 i, amdgpu_bo_size(rbo) >> 10, amdgpu_bo_size(rbo) >> 20, 765
719 placement, (unsigned long)rbo->pid); 766 spin_lock(&file->table_lock);
720 i++; 767 idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m);
768 spin_unlock(&file->table_lock);
721 } 769 }
722 mutex_unlock(&adev->gem.mutex); 770
771 mutex_unlock(&dev->struct_mutex);
723 return 0; 772 return 0;
724} 773}
725 774
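
The reworked amdgpu_debugfs_gem_info() walks dev->filelist per client instead of a driver-private BO list, and, as the comment in the hunk explains, it must use RCU when turning file->pid into a task name because the task may already have exited. The lookup pattern in isolation, wrapped in a hypothetical helper (kernel-only code):

#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/printk.h>

static void print_client_comm(struct pid *pid)
{
	struct task_struct *task;

	rcu_read_lock();
	task = pid_task(pid, PIDTYPE_PID);	/* may be NULL if it exited */
	pr_info("pid %d command %s\n", pid_nr(pid),
		task ? task->comm : "<unknown>");
	rcu_read_unlock();
}
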
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 9e25edafa721..b5bdd5d59b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -55,10 +55,9 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
55 * suballocator. 55 * suballocator.
56 * Returns 0 on success, error on failure. 56 * Returns 0 on success, error on failure.
57 */ 57 */
58int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, 58int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
59 unsigned size, struct amdgpu_ib *ib) 59 unsigned size, struct amdgpu_ib *ib)
60{ 60{
61 struct amdgpu_device *adev = ring->adev;
62 int r; 61 int r;
63 62
64 if (size) { 63 if (size) {
@@ -75,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
75 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); 74 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
76 } 75 }
77 76
78 amdgpu_sync_create(&ib->sync);
79
80 ib->ring = ring;
81 ib->vm = vm; 77 ib->vm = vm;
82 78
83 return 0; 79 return 0;
@@ -93,7 +89,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
93 */ 89 */
94void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) 90void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
95{ 91{
96 amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
97 amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); 92 amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
98 if (ib->fence) 93 if (ib->fence)
99 fence_put(&ib->fence->base); 94 fence_put(&ib->fence->base);
@@ -106,6 +101,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
106 * @num_ibs: number of IBs to schedule 101 * @num_ibs: number of IBs to schedule
107 * @ibs: IB objects to schedule 102 * @ibs: IB objects to schedule
108 * @owner: owner for creating the fences 103 * @owner: owner for creating the fences
104 * @f: fence created during this submission
109 * 105 *
110 * Schedule an IB on the associated ring (all asics). 106 * Schedule an IB on the associated ring (all asics).
111 * Returns 0 on success, error on failure. 107 * Returns 0 on success, error on failure.
@@ -120,11 +116,13 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
120 * a CONST_IB), it will be put on the ring prior to the DE IB. Prior 116 * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
121 * to SI there was just a DE IB. 117 * to SI there was just a DE IB.
122 */ 118 */
123int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, 119int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
124 struct amdgpu_ib *ibs, void *owner) 120 struct amdgpu_ib *ibs, void *owner,
121 struct fence *last_vm_update,
122 struct fence **f)
125{ 123{
124 struct amdgpu_device *adev = ring->adev;
126 struct amdgpu_ib *ib = &ibs[0]; 125 struct amdgpu_ib *ib = &ibs[0];
127 struct amdgpu_ring *ring;
128 struct amdgpu_ctx *ctx, *old_ctx; 126 struct amdgpu_ctx *ctx, *old_ctx;
129 struct amdgpu_vm *vm; 127 struct amdgpu_vm *vm;
130 unsigned i; 128 unsigned i;
@@ -133,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
133 if (num_ibs == 0) 131 if (num_ibs == 0)
134 return -EINVAL; 132 return -EINVAL;
135 133
136 ring = ibs->ring;
137 ctx = ibs->ctx; 134 ctx = ibs->ctx;
138 vm = ibs->vm; 135 vm = ibs->vm;
139 136
@@ -141,36 +138,21 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
141 dev_err(adev->dev, "couldn't schedule ib\n"); 138 dev_err(adev->dev, "couldn't schedule ib\n");
142 return -EINVAL; 139 return -EINVAL;
143 } 140 }
144 r = amdgpu_sync_wait(&ibs->sync);
145 if (r) {
146 dev_err(adev->dev, "IB sync failed (%d).\n", r);
147 return r;
148 }
149 r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
150 if (r) {
151 dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
152 return r;
153 }
154 141
155 if (vm) { 142 if (vm && !ibs->grabbed_vmid) {
156 /* grab a vm id if necessary */ 143 dev_err(adev->dev, "VM IB without ID\n");
157 r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync); 144 return -EINVAL;
158 if (r) {
159 amdgpu_ring_unlock_undo(ring);
160 return r;
161 }
162 } 145 }
163 146
164 r = amdgpu_sync_rings(&ibs->sync, ring); 147 r = amdgpu_ring_alloc(ring, 256 * num_ibs);
165 if (r) { 148 if (r) {
166 amdgpu_ring_unlock_undo(ring); 149 dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
167 dev_err(adev->dev, "failed to sync rings (%d)\n", r);
168 return r; 150 return r;
169 } 151 }
170 152
171 if (vm) { 153 if (vm) {
172 /* do context switch */ 154 /* do context switch */
173 amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update); 155 amdgpu_vm_flush(ring, vm, last_vm_update);
174 156
175 if (ring->funcs->emit_gds_switch) 157 if (ring->funcs->emit_gds_switch)
176 amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id, 158 amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
@@ -186,9 +168,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
186 for (i = 0; i < num_ibs; ++i) { 168 for (i = 0; i < num_ibs; ++i) {
187 ib = &ibs[i]; 169 ib = &ibs[i];
188 170
189 if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) { 171 if (ib->ctx != ctx || ib->vm != vm) {
190 ring->current_ctx = old_ctx; 172 ring->current_ctx = old_ctx;
191 amdgpu_ring_unlock_undo(ring); 173 amdgpu_ring_undo(ring);
192 return -EINVAL; 174 return -EINVAL;
193 } 175 }
194 amdgpu_ring_emit_ib(ring, ib); 176 amdgpu_ring_emit_ib(ring, ib);
@@ -199,14 +181,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
199 if (r) { 181 if (r) {
200 dev_err(adev->dev, "failed to emit fence (%d)\n", r); 182 dev_err(adev->dev, "failed to emit fence (%d)\n", r);
201 ring->current_ctx = old_ctx; 183 ring->current_ctx = old_ctx;
202 amdgpu_ring_unlock_undo(ring); 184 amdgpu_ring_undo(ring);
203 return r; 185 return r;
204 } 186 }
205 187
206 if (!amdgpu_enable_scheduler && ib->ctx)
207 ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
208 &ib->fence->base);
209
210 /* wrap the last IB with fence */ 188 /* wrap the last IB with fence */
211 if (ib->user) { 189 if (ib->user) {
212 uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); 190 uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
@@ -215,10 +193,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
215 AMDGPU_FENCE_FLAG_64BIT); 193 AMDGPU_FENCE_FLAG_64BIT);
216 } 194 }
217 195
218 if (ib->vm) 196 if (f)
219 amdgpu_vm_fence(adev, ib->vm, &ib->fence->base); 197 *f = fence_get(&ib->fence->base);
220 198
221 amdgpu_ring_unlock_commit(ring); 199 amdgpu_ring_commit(ring);
222 return 0; 200 return 0;
223} 201}
224 202
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
new file mode 100644
index 000000000000..f29bbb96a881
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -0,0 +1,159 @@
1/*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 *
23 */
24#include <linux/kthread.h>
25#include <linux/wait.h>
26#include <linux/sched.h>
27#include <drm/drmP.h>
28#include "amdgpu.h"
29#include "amdgpu_trace.h"
30
31int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
32 struct amdgpu_job **job)
33{
34 size_t size = sizeof(struct amdgpu_job);
35
36 if (num_ibs == 0)
37 return -EINVAL;
38
39 size += sizeof(struct amdgpu_ib) * num_ibs;
40
41 *job = kzalloc(size, GFP_KERNEL);
42 if (!*job)
43 return -ENOMEM;
44
45 (*job)->adev = adev;
46 (*job)->ibs = (void *)&(*job)[1];
47 (*job)->num_ibs = num_ibs;
48
49 amdgpu_sync_create(&(*job)->sync);
50
51 return 0;
52}
53
54int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
55 struct amdgpu_job **job)
56{
57 int r;
58
59 r = amdgpu_job_alloc(adev, 1, job);
60 if (r)
61 return r;
62
63 r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
64 if (r)
65 kfree(*job);
66
67 return r;
68}
69
70void amdgpu_job_free(struct amdgpu_job *job)
71{
72 unsigned i;
73
74 for (i = 0; i < job->num_ibs; ++i)
75 amdgpu_ib_free(job->adev, &job->ibs[i]);
76
77 amdgpu_bo_unref(&job->uf.bo);
78 amdgpu_sync_free(&job->sync);
79 kfree(job);
80}
81
82int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
83 struct amd_sched_entity *entity, void *owner,
84 struct fence **f)
85{
86 job->ring = ring;
87 job->base.sched = &ring->sched;
88 job->base.s_entity = entity;
89 job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
90 if (!job->base.s_fence)
91 return -ENOMEM;
92
93 *f = fence_get(&job->base.s_fence->base);
94
95 job->owner = owner;
96 amd_sched_entity_push_job(&job->base);
97
98 return 0;
99}
100
101static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
102{
103 struct amdgpu_job *job = to_amdgpu_job(sched_job);
104 struct amdgpu_vm *vm = job->ibs->vm;
105
106 struct fence *fence = amdgpu_sync_get_fence(&job->sync);
107
108 if (fence == NULL && vm && !job->ibs->grabbed_vmid) {
109 struct amdgpu_ring *ring = job->ring;
110 int r;
111
112 r = amdgpu_vm_grab_id(vm, ring, &job->sync,
113 &job->base.s_fence->base);
114 if (r)
115 DRM_ERROR("Error getting VM ID (%d)\n", r);
116 else
117 job->ibs->grabbed_vmid = true;
118
119 fence = amdgpu_sync_get_fence(&job->sync);
120 }
121
122 return fence;
123}
124
125static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
126{
127 struct fence *fence = NULL;
128 struct amdgpu_job *job;
129 int r;
130
131 if (!sched_job) {
132 DRM_ERROR("job is null\n");
133 return NULL;
134 }
135 job = to_amdgpu_job(sched_job);
136
137 r = amdgpu_sync_wait(&job->sync);
138 if (r) {
139 DRM_ERROR("failed to sync wait (%d)\n", r);
140 return NULL;
141 }
142
143 trace_amdgpu_sched_run_job(job);
144 r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner,
145 job->sync.last_vm_update, &fence);
146 if (r) {
147 DRM_ERROR("Error scheduling IBs (%d)\n", r);
148 goto err;
149 }
150
151err:
152 amdgpu_job_free(job);
153 return fence;
154}
155
156struct amd_sched_backend_ops amdgpu_sched_ops = {
157 .dependency = amdgpu_job_dependency,
158 .run_job = amdgpu_job_run,
159};
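
The new amdgpu_job.c ends by wiring two callbacks into amd_sched_backend_ops: .dependency hands the scheduler the next fence the job still has to wait for (grabbing a VM ID along the way), and .run_job finally calls amdgpu_ib_schedule() once nothing is left. Below is a compilable userspace model of that two-callback split; struct backend_ops, example_dependency and example_run_job are invented for the sketch and only stand in for the real scheduler types.

/* Userspace model: the "scheduler core" polls .dependency until it returns
 * NULL, then hands the job to .run_job. */
#include <stdio.h>
#include <stddef.h>

struct job;

struct backend_ops {
	void *(*dependency)(struct job *job);	/* NULL means "ready to run" */
	int   (*run_job)(struct job *job);
};

struct job {
	const struct backend_ops *ops;
	int pending_deps;
};

static void *example_dependency(struct job *job)
{
	if (job->pending_deps > 0) {
		job->pending_deps--;
		return job;		/* stand-in for a fence to wait on */
	}
	return NULL;
}

static int example_run_job(struct job *job)
{
	(void)job;
	printf("job submitted to the ring\n");
	return 0;
}

static const struct backend_ops example_ops = {
	.dependency = example_dependency,
	.run_job    = example_run_job,
};

int main(void)
{
	struct job job = { .ops = &example_ops, .pending_deps = 2 };
	void *dep;

	while ((dep = job.ops->dependency(&job)) != NULL)
		printf("waiting on dependency %p\n", dep);
	return job.ops->run_job(&job);
}
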
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e23843f4d877..7805a8706af7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
447 dev_info.max_memory_clock = adev->pm.default_mclk * 10; 447 dev_info.max_memory_clock = adev->pm.default_mclk * 10;
448 } 448 }
449 dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; 449 dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
450 dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * 450 dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
451 adev->gfx.config.max_shader_engines;
452 dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; 451 dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
453 dev_info._pad = 0; 452 dev_info._pad = 0;
454 dev_info.ids_flags = 0; 453 dev_info.ids_flags = 0;
@@ -727,6 +726,12 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
727 726
728 /* Get associated drm_crtc: */ 727 /* Get associated drm_crtc: */
729 crtc = &adev->mode_info.crtcs[pipe]->base; 728 crtc = &adev->mode_info.crtcs[pipe]->base;
729 if (!crtc) {
730 /* This can occur on driver load if some component fails to
731 * initialize completely and driver is unloaded */
732 DRM_ERROR("Uninitialized crtc %d\n", pipe);
733 return -EINVAL;
734 }
730 735
731 /* Helper routine in DRM core does all the work: */ 736 /* Helper routine in DRM core does all the work: */
732 return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, 737 return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index b1969f2b2038..d7ec9bd6755f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -48,8 +48,7 @@ struct amdgpu_mn {
48 /* protected by adev->mn_lock */ 48 /* protected by adev->mn_lock */
49 struct hlist_node node; 49 struct hlist_node node;
50 50
51 /* objects protected by lock */ 51 /* objects protected by mm->mmap_sem */
52 struct mutex lock;
53 struct rb_root objects; 52 struct rb_root objects;
54}; 53};
55 54
@@ -73,21 +72,19 @@ static void amdgpu_mn_destroy(struct work_struct *work)
73 struct amdgpu_bo *bo, *next_bo; 72 struct amdgpu_bo *bo, *next_bo;
74 73
75 mutex_lock(&adev->mn_lock); 74 mutex_lock(&adev->mn_lock);
76 mutex_lock(&rmn->lock); 75 down_write(&rmn->mm->mmap_sem);
77 hash_del(&rmn->node); 76 hash_del(&rmn->node);
78 rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, 77 rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
79 it.rb) { 78 it.rb) {
80
81 interval_tree_remove(&node->it, &rmn->objects);
82 list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { 79 list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
83 bo->mn = NULL; 80 bo->mn = NULL;
84 list_del_init(&bo->mn_list); 81 list_del_init(&bo->mn_list);
85 } 82 }
86 kfree(node); 83 kfree(node);
87 } 84 }
88 mutex_unlock(&rmn->lock); 85 up_write(&rmn->mm->mmap_sem);
89 mutex_unlock(&adev->mn_lock); 86 mutex_unlock(&adev->mn_lock);
90 mmu_notifier_unregister(&rmn->mn, rmn->mm); 87 mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
91 kfree(rmn); 88 kfree(rmn);
92} 89}
93 90
@@ -129,8 +126,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
129 /* notification is exclusive, but interval is inclusive */ 126 /* notification is exclusive, but interval is inclusive */
130 end -= 1; 127 end -= 1;
131 128
132 mutex_lock(&rmn->lock);
133
134 it = interval_tree_iter_first(&rmn->objects, start, end); 129 it = interval_tree_iter_first(&rmn->objects, start, end);
135 while (it) { 130 while (it) {
136 struct amdgpu_mn_node *node; 131 struct amdgpu_mn_node *node;
@@ -142,7 +137,8 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
142 137
143 list_for_each_entry(bo, &node->bos, mn_list) { 138 list_for_each_entry(bo, &node->bos, mn_list) {
144 139
145 if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound) 140 if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
141 end))
146 continue; 142 continue;
147 143
148 r = amdgpu_bo_reserve(bo, true); 144 r = amdgpu_bo_reserve(bo, true);
@@ -164,8 +160,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
164 amdgpu_bo_unreserve(bo); 160 amdgpu_bo_unreserve(bo);
165 } 161 }
166 } 162 }
167
168 mutex_unlock(&rmn->lock);
169} 163}
170 164
171static const struct mmu_notifier_ops amdgpu_mn_ops = { 165static const struct mmu_notifier_ops amdgpu_mn_ops = {
@@ -186,8 +180,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
186 struct amdgpu_mn *rmn; 180 struct amdgpu_mn *rmn;
187 int r; 181 int r;
188 182
189 down_write(&mm->mmap_sem);
190 mutex_lock(&adev->mn_lock); 183 mutex_lock(&adev->mn_lock);
184 down_write(&mm->mmap_sem);
191 185
192 hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) 186 hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
193 if (rmn->mm == mm) 187 if (rmn->mm == mm)
@@ -202,7 +196,6 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
202 rmn->adev = adev; 196 rmn->adev = adev;
203 rmn->mm = mm; 197 rmn->mm = mm;
204 rmn->mn.ops = &amdgpu_mn_ops; 198 rmn->mn.ops = &amdgpu_mn_ops;
205 mutex_init(&rmn->lock);
206 rmn->objects = RB_ROOT; 199 rmn->objects = RB_ROOT;
207 200
208 r = __mmu_notifier_register(&rmn->mn, mm); 201 r = __mmu_notifier_register(&rmn->mn, mm);
@@ -212,14 +205,14 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
212 hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); 205 hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
213 206
214release_locks: 207release_locks:
215 mutex_unlock(&adev->mn_lock);
216 up_write(&mm->mmap_sem); 208 up_write(&mm->mmap_sem);
209 mutex_unlock(&adev->mn_lock);
217 210
218 return rmn; 211 return rmn;
219 212
220free_rmn: 213free_rmn:
221 mutex_unlock(&adev->mn_lock);
222 up_write(&mm->mmap_sem); 214 up_write(&mm->mmap_sem);
215 mutex_unlock(&adev->mn_lock);
223 kfree(rmn); 216 kfree(rmn);
224 217
225 return ERR_PTR(r); 218 return ERR_PTR(r);
@@ -249,7 +242,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
249 242
250 INIT_LIST_HEAD(&bos); 243 INIT_LIST_HEAD(&bos);
251 244
252 mutex_lock(&rmn->lock); 245 down_write(&rmn->mm->mmap_sem);
253 246
254 while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { 247 while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
255 kfree(node); 248 kfree(node);
@@ -263,7 +256,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
263 if (!node) { 256 if (!node) {
264 node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); 257 node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
265 if (!node) { 258 if (!node) {
266 mutex_unlock(&rmn->lock); 259 up_write(&rmn->mm->mmap_sem);
267 return -ENOMEM; 260 return -ENOMEM;
268 } 261 }
269 } 262 }
@@ -278,7 +271,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
278 271
279 interval_tree_insert(&node->it, &rmn->objects); 272 interval_tree_insert(&node->it, &rmn->objects);
280 273
281 mutex_unlock(&rmn->lock); 274 up_write(&rmn->mm->mmap_sem);
282 275
283 return 0; 276 return 0;
284} 277}
@@ -297,13 +290,15 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
297 struct list_head *head; 290 struct list_head *head;
298 291
299 mutex_lock(&adev->mn_lock); 292 mutex_lock(&adev->mn_lock);
293
300 rmn = bo->mn; 294 rmn = bo->mn;
301 if (rmn == NULL) { 295 if (rmn == NULL) {
302 mutex_unlock(&adev->mn_lock); 296 mutex_unlock(&adev->mn_lock);
303 return; 297 return;
304 } 298 }
305 299
306 mutex_lock(&rmn->lock); 300 down_write(&rmn->mm->mmap_sem);
301
307 /* save the next list entry for later */ 302 /* save the next list entry for later */
308 head = bo->mn_list.next; 303 head = bo->mn_list.next;
309 304
@@ -317,6 +312,6 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
317 kfree(node); 312 kfree(node);
318 } 313 }
319 314
320 mutex_unlock(&rmn->lock); 315 up_write(&rmn->mm->mmap_sem);
321 mutex_unlock(&adev->mn_lock); 316 mutex_unlock(&adev->mn_lock);
322} 317}
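
The MMU-notifier change above drops the private rmn->lock in favour of the mm's mmap_sem, and reorders amdgpu_mn_get() so that adev->mn_lock is always taken before mmap_sem, matching the destroy and unregister paths. The userspace sketch below models only that fixed lock ordering (mutex before rwlock on every path); it is not driver code. Build with -pthread.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t  mn_lock  = PTHREAD_MUTEX_INITIALIZER;	/* adev->mn_lock */
static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;	/* mm->mmap_sem  */

static void register_path(void)
{
	pthread_mutex_lock(&mn_lock);		/* outer lock first ...      */
	pthread_rwlock_wrlock(&mmap_sem);	/* ... then the per-mm rwlock */
	printf("register: interval tree updated\n");
	pthread_rwlock_unlock(&mmap_sem);	/* release in reverse order   */
	pthread_mutex_unlock(&mn_lock);
}

static void destroy_path(void)
{
	pthread_mutex_lock(&mn_lock);		/* same order on every path   */
	pthread_rwlock_wrlock(&mmap_sem);
	printf("destroy: interval tree torn down\n");
	pthread_rwlock_unlock(&mmap_sem);
	pthread_mutex_unlock(&mn_lock);
}

int main(void)
{
	register_path();
	destroy_path();
	return 0;
}
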
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index fdc1be8550da..8d432e6901af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -390,7 +390,6 @@ struct amdgpu_crtc {
390 struct drm_display_mode native_mode; 390 struct drm_display_mode native_mode;
391 u32 pll_id; 391 u32 pll_id;
392 /* page flipping */ 392 /* page flipping */
393 struct workqueue_struct *pflip_queue;
394 struct amdgpu_flip_work *pflip_works; 393 struct amdgpu_flip_work *pflip_works;
395 enum amdgpu_flip_status pflip_status; 394 enum amdgpu_flip_status pflip_status;
396 int deferred_flip_completion; 395 int deferred_flip_completion;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b8fbbd7699e4..9a025a77958d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -97,9 +97,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
97 97
98 amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL); 98 amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);
99 99
100 mutex_lock(&bo->adev->gem.mutex);
101 list_del_init(&bo->list);
102 mutex_unlock(&bo->adev->gem.mutex);
103 drm_gem_object_release(&bo->gem_base); 100 drm_gem_object_release(&bo->gem_base);
104 amdgpu_bo_unref(&bo->parent); 101 amdgpu_bo_unref(&bo->parent);
105 kfree(bo->metadata); 102 kfree(bo->metadata);
@@ -254,12 +251,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
254 bo->adev = adev; 251 bo->adev = adev;
255 INIT_LIST_HEAD(&bo->list); 252 INIT_LIST_HEAD(&bo->list);
256 INIT_LIST_HEAD(&bo->va); 253 INIT_LIST_HEAD(&bo->va);
257 bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM | 254 bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
258 AMDGPU_GEM_DOMAIN_GTT | 255 AMDGPU_GEM_DOMAIN_GTT |
259 AMDGPU_GEM_DOMAIN_CPU | 256 AMDGPU_GEM_DOMAIN_CPU |
260 AMDGPU_GEM_DOMAIN_GDS | 257 AMDGPU_GEM_DOMAIN_GDS |
261 AMDGPU_GEM_DOMAIN_GWS | 258 AMDGPU_GEM_DOMAIN_GWS |
262 AMDGPU_GEM_DOMAIN_OA); 259 AMDGPU_GEM_DOMAIN_OA);
260 bo->allowed_domains = bo->prefered_domains;
261 if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
262 bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
263 263
264 bo->flags = flags; 264 bo->flags = flags;
265 265
@@ -367,7 +367,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
367 int r, i; 367 int r, i;
368 unsigned fpfn, lpfn; 368 unsigned fpfn, lpfn;
369 369
370 if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm)) 370 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
371 return -EPERM; 371 return -EPERM;
372 372
373 if (WARN_ON_ONCE(min_offset > max_offset)) 373 if (WARN_ON_ONCE(min_offset > max_offset))
@@ -470,26 +470,6 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
470 return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); 470 return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
471} 471}
472 472
473void amdgpu_bo_force_delete(struct amdgpu_device *adev)
474{
475 struct amdgpu_bo *bo, *n;
476
477 if (list_empty(&adev->gem.objects)) {
478 return;
479 }
480 dev_err(adev->dev, "Userspace still has active objects !\n");
481 list_for_each_entry_safe(bo, n, &adev->gem.objects, list) {
482 dev_err(adev->dev, "%p %p %lu %lu force free\n",
483 &bo->gem_base, bo, (unsigned long)bo->gem_base.size,
484 *((unsigned long *)&bo->gem_base.refcount));
485 mutex_lock(&bo->adev->gem.mutex);
486 list_del_init(&bo->list);
487 mutex_unlock(&bo->adev->gem.mutex);
488 /* this should unref the ttm bo */
489 drm_gem_object_unreference_unlocked(&bo->gem_base);
490 }
491}
492
493int amdgpu_bo_init(struct amdgpu_device *adev) 473int amdgpu_bo_init(struct amdgpu_device *adev)
494{ 474{
495 /* Add an MTRR for the VRAM */ 475 /* Add an MTRR for the VRAM */
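
amdgpu_bo_create_restricted() now records two masks: prefered_domains (what the caller asked for) and allowed_domains, which is the same set except that a user BO asking only for VRAM may also land in GTT. A compilable userspace sketch of that rule, using stand-in flag values rather than the real AMDGPU_GEM_DOMAIN_* definitions from the uapi header:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DOMAIN_CPU   0x1u	/* stand-in values, not the uapi constants */
#define DOMAIN_GTT   0x2u
#define DOMAIN_VRAM  0x4u

struct bo_placement {
	uint32_t prefered_domains;	/* where the caller wants the BO        */
	uint32_t allowed_domains;	/* where eviction may also place it     */
};

static struct bo_placement pick_domains(uint32_t requested, bool kernel)
{
	struct bo_placement p;

	p.prefered_domains = requested & (DOMAIN_VRAM | DOMAIN_GTT | DOMAIN_CPU);
	p.allowed_domains = p.prefered_domains;
	/* user BOs that only asked for VRAM may still fall back to GTT */
	if (!kernel && p.allowed_domains == DOMAIN_VRAM)
		p.allowed_domains |= DOMAIN_GTT;
	return p;
}

int main(void)
{
	struct bo_placement p = pick_domains(DOMAIN_VRAM, false);

	printf("prefered 0x%x allowed 0x%x\n",
	       p.prefered_domains, p.allowed_domains);
	return 0;
}
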
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 5107fb291bdb..acc08018c6cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -149,7 +149,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
149 u64 *gpu_addr); 149 u64 *gpu_addr);
150int amdgpu_bo_unpin(struct amdgpu_bo *bo); 150int amdgpu_bo_unpin(struct amdgpu_bo *bo);
151int amdgpu_bo_evict_vram(struct amdgpu_device *adev); 151int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
152void amdgpu_bo_force_delete(struct amdgpu_device *adev);
153int amdgpu_bo_init(struct amdgpu_device *adev); 152int amdgpu_bo_init(struct amdgpu_device *adev);
154void amdgpu_bo_fini(struct amdgpu_device *adev); 153void amdgpu_bo_fini(struct amdgpu_device *adev);
155int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, 154int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 7d8d84eaea4a..d77b2bdbe800 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -119,7 +119,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
119 level = amdgpu_dpm_get_performance_level(adev); 119 level = amdgpu_dpm_get_performance_level(adev);
120 return snprintf(buf, PAGE_SIZE, "%s\n", 120 return snprintf(buf, PAGE_SIZE, "%s\n",
121 (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : 121 (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
122 (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : "high"); 122 (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
123 (level == AMD_DPM_FORCED_LEVEL_HIGH) ? "high" :
124 (level == AMD_DPM_FORCED_LEVEL_MANUAL) ? "manual" : "unknown");
123 } else { 125 } else {
124 enum amdgpu_dpm_forced_level level; 126 enum amdgpu_dpm_forced_level level;
125 127
@@ -146,6 +148,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
146 level = AMDGPU_DPM_FORCED_LEVEL_HIGH; 148 level = AMDGPU_DPM_FORCED_LEVEL_HIGH;
147 } else if (strncmp("auto", buf, strlen("auto")) == 0) { 149 } else if (strncmp("auto", buf, strlen("auto")) == 0) {
148 level = AMDGPU_DPM_FORCED_LEVEL_AUTO; 150 level = AMDGPU_DPM_FORCED_LEVEL_AUTO;
151 } else if (strncmp("manual", buf, strlen("manual")) == 0) {
152 level = AMDGPU_DPM_FORCED_LEVEL_MANUAL;
149 } else { 153 } else {
150 count = -EINVAL; 154 count = -EINVAL;
151 goto fail; 155 goto fail;
@@ -172,10 +176,293 @@ fail:
172 return count; 176 return count;
173} 177}
174 178
179static ssize_t amdgpu_get_pp_num_states(struct device *dev,
180 struct device_attribute *attr,
181 char *buf)
182{
183 struct drm_device *ddev = dev_get_drvdata(dev);
184 struct amdgpu_device *adev = ddev->dev_private;
185 struct pp_states_info data;
186 int i, buf_len;
187
188 if (adev->pp_enabled)
189 amdgpu_dpm_get_pp_num_states(adev, &data);
190
191 buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
192 for (i = 0; i < data.nums; i++)
193 buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
194 (data.states[i] == POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" :
195 (data.states[i] == POWER_STATE_TYPE_BATTERY) ? "battery" :
196 (data.states[i] == POWER_STATE_TYPE_BALANCED) ? "balanced" :
197 (data.states[i] == POWER_STATE_TYPE_PERFORMANCE) ? "performance" : "default");
198
199 return buf_len;
200}
201
202static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
203 struct device_attribute *attr,
204 char *buf)
205{
206 struct drm_device *ddev = dev_get_drvdata(dev);
207 struct amdgpu_device *adev = ddev->dev_private;
208 struct pp_states_info data;
209 enum amd_pm_state_type pm = 0;
210 int i = 0;
211
212 if (adev->pp_enabled) {
213
214 pm = amdgpu_dpm_get_current_power_state(adev);
215 amdgpu_dpm_get_pp_num_states(adev, &data);
216
217 for (i = 0; i < data.nums; i++) {
218 if (pm == data.states[i])
219 break;
220 }
221
222 if (i == data.nums)
223 i = -EINVAL;
224 }
225
226 return snprintf(buf, PAGE_SIZE, "%d\n", i);
227}
228
229static ssize_t amdgpu_get_pp_force_state(struct device *dev,
230 struct device_attribute *attr,
231 char *buf)
232{
233 struct drm_device *ddev = dev_get_drvdata(dev);
234 struct amdgpu_device *adev = ddev->dev_private;
235 struct pp_states_info data;
236 enum amd_pm_state_type pm = 0;
237 int i;
238
239 if (adev->pp_force_state_enabled && adev->pp_enabled) {
240 pm = amdgpu_dpm_get_current_power_state(adev);
241 amdgpu_dpm_get_pp_num_states(adev, &data);
242
243 for (i = 0; i < data.nums; i++) {
244 if (pm == data.states[i])
245 break;
246 }
247
248 if (i == data.nums)
249 i = -EINVAL;
250
251 return snprintf(buf, PAGE_SIZE, "%d\n", i);
252
253 } else
254 return snprintf(buf, PAGE_SIZE, "\n");
255}
256
257static ssize_t amdgpu_set_pp_force_state(struct device *dev,
258 struct device_attribute *attr,
259 const char *buf,
260 size_t count)
261{
262 struct drm_device *ddev = dev_get_drvdata(dev);
263 struct amdgpu_device *adev = ddev->dev_private;
264 enum amd_pm_state_type state = 0;
265 long idx;
266 int ret;
267
268 if (strlen(buf) == 1)
269 adev->pp_force_state_enabled = false;
270 else {
271 ret = kstrtol(buf, 0, &idx);
272
273 if (ret) {
274 count = -EINVAL;
275 goto fail;
276 }
277
278 if (adev->pp_enabled) {
279 struct pp_states_info data;
280 amdgpu_dpm_get_pp_num_states(adev, &data);
281 state = data.states[idx];
282 /* only set user selected power states */
283 if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
284 state != POWER_STATE_TYPE_DEFAULT) {
285 amdgpu_dpm_dispatch_task(adev,
286 AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
287 adev->pp_force_state_enabled = true;
288 }
289 }
290 }
291fail:
292 return count;
293}
294
295static ssize_t amdgpu_get_pp_table(struct device *dev,
296 struct device_attribute *attr,
297 char *buf)
298{
299 struct drm_device *ddev = dev_get_drvdata(dev);
300 struct amdgpu_device *adev = ddev->dev_private;
301 char *table = NULL;
302 int size, i;
303
304 if (adev->pp_enabled)
305 size = amdgpu_dpm_get_pp_table(adev, &table);
306 else
307 return 0;
308
309 if (size >= PAGE_SIZE)
310 size = PAGE_SIZE - 1;
311
312 for (i = 0; i < size; i++) {
313 sprintf(buf + i, "%02x", table[i]);
314 }
315 sprintf(buf + i, "\n");
316
317 return size;
318}
319
320static ssize_t amdgpu_set_pp_table(struct device *dev,
321 struct device_attribute *attr,
322 const char *buf,
323 size_t count)
324{
325 struct drm_device *ddev = dev_get_drvdata(dev);
326 struct amdgpu_device *adev = ddev->dev_private;
327
328 if (adev->pp_enabled)
329 amdgpu_dpm_set_pp_table(adev, buf, count);
330
331 return count;
332}
333
334static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
335 struct device_attribute *attr,
336 char *buf)
337{
338 struct drm_device *ddev = dev_get_drvdata(dev);
339 struct amdgpu_device *adev = ddev->dev_private;
340 ssize_t size = 0;
341
342 if (adev->pp_enabled)
343 size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
344
345 return size;
346}
347
348static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
349 struct device_attribute *attr,
350 const char *buf,
351 size_t count)
352{
353 struct drm_device *ddev = dev_get_drvdata(dev);
354 struct amdgpu_device *adev = ddev->dev_private;
355 int ret;
356 long level;
357
358 ret = kstrtol(buf, 0, &level);
359
360 if (ret) {
361 count = -EINVAL;
362 goto fail;
363 }
364
365 if (adev->pp_enabled)
366 amdgpu_dpm_force_clock_level(adev, PP_SCLK, level);
367fail:
368 return count;
369}
370
371static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
372 struct device_attribute *attr,
373 char *buf)
374{
375 struct drm_device *ddev = dev_get_drvdata(dev);
376 struct amdgpu_device *adev = ddev->dev_private;
377 ssize_t size = 0;
378
379 if (adev->pp_enabled)
380 size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
381
382 return size;
383}
384
385static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
386 struct device_attribute *attr,
387 const char *buf,
388 size_t count)
389{
390 struct drm_device *ddev = dev_get_drvdata(dev);
391 struct amdgpu_device *adev = ddev->dev_private;
392 int ret;
393 long level;
394
395 ret = kstrtol(buf, 0, &level);
396
397 if (ret) {
398 count = -EINVAL;
399 goto fail;
400 }
401
402 if (adev->pp_enabled)
403 amdgpu_dpm_force_clock_level(adev, PP_MCLK, level);
404fail:
405 return count;
406}
407
408static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
409 struct device_attribute *attr,
410 char *buf)
411{
412 struct drm_device *ddev = dev_get_drvdata(dev);
413 struct amdgpu_device *adev = ddev->dev_private;
414 ssize_t size = 0;
415
416 if (adev->pp_enabled)
417 size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
418
419 return size;
420}
421
422static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
423 struct device_attribute *attr,
424 const char *buf,
425 size_t count)
426{
427 struct drm_device *ddev = dev_get_drvdata(dev);
428 struct amdgpu_device *adev = ddev->dev_private;
429 int ret;
430 long level;
431
432 ret = kstrtol(buf, 0, &level);
433
434 if (ret) {
435 count = -EINVAL;
436 goto fail;
437 }
438
439 if (adev->pp_enabled)
440 amdgpu_dpm_force_clock_level(adev, PP_PCIE, level);
441fail:
442 return count;
443}
444
175static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); 445static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
176static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, 446static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
177 amdgpu_get_dpm_forced_performance_level, 447 amdgpu_get_dpm_forced_performance_level,
178 amdgpu_set_dpm_forced_performance_level); 448 amdgpu_set_dpm_forced_performance_level);
449static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL);
450static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL);
451static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR,
452 amdgpu_get_pp_force_state,
453 amdgpu_set_pp_force_state);
454static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR,
455 amdgpu_get_pp_table,
456 amdgpu_set_pp_table);
457static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
458 amdgpu_get_pp_dpm_sclk,
459 amdgpu_set_pp_dpm_sclk);
460static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
461 amdgpu_get_pp_dpm_mclk,
462 amdgpu_set_pp_dpm_mclk);
463static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
464 amdgpu_get_pp_dpm_pcie,
465 amdgpu_set_pp_dpm_pcie);
179 466
180static ssize_t amdgpu_hwmon_show_temp(struct device *dev, 467static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
181 struct device_attribute *attr, 468 struct device_attribute *attr,
@@ -623,14 +910,12 @@ force:
623 amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps); 910 amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps);
624 } 911 }
625 912
626 mutex_lock(&adev->ring_lock);
627
628 /* update whether vce is active */ 913 /* update whether vce is active */
629 ps->vce_active = adev->pm.dpm.vce_active; 914 ps->vce_active = adev->pm.dpm.vce_active;
630 915
631 ret = amdgpu_dpm_pre_set_power_state(adev); 916 ret = amdgpu_dpm_pre_set_power_state(adev);
632 if (ret) 917 if (ret)
633 goto done; 918 return;
634 919
635 /* update display watermarks based on new power state */ 920 /* update display watermarks based on new power state */
636 amdgpu_display_bandwidth_update(adev); 921 amdgpu_display_bandwidth_update(adev);
@@ -667,9 +952,6 @@ force:
667 amdgpu_dpm_force_performance_level(adev, adev->pm.dpm.forced_level); 952 amdgpu_dpm_force_performance_level(adev, adev->pm.dpm.forced_level);
668 } 953 }
669 } 954 }
670
671done:
672 mutex_unlock(&adev->ring_lock);
673} 955}
674 956
675void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) 957void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
@@ -770,6 +1052,44 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
770 DRM_ERROR("failed to create device file for dpm state\n"); 1052 DRM_ERROR("failed to create device file for dpm state\n");
771 return ret; 1053 return ret;
772 } 1054 }
1055
1056 if (adev->pp_enabled) {
1057 ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
1058 if (ret) {
1059 DRM_ERROR("failed to create device file pp_num_states\n");
1060 return ret;
1061 }
1062 ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
1063 if (ret) {
1064 DRM_ERROR("failed to create device file pp_cur_state\n");
1065 return ret;
1066 }
1067 ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
1068 if (ret) {
1069 DRM_ERROR("failed to create device file pp_force_state\n");
1070 return ret;
1071 }
1072 ret = device_create_file(adev->dev, &dev_attr_pp_table);
1073 if (ret) {
1074 DRM_ERROR("failed to create device file pp_table\n");
1075 return ret;
1076 }
1077 ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
1078 if (ret) {
1079 DRM_ERROR("failed to create device file pp_dpm_sclk\n");
1080 return ret;
1081 }
1082 ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
1083 if (ret) {
1084 DRM_ERROR("failed to create device file pp_dpm_mclk\n");
1085 return ret;
1086 }
1087 ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
1088 if (ret) {
1089 DRM_ERROR("failed to create device file pp_dpm_pcie\n");
1090 return ret;
1091 }
1092 }
773 ret = amdgpu_debugfs_pm_init(adev); 1093 ret = amdgpu_debugfs_pm_init(adev);
774 if (ret) { 1094 if (ret) {
775 DRM_ERROR("Failed to register debugfs file for dpm!\n"); 1095 DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -787,6 +1107,15 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
787 hwmon_device_unregister(adev->pm.int_hwmon_dev); 1107 hwmon_device_unregister(adev->pm.int_hwmon_dev);
788 device_remove_file(adev->dev, &dev_attr_power_dpm_state); 1108 device_remove_file(adev->dev, &dev_attr_power_dpm_state);
789 device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); 1109 device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
1110 if (adev->pp_enabled) {
1111 device_remove_file(adev->dev, &dev_attr_pp_num_states);
1112 device_remove_file(adev->dev, &dev_attr_pp_cur_state);
1113 device_remove_file(adev->dev, &dev_attr_pp_force_state);
1114 device_remove_file(adev->dev, &dev_attr_pp_table);
1115 device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
1116 device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
1117 device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
1118 }
790} 1119}
791 1120
792void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) 1121void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -802,13 +1131,11 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
802 int i = 0; 1131 int i = 0;
803 1132
804 amdgpu_display_bandwidth_update(adev); 1133 amdgpu_display_bandwidth_update(adev);
805 mutex_lock(&adev->ring_lock); 1134 for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
806 for (i = 0; i < AMDGPU_MAX_RINGS; i++) { 1135 struct amdgpu_ring *ring = adev->rings[i];
807 struct amdgpu_ring *ring = adev->rings[i]; 1136 if (ring && ring->ready)
808 if (ring && ring->ready) 1137 amdgpu_fence_wait_empty(ring);
809 amdgpu_fence_wait_empty(ring); 1138 }
810 }
811 mutex_unlock(&adev->ring_lock);
812 1139
813 amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL); 1140 amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL);
814 } else { 1141 } else {
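
The new pp_dpm_sclk/pp_dpm_mclk/pp_dpm_pcie files added above are thin wrappers: the show handlers return whatever amdgpu_dpm_print_clock_levels() produces, and the store handlers run kstrtol() on the buffer and hand the parsed level to amdgpu_dpm_force_clock_level(). As a rough illustration only, a userspace caller could exercise such a file as sketched below; the sysfs path and the "write an integer level index" convention are assumptions made for the sketch, not something this patch guarantees.

    /* Minimal sketch: dump the advertised sclk levels, then request one.
     * The sysfs path and the write format are assumptions for illustration;
     * the real ABI is whatever the driver's store handler parses. */
    #include <stdio.h>
    #include <stdlib.h>

    #define PP_DPM_SCLK "/sys/class/drm/card0/device/pp_dpm_sclk" /* assumed path */

    int main(void)
    {
        char line[256];
        FILE *f = fopen(PP_DPM_SCLK, "r");

        if (!f) {
            perror("open " PP_DPM_SCLK);
            return EXIT_FAILURE;
        }
        /* echo the show handler's output (format is driver-defined) */
        while (fgets(line, sizeof(line), f))
            fputs(line, stdout);
        fclose(f);

        /* the store handler runs kstrtol() and forwards the level to
         * amdgpu_dpm_force_clock_level(), so writing "1" asks for level 1 */
        f = fopen(PP_DPM_SCLK, "w");
        if (!f) {
            perror("reopen " PP_DPM_SCLK " for write");
            return EXIT_FAILURE;
        }
        fprintf(f, "1\n");
        fclose(f);
        return EXIT_SUCCESS;
    }
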
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 59f735a933a9..be6388f73ba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -73,10 +73,6 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
73 if (ret) 73 if (ret)
74 return ERR_PTR(ret); 74 return ERR_PTR(ret);
75 75
76 mutex_lock(&adev->gem.mutex);
77 list_add_tail(&bo->list, &adev->gem.objects);
78 mutex_unlock(&adev->gem.mutex);
79
80 return &bo->gem_base; 76 return &bo->gem_base;
81} 77}
82 78
@@ -121,7 +117,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
121{ 117{
122 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); 118 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
123 119
124 if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm)) 120 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
125 return ERR_PTR(-EPERM); 121 return ERR_PTR(-EPERM);
126 122
127 return drm_gem_prime_export(dev, gobj, flags); 123 return drm_gem_prime_export(dev, gobj, flags);
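
The amdgpu_prime.c change above drops the per-device GEM object list and, now via amdgpu_ttm_tt_get_usermm(), keeps refusing to export buffers that pin userspace pages. A tiny userspace model of that guard follows; the struct and field names are invented for the sketch.

    /* Userspace model of the export guard: a BO that wraps a userspace
     * mapping (a "userptr" BO) must not be exported to other devices. */
    #include <errno.h>
    #include <stdio.h>

    struct dummy_bo {
        void *usermm;   /* non-NULL when the BO pins userspace pages */
    };

    static int export_bo(const struct dummy_bo *bo)
    {
        if (bo->usermm)   /* mirrors the amdgpu_ttm_tt_get_usermm() check */
            return -EPERM;
        return 0;         /* real code hands off to drm_gem_prime_export() */
    }

    int main(void)
    {
        struct dummy_bo plain = { .usermm = NULL };
        struct dummy_bo userptr = { .usermm = (void *)1 };

        printf("plain bo:   %d\n", export_bo(&plain));   /* 0 */
        printf("userptr bo: %d\n", export_bo(&userptr)); /* -EPERM */
        return 0;
    }
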
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d1f234dd2126..56c07e3fdb33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -49,28 +49,6 @@
49static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); 49static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
50 50
51/** 51/**
52 * amdgpu_ring_free_size - update the free size
53 *
54 * @adev: amdgpu_device pointer
55 * @ring: amdgpu_ring structure holding ring information
56 *
57 * Update the free dw slots in the ring buffer (all asics).
58 */
59void amdgpu_ring_free_size(struct amdgpu_ring *ring)
60{
61 uint32_t rptr = amdgpu_ring_get_rptr(ring);
62
63 /* This works because ring_size is a power of 2 */
64 ring->ring_free_dw = rptr + (ring->ring_size / 4);
65 ring->ring_free_dw -= ring->wptr;
66 ring->ring_free_dw &= ring->ptr_mask;
67 if (!ring->ring_free_dw) {
68 /* this is an empty ring */
69 ring->ring_free_dw = ring->ring_size / 4;
70 }
71}
72
73/**
74 * amdgpu_ring_alloc - allocate space on the ring buffer 52 * amdgpu_ring_alloc - allocate space on the ring buffer
75 * 53 *
76 * @adev: amdgpu_device pointer 54 * @adev: amdgpu_device pointer
@@ -82,50 +60,18 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring)
82 */ 60 */
83int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw) 61int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
84{ 62{
85 int r;
86
87 /* make sure we aren't trying to allocate more space than there is on the ring */
88 if (ndw > (ring->ring_size / 4))
89 return -ENOMEM;
90 /* Align requested size with padding so unlock_commit can 63 /* Align requested size with padding so unlock_commit can
91 * pad safely */ 64 * pad safely */
92 amdgpu_ring_free_size(ring);
93 ndw = (ndw + ring->align_mask) & ~ring->align_mask; 65 ndw = (ndw + ring->align_mask) & ~ring->align_mask;
94 while (ndw > (ring->ring_free_dw - 1)) {
95 amdgpu_ring_free_size(ring);
96 if (ndw < ring->ring_free_dw) {
97 break;
98 }
99 r = amdgpu_fence_wait_next(ring);
100 if (r)
101 return r;
102 }
103 ring->count_dw = ndw;
104 ring->wptr_old = ring->wptr;
105 return 0;
106}
107 66
108/** 67 /* Make sure we aren't trying to allocate more space
109 * amdgpu_ring_lock - lock the ring and allocate space on it 68 * than the maximum for one submission
110 * 69 */
111 * @adev: amdgpu_device pointer 70 if (WARN_ON_ONCE(ndw > ring->max_dw))
112 * @ring: amdgpu_ring structure holding ring information 71 return -ENOMEM;
113 * @ndw: number of dwords to allocate in the ring buffer
114 *
115 * Lock the ring and allocate @ndw dwords in the ring buffer
116 * (all asics).
117 * Returns 0 on success, error on failure.
118 */
119int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
120{
121 int r;
122 72
123 mutex_lock(ring->ring_lock); 73 ring->count_dw = ndw;
124 r = amdgpu_ring_alloc(ring, ndw); 74 ring->wptr_old = ring->wptr;
125 if (r) {
126 mutex_unlock(ring->ring_lock);
127 return r;
128 }
129 return 0; 75 return 0;
130} 76}
131 77
@@ -144,6 +90,19 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
144 amdgpu_ring_write(ring, ring->nop); 90 amdgpu_ring_write(ring, ring->nop);
145} 91}
146 92
93/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets
94 *
95 * @ring: amdgpu_ring structure holding ring information
96 * @ib: IB to add NOP packets to
97 *
98 * This is the generic pad_ib function for rings except SDMA
99 */
100void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
101{
102 while (ib->length_dw & ring->align_mask)
103 ib->ptr[ib->length_dw++] = ring->nop;
104}
105
147/** 106/**
148 * amdgpu_ring_commit - tell the GPU to execute the new 107 * amdgpu_ring_commit - tell the GPU to execute the new
149 * commands on the ring buffer 108 * commands on the ring buffer
@@ -168,20 +127,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
168} 127}
169 128
170/** 129/**
171 * amdgpu_ring_unlock_commit - tell the GPU to execute the new
172 * commands on the ring buffer and unlock it
173 *
174 * @ring: amdgpu_ring structure holding ring information
175 *
176 * Call amdgpu_ring_commit() then unlock the ring (all asics).
177 */
178void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
179{
180 amdgpu_ring_commit(ring);
181 mutex_unlock(ring->ring_lock);
182}
183
184/**
185 * amdgpu_ring_undo - reset the wptr 130 * amdgpu_ring_undo - reset the wptr
186 * 131 *
187 * @ring: amdgpu_ring structure holding ring information 132 * @ring: amdgpu_ring structure holding ring information
@@ -194,19 +139,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
194} 139}
195 140
196/** 141/**
197 * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
198 *
199 * @ring: amdgpu_ring structure holding ring information
200 *
201 * Call amdgpu_ring_undo() then unlock the ring (all asics).
202 */
203void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
204{
205 amdgpu_ring_undo(ring);
206 mutex_unlock(ring->ring_lock);
207}
208
209/**
210 * amdgpu_ring_backup - Back up the content of a ring 142 * amdgpu_ring_backup - Back up the content of a ring
211 * 143 *
212 * @ring: the ring we want to back up 144 * @ring: the ring we want to back up
@@ -218,43 +150,32 @@ unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
218{ 150{
219 unsigned size, ptr, i; 151 unsigned size, ptr, i;
220 152
221 /* just in case lock the ring */
222 mutex_lock(ring->ring_lock);
223 *data = NULL; 153 *data = NULL;
224 154
225 if (ring->ring_obj == NULL) { 155 if (ring->ring_obj == NULL)
226 mutex_unlock(ring->ring_lock);
227 return 0; 156 return 0;
228 }
229 157
230 /* it doesn't make sense to save anything if all fences are signaled */ 158 /* it doesn't make sense to save anything if all fences are signaled */
231 if (!amdgpu_fence_count_emitted(ring)) { 159 if (!amdgpu_fence_count_emitted(ring))
232 mutex_unlock(ring->ring_lock);
233 return 0; 160 return 0;
234 }
235 161
236 ptr = le32_to_cpu(*ring->next_rptr_cpu_addr); 162 ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
237 163
238 size = ring->wptr + (ring->ring_size / 4); 164 size = ring->wptr + (ring->ring_size / 4);
239 size -= ptr; 165 size -= ptr;
240 size &= ring->ptr_mask; 166 size &= ring->ptr_mask;
241 if (size == 0) { 167 if (size == 0)
242 mutex_unlock(ring->ring_lock);
243 return 0; 168 return 0;
244 }
245 169
246 /* and then save the content of the ring */ 170 /* and then save the content of the ring */
247 *data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); 171 *data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
248 if (!*data) { 172 if (!*data)
249 mutex_unlock(ring->ring_lock);
250 return 0; 173 return 0;
251 }
252 for (i = 0; i < size; ++i) { 174 for (i = 0; i < size; ++i) {
253 (*data)[i] = ring->ring[ptr++]; 175 (*data)[i] = ring->ring[ptr++];
254 ptr &= ring->ptr_mask; 176 ptr &= ring->ptr_mask;
255 } 177 }
256 178
257 mutex_unlock(ring->ring_lock);
258 return size; 179 return size;
259} 180}
260 181
@@ -276,7 +197,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
276 return 0; 197 return 0;
277 198
278 /* restore the saved ring content */ 199 /* restore the saved ring content */
279 r = amdgpu_ring_lock(ring, size); 200 r = amdgpu_ring_alloc(ring, size);
280 if (r) 201 if (r)
281 return r; 202 return r;
282 203
@@ -284,7 +205,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
284 amdgpu_ring_write(ring, data[i]); 205 amdgpu_ring_write(ring, data[i]);
285 } 206 }
286 207
287 amdgpu_ring_unlock_commit(ring); 208 amdgpu_ring_commit(ring);
288 kfree(data); 209 kfree(data);
289 return 0; 210 return 0;
290} 211}
@@ -352,7 +273,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
352 return r; 273 return r;
353 } 274 }
354 275
355 ring->ring_lock = &adev->ring_lock;
356 /* Align ring size */ 276 /* Align ring size */
357 rb_bufsz = order_base_2(ring_size / 8); 277 rb_bufsz = order_base_2(ring_size / 8);
358 ring_size = (1 << (rb_bufsz + 1)) * 4; 278 ring_size = (1 << (rb_bufsz + 1)) * 4;
@@ -389,7 +309,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
389 } 309 }
390 } 310 }
391 ring->ptr_mask = (ring->ring_size / 4) - 1; 311 ring->ptr_mask = (ring->ring_size / 4) - 1;
392 ring->ring_free_dw = ring->ring_size / 4; 312 ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4,
313 amdgpu_sched_hw_submission);
393 314
394 if (amdgpu_debugfs_ring_init(adev, ring)) { 315 if (amdgpu_debugfs_ring_init(adev, ring)) {
395 DRM_ERROR("Failed to register debugfs file for rings !\n"); 316 DRM_ERROR("Failed to register debugfs file for rings !\n");
@@ -410,15 +331,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
410 int r; 331 int r;
411 struct amdgpu_bo *ring_obj; 332 struct amdgpu_bo *ring_obj;
412 333
413 if (ring->ring_lock == NULL)
414 return;
415
416 mutex_lock(ring->ring_lock);
417 ring_obj = ring->ring_obj; 334 ring_obj = ring->ring_obj;
418 ring->ready = false; 335 ring->ready = false;
419 ring->ring = NULL; 336 ring->ring = NULL;
420 ring->ring_obj = NULL; 337 ring->ring_obj = NULL;
421 mutex_unlock(ring->ring_lock);
422 338
423 amdgpu_wb_free(ring->adev, ring->fence_offs); 339 amdgpu_wb_free(ring->adev, ring->fence_offs);
424 amdgpu_wb_free(ring->adev, ring->rptr_offs); 340 amdgpu_wb_free(ring->adev, ring->rptr_offs);
@@ -474,29 +390,18 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
474 struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset); 390 struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
475 391
476 uint32_t rptr, wptr, rptr_next; 392 uint32_t rptr, wptr, rptr_next;
477 unsigned count, i, j; 393 unsigned i;
478
479 amdgpu_ring_free_size(ring);
480 count = (ring->ring_size / 4) - ring->ring_free_dw;
481 394
482 wptr = amdgpu_ring_get_wptr(ring); 395 wptr = amdgpu_ring_get_wptr(ring);
483 seq_printf(m, "wptr: 0x%08x [%5d]\n", 396 seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
484 wptr, wptr);
485 397
486 rptr = amdgpu_ring_get_rptr(ring); 398 rptr = amdgpu_ring_get_rptr(ring);
487 seq_printf(m, "rptr: 0x%08x [%5d]\n",
488 rptr, rptr);
489
490 rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr); 399 rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
491 400
401 seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
402
492 seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", 403 seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
493 ring->wptr, ring->wptr); 404 ring->wptr, ring->wptr);
494 seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
495 ring->last_semaphore_signal_addr);
496 seq_printf(m, "last semaphore wait addr : 0x%016llx\n",
497 ring->last_semaphore_wait_addr);
498 seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
499 seq_printf(m, "%u dwords in ring\n", count);
500 405
501 if (!ring->ready) 406 if (!ring->ready)
502 return 0; 407 return 0;
@@ -505,11 +410,20 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
505 * packet that is the root issue 410 * packet that is the root issue
506 */ 411 */
507 i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; 412 i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
508 for (j = 0; j <= (count + 32); j++) { 413 while (i != rptr) {
414 seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
415 if (i == rptr)
416 seq_puts(m, " *");
417 if (i == rptr_next)
418 seq_puts(m, " #");
419 seq_puts(m, "\n");
420 i = (i + 1) & ring->ptr_mask;
421 }
422 while (i != wptr) {
509 seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); 423 seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
510 if (rptr == i) 424 if (i == rptr)
511 seq_puts(m, " *"); 425 seq_puts(m, " *");
512 if (rptr_next == i) 426 if (i == rptr_next)
513 seq_puts(m, " #"); 427 seq_puts(m, " #");
514 seq_puts(m, "\n"); 428 seq_puts(m, "\n");
515 i = (i + 1) & ring->ptr_mask; 429 i = (i + 1) & ring->ptr_mask;
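
With the ring lock and free-space tracking removed above, amdgpu_ring_alloc() only rounds the request up to the ring's alignment and rejects anything beyond max_dw, which amdgpu_ring_init() now derives as DIV_ROUND_UP(ring_size / 4, amdgpu_sched_hw_submission). A standalone sketch of that arithmetic, with constants chosen arbitrarily for illustration:

    /* Standalone model of the new allocation check in amdgpu_ring_alloc():
     * pad the request to the ring alignment, then compare it against
     * max_dw, the per-submission budget. Numbers are illustrative only. */
    #include <stdio.h>

    #define RING_SIZE_BYTES (64 * 1024)              /* power of two */
    #define RING_DW         (RING_SIZE_BYTES / 4)    /* ring size in dwords */
    #define HW_SUBMISSION   2                        /* amdgpu_sched_hw_submission */
    #define MAX_DW          ((RING_DW + HW_SUBMISSION - 1) / HW_SUBMISSION) /* DIV_ROUND_UP */
    #define ALIGN_MASK      0xf                      /* pad to 16-dword multiples */

    static int ring_alloc(unsigned ndw)
    {
        ndw = (ndw + ALIGN_MASK) & ~ALIGN_MASK;  /* same rounding as the patch */
        if (ndw > MAX_DW)                        /* replaces the old wait-for-space loop */
            return -1;                           /* kernel: WARN_ON_ONCE + -ENOMEM */
        return (int)ndw;                         /* padded size actually reserved */
    }

    int main(void)
    {
        printf("request 37   -> %d dwords (budget %u)\n",
               ring_alloc(37), (unsigned)MAX_DW);          /* rounded up to 48 */
        printf("request %u -> %d (over budget)\n",
               (unsigned)MAX_DW + 1, ring_alloc(MAX_DW + 1));
        return 0;
    }
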
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 8b88edb0434b..7d8f8f1e3f7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -321,8 +321,11 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
321 int i, r; 321 int i, r;
322 signed long t; 322 signed long t;
323 323
324 BUG_ON(align > sa_manager->align); 324 if (WARN_ON_ONCE(align > sa_manager->align))
325 BUG_ON(size > sa_manager->size); 325 return -EINVAL;
326
327 if (WARN_ON_ONCE(size > sa_manager->size))
328 return -EINVAL;
326 329
327 *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL); 330 *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
328 if ((*sa_bo) == NULL) { 331 if ((*sa_bo) == NULL) {
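
The amdgpu_sa.c hunk above swaps BUG_ON for WARN_ON_ONCE plus an -EINVAL return, so a bad sub-allocation request logs a single warning and fails instead of halting the machine. Below is a userspace model of that "warn once, fail soft" pattern; warn_on_once here is a stand-in written with a GCC-style statement expression, not the kernel's real WARN_ON_ONCE.

    /* Userspace model of the BUG_ON -> WARN_ON_ONCE + error-return change:
     * invalid arguments produce one diagnostic and -EINVAL, not a crash. */
    #include <errno.h>
    #include <stdio.h>

    #define warn_on_once(cond) ({                                    \
        static int warned;                                           \
        int __c = !!(cond);                                          \
        if (__c && !warned) {                                        \
            warned = 1;                                              \
            fprintf(stderr, "WARNING: %s at %s:%d\n", #cond,         \
                    __FILE__, __LINE__);                             \
        }                                                            \
        __c;                                                         \
    })

    static int sa_bo_new(unsigned size, unsigned align,
                         unsigned mgr_size, unsigned mgr_align)
    {
        if (warn_on_once(align > mgr_align))
            return -EINVAL;
        if (warn_on_once(size > mgr_size))
            return -EINVAL;
        return 0;   /* real code goes on to carve out the sub-allocation */
    }

    int main(void)
    {
        printf("%d\n", sa_bo_new(256, 8, 4096, 16));   /* 0 */
        printf("%d\n", sa_bo_new(8192, 8, 4096, 16));  /* -EINVAL, warns once */
        printf("%d\n", sa_bo_new(8192, 8, 4096, 16));  /* -EINVAL, no second warning */
        return 0;
    }
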
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
deleted file mode 100644
index 438c05254695..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ /dev/null
@@ -1,108 +0,0 @@
1/*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 *
23 */
24#include <linux/kthread.h>
25#include <linux/wait.h>
26#include <linux/sched.h>
27#include <drm/drmP.h>
28#include "amdgpu.h"
29#include "amdgpu_trace.h"
30
31static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
32{
33 struct amdgpu_job *job = to_amdgpu_job(sched_job);
34 return amdgpu_sync_get_fence(&job->ibs->sync);
35}
36
37static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
38{
39 struct amdgpu_fence *fence = NULL;
40 struct amdgpu_job *job;
41 int r;
42
43 if (!sched_job) {
44 DRM_ERROR("job is null\n");
45 return NULL;
46 }
47 job = to_amdgpu_job(sched_job);
48 trace_amdgpu_sched_run_job(job);
49 r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
50 if (r) {
51 DRM_ERROR("Error scheduling IBs (%d)\n", r);
52 goto err;
53 }
54
55 fence = job->ibs[job->num_ibs - 1].fence;
56 fence_get(&fence->base);
57
58err:
59 if (job->free_job)
60 job->free_job(job);
61
62 kfree(job);
63 return fence ? &fence->base : NULL;
64}
65
66struct amd_sched_backend_ops amdgpu_sched_ops = {
67 .dependency = amdgpu_sched_dependency,
68 .run_job = amdgpu_sched_run_job,
69};
70
71int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
72 struct amdgpu_ring *ring,
73 struct amdgpu_ib *ibs,
74 unsigned num_ibs,
75 int (*free_job)(struct amdgpu_job *),
76 void *owner,
77 struct fence **f)
78{
79 int r = 0;
80 if (amdgpu_enable_scheduler) {
81 struct amdgpu_job *job =
82 kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
83 if (!job)
84 return -ENOMEM;
85 job->base.sched = &ring->sched;
86 job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
87 job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
88 if (!job->base.s_fence) {
89 kfree(job);
90 return -ENOMEM;
91 }
92 *f = fence_get(&job->base.s_fence->base);
93
94 job->adev = adev;
95 job->ibs = ibs;
96 job->num_ibs = num_ibs;
97 job->owner = owner;
98 job->free_job = free_job;
99 amd_sched_entity_push_job(&job->base);
100 } else {
101 r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
102 if (r)
103 return r;
104 *f = fence_get(&ibs[num_ibs - 1].fence->base);
105 }
106
107 return 0;
108}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
deleted file mode 100644
index 1caaf201b708..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ /dev/null
@@ -1,102 +0,0 @@
1/*
2 * Copyright 2011 Christian König.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26/*
27 * Authors:
28 * Christian König <deathsimple@vodafone.de>
29 */
30#include <drm/drmP.h>
31#include "amdgpu.h"
32#include "amdgpu_trace.h"
33
34int amdgpu_semaphore_create(struct amdgpu_device *adev,
35 struct amdgpu_semaphore **semaphore)
36{
37 int r;
38
39 *semaphore = kmalloc(sizeof(struct amdgpu_semaphore), GFP_KERNEL);
40 if (*semaphore == NULL) {
41 return -ENOMEM;
42 }
43 r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
44 &(*semaphore)->sa_bo, 8, 8);
45 if (r) {
46 kfree(*semaphore);
47 *semaphore = NULL;
48 return r;
49 }
50 (*semaphore)->waiters = 0;
51 (*semaphore)->gpu_addr = amdgpu_sa_bo_gpu_addr((*semaphore)->sa_bo);
52
53 *((uint64_t *)amdgpu_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
54
55 return 0;
56}
57
58bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
59 struct amdgpu_semaphore *semaphore)
60{
61 trace_amdgpu_semaphore_signale(ring->idx, semaphore);
62
63 if (amdgpu_ring_emit_semaphore(ring, semaphore, false)) {
64 --semaphore->waiters;
65
66 /* for debugging lockup only, used by sysfs debug files */
67 ring->last_semaphore_signal_addr = semaphore->gpu_addr;
68 return true;
69 }
70 return false;
71}
72
73bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
74 struct amdgpu_semaphore *semaphore)
75{
76 trace_amdgpu_semaphore_wait(ring->idx, semaphore);
77
78 if (amdgpu_ring_emit_semaphore(ring, semaphore, true)) {
79 ++semaphore->waiters;
80
81 /* for debugging lockup only, used by sysfs debug files */
82 ring->last_semaphore_wait_addr = semaphore->gpu_addr;
83 return true;
84 }
85 return false;
86}
87
88void amdgpu_semaphore_free(struct amdgpu_device *adev,
89 struct amdgpu_semaphore **semaphore,
90 struct fence *fence)
91{
92 if (semaphore == NULL || *semaphore == NULL) {
93 return;
94 }
95 if ((*semaphore)->waiters > 0) {
96 dev_err(adev->dev, "semaphore %p has more waiters than signalers,"
97 " hardware lockup imminent!\n", *semaphore);
98 }
99 amdgpu_sa_bo_free(adev, &(*semaphore)->sa_bo, fence);
100 kfree(*semaphore);
101 *semaphore = NULL;
102}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 181ce39ef5e5..c15be00de904 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -46,14 +46,6 @@ struct amdgpu_sync_entry {
46 */ 46 */
47void amdgpu_sync_create(struct amdgpu_sync *sync) 47void amdgpu_sync_create(struct amdgpu_sync *sync)
48{ 48{
49 unsigned i;
50
51 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
52 sync->semaphores[i] = NULL;
53
54 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
55 sync->sync_to[i] = NULL;
56
57 hash_init(sync->fences); 49 hash_init(sync->fences);
58 sync->last_vm_update = NULL; 50 sync->last_vm_update = NULL;
59} 51}
@@ -107,7 +99,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
107 struct fence *f) 99 struct fence *f)
108{ 100{
109 struct amdgpu_sync_entry *e; 101 struct amdgpu_sync_entry *e;
110 struct amdgpu_fence *fence;
111 102
112 if (!f) 103 if (!f)
113 return 0; 104 return 0;
@@ -116,27 +107,20 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
116 amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) 107 amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
117 amdgpu_sync_keep_later(&sync->last_vm_update, f); 108 amdgpu_sync_keep_later(&sync->last_vm_update, f);
118 109
119 fence = to_amdgpu_fence(f); 110 hash_for_each_possible(sync->fences, e, node, f->context) {
120 if (!fence || fence->ring->adev != adev) { 111 if (unlikely(e->fence->context != f->context))
121 hash_for_each_possible(sync->fences, e, node, f->context) { 112 continue;
122 if (unlikely(e->fence->context != f->context))
123 continue;
124
125 amdgpu_sync_keep_later(&e->fence, f);
126 return 0;
127 }
128
129 e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
130 if (!e)
131 return -ENOMEM;
132 113
133 hash_add(sync->fences, &e->node, f->context); 114 amdgpu_sync_keep_later(&e->fence, f);
134 e->fence = fence_get(f);
135 return 0; 115 return 0;
136 } 116 }
137 117
138 amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f); 118 e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
119 if (!e)
120 return -ENOMEM;
139 121
122 hash_add(sync->fences, &e->node, f->context);
123 e->fence = fence_get(f);
140 return 0; 124 return 0;
141} 125}
142 126
@@ -153,13 +137,13 @@ static void *amdgpu_sync_get_owner(struct fence *f)
153} 137}
154 138
155/** 139/**
156 * amdgpu_sync_resv - use the semaphores to sync to a reservation object 140 * amdgpu_sync_resv - sync to a reservation object
157 * 141 *
158 * @sync: sync object to add fences from reservation object to 142 * @sync: sync object to add fences from reservation object to
159 * @resv: reservation object with embedded fence 143 * @resv: reservation object with embedded fence
160 * @shared: true if we should only sync to the exclusive fence 144 * @shared: true if we should only sync to the exclusive fence
161 * 145 *
162 * Sync to the fence using the semaphore objects 146 * Sync to the fence
163 */ 147 */
164int amdgpu_sync_resv(struct amdgpu_device *adev, 148int amdgpu_sync_resv(struct amdgpu_device *adev,
165 struct amdgpu_sync *sync, 149 struct amdgpu_sync *sync,
@@ -250,123 +234,17 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
250 kfree(e); 234 kfree(e);
251 } 235 }
252 236
253 if (amdgpu_enable_semaphores)
254 return 0;
255
256 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
257 struct fence *fence = sync->sync_to[i];
258 if (!fence)
259 continue;
260
261 r = fence_wait(fence, false);
262 if (r)
263 return r;
264 }
265
266 return 0;
267}
268
269/**
270 * amdgpu_sync_rings - sync ring to all registered fences
271 *
272 * @sync: sync object to use
273 * @ring: ring that needs sync
274 *
275 * Ensure that all registered fences are signaled before letting
276 * the ring continue. The caller must hold the ring lock.
277 */
278int amdgpu_sync_rings(struct amdgpu_sync *sync,
279 struct amdgpu_ring *ring)
280{
281 struct amdgpu_device *adev = ring->adev;
282 unsigned count = 0;
283 int i, r;
284
285 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
286 struct amdgpu_ring *other = adev->rings[i];
287 struct amdgpu_semaphore *semaphore;
288 struct amdgpu_fence *fence;
289
290 if (!sync->sync_to[i])
291 continue;
292
293 fence = to_amdgpu_fence(sync->sync_to[i]);
294
295 /* check if we really need to sync */
296 if (!amdgpu_enable_scheduler &&
297 !amdgpu_fence_need_sync(fence, ring))
298 continue;
299
300 /* prevent GPU deadlocks */
301 if (!other->ready) {
302 dev_err(adev->dev, "Syncing to a disabled ring!");
303 return -EINVAL;
304 }
305
306 if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
307 r = fence_wait(sync->sync_to[i], true);
308 if (r)
309 return r;
310 continue;
311 }
312
313 if (count >= AMDGPU_NUM_SYNCS) {
314 /* not enough room, wait manually */
315 r = fence_wait(&fence->base, false);
316 if (r)
317 return r;
318 continue;
319 }
320 r = amdgpu_semaphore_create(adev, &semaphore);
321 if (r)
322 return r;
323
324 sync->semaphores[count++] = semaphore;
325
326 /* allocate enough space for sync command */
327 r = amdgpu_ring_alloc(other, 16);
328 if (r)
329 return r;
330
331 /* emit the signal semaphore */
332 if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
333 /* signaling wasn't successful wait manually */
334 amdgpu_ring_undo(other);
335 r = fence_wait(&fence->base, false);
336 if (r)
337 return r;
338 continue;
339 }
340
341 /* we assume caller has already allocated space on waiters ring */
342 if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
343 /* waiting wasn't successful wait manually */
344 amdgpu_ring_undo(other);
345 r = fence_wait(&fence->base, false);
346 if (r)
347 return r;
348 continue;
349 }
350
351 amdgpu_ring_commit(other);
352 amdgpu_fence_note_sync(fence, ring);
353 }
354
355 return 0; 237 return 0;
356} 238}
357 239
358/** 240/**
359 * amdgpu_sync_free - free the sync object 241 * amdgpu_sync_free - free the sync object
360 * 242 *
361 * @adev: amdgpu_device pointer
362 * @sync: sync object to use 243 * @sync: sync object to use
363 * @fence: fence to use for the free
364 * 244 *
365 * Free the sync object by freeing all semaphores in it. 245 * Free the sync object.
366 */ 246 */
367void amdgpu_sync_free(struct amdgpu_device *adev, 247void amdgpu_sync_free(struct amdgpu_sync *sync)
368 struct amdgpu_sync *sync,
369 struct fence *fence)
370{ 248{
371 struct amdgpu_sync_entry *e; 249 struct amdgpu_sync_entry *e;
372 struct hlist_node *tmp; 250 struct hlist_node *tmp;
@@ -378,11 +256,5 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
378 kfree(e); 256 kfree(e);
379 } 257 }
380 258
381 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
382 amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
383
384 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
385 fence_put(sync->sync_to[i]);
386
387 fence_put(sync->last_vm_update); 259 fence_put(sync->last_vm_update);
388} 260}
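
With the semaphore and per-ring sync_to[] paths gone, amdgpu_sync keeps exactly one fence per fence context in a hashtable: amdgpu_sync_fence() either updates an existing entry through amdgpu_sync_keep_later() or inserts a new one. A minimal userspace sketch of that bookkeeping is below; the fixed bucket array and the sequence-number comparison stand in for the kernel hashtable helpers and fence_is_later().

    /* Sketch of the context-keyed fence tracking that replaces the per-ring
     * sync_to[] array and semaphores: each fence context keeps only its
     * latest fence, modelled here as a sequence number. */
    #include <stdio.h>
    #include <stdlib.h>

    struct sync_entry {
        unsigned long long context;   /* fence->context */
        unsigned long long seqno;     /* stands in for the fence itself */
        struct sync_entry *next;
    };

    #define NBUCKETS 64
    static struct sync_entry *buckets[NBUCKETS];

    static int sync_add(unsigned long long context, unsigned long long seqno)
    {
        unsigned idx = (unsigned)(context % NBUCKETS);
        struct sync_entry *e;

        for (e = buckets[idx]; e; e = e->next) {
            if (e->context != context)
                continue;
            if (seqno > e->seqno)      /* amdgpu_sync_keep_later() */
                e->seqno = seqno;
            return 0;
        }

        e = malloc(sizeof(*e));        /* kernel: kmalloc(..., GFP_KERNEL) */
        if (!e)
            return -1;
        e->context = context;
        e->seqno = seqno;
        e->next = buckets[idx];
        buckets[idx] = e;
        return 0;
    }

    int main(void)
    {
        sync_add(3, 10);
        sync_add(3, 12);   /* same context: entry updated, not duplicated */
        sync_add(7, 5);

        for (unsigned i = 0; i < NBUCKETS; i++)
            for (struct sync_entry *e = buckets[i]; e; e = e->next)
                printf("context %llu -> seqno %llu\n", e->context, e->seqno);
        return 0;
    }
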
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 4865615e9c06..05a53f4fc334 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -238,144 +238,10 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
238 amdgpu_do_test_moves(adev); 238 amdgpu_do_test_moves(adev);
239} 239}
240 240
241static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
242 struct amdgpu_ring *ring,
243 struct fence **fence)
244{
245 uint32_t handle = ring->idx ^ 0xdeafbeef;
246 int r;
247
248 if (ring == &adev->uvd.ring) {
249 r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
250 if (r) {
251 DRM_ERROR("Failed to get dummy create msg\n");
252 return r;
253 }
254
255 r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
256 if (r) {
257 DRM_ERROR("Failed to get dummy destroy msg\n");
258 return r;
259 }
260
261 } else if (ring == &adev->vce.ring[0] ||
262 ring == &adev->vce.ring[1]) {
263 r = amdgpu_vce_get_create_msg(ring, handle, NULL);
264 if (r) {
265 DRM_ERROR("Failed to get dummy create msg\n");
266 return r;
267 }
268
269 r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
270 if (r) {
271 DRM_ERROR("Failed to get dummy destroy msg\n");
272 return r;
273 }
274 } else {
275 struct amdgpu_fence *a_fence = NULL;
276 r = amdgpu_ring_lock(ring, 64);
277 if (r) {
278 DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
279 return r;
280 }
281 amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
282 amdgpu_ring_unlock_commit(ring);
283 *fence = &a_fence->base;
284 }
285 return 0;
286}
287
288void amdgpu_test_ring_sync(struct amdgpu_device *adev, 241void amdgpu_test_ring_sync(struct amdgpu_device *adev,
289 struct amdgpu_ring *ringA, 242 struct amdgpu_ring *ringA,
290 struct amdgpu_ring *ringB) 243 struct amdgpu_ring *ringB)
291{ 244{
292 struct fence *fence1 = NULL, *fence2 = NULL;
293 struct amdgpu_semaphore *semaphore = NULL;
294 int r;
295
296 r = amdgpu_semaphore_create(adev, &semaphore);
297 if (r) {
298 DRM_ERROR("Failed to create semaphore\n");
299 goto out_cleanup;
300 }
301
302 r = amdgpu_ring_lock(ringA, 64);
303 if (r) {
304 DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
305 goto out_cleanup;
306 }
307 amdgpu_semaphore_emit_wait(ringA, semaphore);
308 amdgpu_ring_unlock_commit(ringA);
309
310 r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
311 if (r)
312 goto out_cleanup;
313
314 r = amdgpu_ring_lock(ringA, 64);
315 if (r) {
316 DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
317 goto out_cleanup;
318 }
319 amdgpu_semaphore_emit_wait(ringA, semaphore);
320 amdgpu_ring_unlock_commit(ringA);
321
322 r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
323 if (r)
324 goto out_cleanup;
325
326 mdelay(1000);
327
328 if (fence_is_signaled(fence1)) {
329 DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
330 goto out_cleanup;
331 }
332
333 r = amdgpu_ring_lock(ringB, 64);
334 if (r) {
335 DRM_ERROR("Failed to lock ring B %p\n", ringB);
336 goto out_cleanup;
337 }
338 amdgpu_semaphore_emit_signal(ringB, semaphore);
339 amdgpu_ring_unlock_commit(ringB);
340
341 r = fence_wait(fence1, false);
342 if (r) {
343 DRM_ERROR("Failed to wait for sync fence 1\n");
344 goto out_cleanup;
345 }
346
347 mdelay(1000);
348
349 if (fence_is_signaled(fence2)) {
350 DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
351 goto out_cleanup;
352 }
353
354 r = amdgpu_ring_lock(ringB, 64);
355 if (r) {
356 DRM_ERROR("Failed to lock ring B %p\n", ringB);
357 goto out_cleanup;
358 }
359 amdgpu_semaphore_emit_signal(ringB, semaphore);
360 amdgpu_ring_unlock_commit(ringB);
361
362 r = fence_wait(fence2, false);
363 if (r) {
364 DRM_ERROR("Failed to wait for sync fence 1\n");
365 goto out_cleanup;
366 }
367
368out_cleanup:
369 amdgpu_semaphore_free(adev, &semaphore, NULL);
370
371 if (fence1)
372 fence_put(fence1);
373
374 if (fence2)
375 fence_put(fence2);
376
377 if (r)
378 printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
379} 245}
380 246
381static void amdgpu_test_ring_sync2(struct amdgpu_device *adev, 247static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
@@ -383,109 +249,6 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
383 struct amdgpu_ring *ringB, 249 struct amdgpu_ring *ringB,
384 struct amdgpu_ring *ringC) 250 struct amdgpu_ring *ringC)
385{ 251{
386 struct fence *fenceA = NULL, *fenceB = NULL;
387 struct amdgpu_semaphore *semaphore = NULL;
388 bool sigA, sigB;
389 int i, r;
390
391 r = amdgpu_semaphore_create(adev, &semaphore);
392 if (r) {
393 DRM_ERROR("Failed to create semaphore\n");
394 goto out_cleanup;
395 }
396
397 r = amdgpu_ring_lock(ringA, 64);
398 if (r) {
399 DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
400 goto out_cleanup;
401 }
402 amdgpu_semaphore_emit_wait(ringA, semaphore);
403 amdgpu_ring_unlock_commit(ringA);
404
405 r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
406 if (r)
407 goto out_cleanup;
408
409 r = amdgpu_ring_lock(ringB, 64);
410 if (r) {
411 DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
412 goto out_cleanup;
413 }
414 amdgpu_semaphore_emit_wait(ringB, semaphore);
415 amdgpu_ring_unlock_commit(ringB);
416 r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
417 if (r)
418 goto out_cleanup;
419
420 mdelay(1000);
421
422 if (fence_is_signaled(fenceA)) {
423 DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
424 goto out_cleanup;
425 }
426 if (fence_is_signaled(fenceB)) {
427 DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
428 goto out_cleanup;
429 }
430
431 r = amdgpu_ring_lock(ringC, 64);
432 if (r) {
433 DRM_ERROR("Failed to lock ring B %p\n", ringC);
434 goto out_cleanup;
435 }
436 amdgpu_semaphore_emit_signal(ringC, semaphore);
437 amdgpu_ring_unlock_commit(ringC);
438
439 for (i = 0; i < 30; ++i) {
440 mdelay(100);
441 sigA = fence_is_signaled(fenceA);
442 sigB = fence_is_signaled(fenceB);
443 if (sigA || sigB)
444 break;
445 }
446
447 if (!sigA && !sigB) {
448 DRM_ERROR("Neither fence A nor B has been signaled\n");
449 goto out_cleanup;
450 } else if (sigA && sigB) {
451 DRM_ERROR("Both fence A and B has been signaled\n");
452 goto out_cleanup;
453 }
454
455 DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');
456
457 r = amdgpu_ring_lock(ringC, 64);
458 if (r) {
459 DRM_ERROR("Failed to lock ring B %p\n", ringC);
460 goto out_cleanup;
461 }
462 amdgpu_semaphore_emit_signal(ringC, semaphore);
463 amdgpu_ring_unlock_commit(ringC);
464
465 mdelay(1000);
466
467 r = fence_wait(fenceA, false);
468 if (r) {
469 DRM_ERROR("Failed to wait for sync fence A\n");
470 goto out_cleanup;
471 }
472 r = fence_wait(fenceB, false);
473 if (r) {
474 DRM_ERROR("Failed to wait for sync fence B\n");
475 goto out_cleanup;
476 }
477
478out_cleanup:
479 amdgpu_semaphore_free(adev, &semaphore, NULL);
480
481 if (fenceA)
482 fence_put(fenceA);
483
484 if (fenceB)
485 fence_put(fenceB);
486
487 if (r)
488 printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
489} 252}
490 253
491static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA, 254static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 8f9834ab1bd5..9ca3735c563c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs,
38 38
39 TP_fast_assign( 39 TP_fast_assign(
40 __entry->bo_list = p->bo_list; 40 __entry->bo_list = p->bo_list;
41 __entry->ring = p->ibs[i].ring->idx; 41 __entry->ring = p->job->ring->idx;
42 __entry->dw = p->ibs[i].length_dw; 42 __entry->dw = p->job->ibs[i].length_dw;
43 __entry->fences = amdgpu_fence_count_emitted( 43 __entry->fences = amdgpu_fence_count_emitted(
44 p->ibs[i].ring); 44 p->job->ring);
45 ), 45 ),
46 TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", 46 TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
47 __entry->bo_list, __entry->ring, __entry->dw, 47 __entry->bo_list, __entry->ring, __entry->dw,
@@ -65,7 +65,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
65 __entry->sched_job = &job->base; 65 __entry->sched_job = &job->base;
66 __entry->ib = job->ibs; 66 __entry->ib = job->ibs;
67 __entry->fence = &job->base.s_fence->base; 67 __entry->fence = &job->base.s_fence->base;
68 __entry->ring_name = job->ibs[0].ring->name; 68 __entry->ring_name = job->ring->name;
69 __entry->num_ibs = job->num_ibs; 69 __entry->num_ibs = job->num_ibs;
70 ), 70 ),
71 TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", 71 TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
@@ -90,7 +90,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
90 __entry->sched_job = &job->base; 90 __entry->sched_job = &job->base;
91 __entry->ib = job->ibs; 91 __entry->ib = job->ibs;
92 __entry->fence = &job->base.s_fence->base; 92 __entry->fence = &job->base.s_fence->base;
93 __entry->ring_name = job->ibs[0].ring->name; 93 __entry->ring_name = job->ring->name;
94 __entry->num_ibs = job->num_ibs; 94 __entry->num_ibs = job->num_ibs;
95 ), 95 ),
96 TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", 96 TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
@@ -100,18 +100,21 @@ TRACE_EVENT(amdgpu_sched_run_job,
100 100
101 101
102TRACE_EVENT(amdgpu_vm_grab_id, 102TRACE_EVENT(amdgpu_vm_grab_id,
103 TP_PROTO(unsigned vmid, int ring), 103 TP_PROTO(struct amdgpu_vm *vm, unsigned vmid, int ring),
104 TP_ARGS(vmid, ring), 104 TP_ARGS(vm, vmid, ring),
105 TP_STRUCT__entry( 105 TP_STRUCT__entry(
106 __field(struct amdgpu_vm *, vm)
106 __field(u32, vmid) 107 __field(u32, vmid)
107 __field(u32, ring) 108 __field(u32, ring)
108 ), 109 ),
109 110
110 TP_fast_assign( 111 TP_fast_assign(
112 __entry->vm = vm;
111 __entry->vmid = vmid; 113 __entry->vmid = vmid;
112 __entry->ring = ring; 114 __entry->ring = ring;
113 ), 115 ),
114 TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) 116 TP_printk("vm=%p, id=%u, ring=%u", __entry->vm, __entry->vmid,
117 __entry->ring)
115); 118);
116 119
117TRACE_EVENT(amdgpu_vm_bo_map, 120TRACE_EVENT(amdgpu_vm_bo_map,
@@ -247,42 +250,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
247 TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) 250 TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
248); 251);
249 252
250DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
251
252 TP_PROTO(int ring, struct amdgpu_semaphore *sem),
253
254 TP_ARGS(ring, sem),
255
256 TP_STRUCT__entry(
257 __field(int, ring)
258 __field(signed, waiters)
259 __field(uint64_t, gpu_addr)
260 ),
261
262 TP_fast_assign(
263 __entry->ring = ring;
264 __entry->waiters = sem->waiters;
265 __entry->gpu_addr = sem->gpu_addr;
266 ),
267
268 TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
269 __entry->waiters, __entry->gpu_addr)
270);
271
272DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_signale,
273
274 TP_PROTO(int ring, struct amdgpu_semaphore *sem),
275
276 TP_ARGS(ring, sem)
277);
278
279DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_wait,
280
281 TP_PROTO(int ring, struct amdgpu_semaphore *sem),
282
283 TP_ARGS(ring, sem)
284);
285
286#endif 253#endif
287 254
288/* This part must be outside protection */ 255/* This part must be outside protection */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 55cf05e1c81c..e52fc641edfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -77,6 +77,8 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
77static int amdgpu_ttm_global_init(struct amdgpu_device *adev) 77static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
78{ 78{
79 struct drm_global_reference *global_ref; 79 struct drm_global_reference *global_ref;
80 struct amdgpu_ring *ring;
81 struct amd_sched_rq *rq;
80 int r; 82 int r;
81 83
82 adev->mman.mem_global_referenced = false; 84 adev->mman.mem_global_referenced = false;
@@ -106,13 +108,27 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
106 return r; 108 return r;
107 } 109 }
108 110
111 ring = adev->mman.buffer_funcs_ring;
112 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
113 r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
114 rq, amdgpu_sched_jobs);
115 if (r != 0) {
116 DRM_ERROR("Failed setting up TTM BO move run queue.\n");
117 drm_global_item_unref(&adev->mman.mem_global_ref);
118 drm_global_item_unref(&adev->mman.bo_global_ref.ref);
119 return r;
120 }
121
109 adev->mman.mem_global_referenced = true; 122 adev->mman.mem_global_referenced = true;
123
110 return 0; 124 return 0;
111} 125}
112 126
113static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) 127static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
114{ 128{
115 if (adev->mman.mem_global_referenced) { 129 if (adev->mman.mem_global_referenced) {
130 amd_sched_entity_fini(adev->mman.entity.sched,
131 &adev->mman.entity);
116 drm_global_item_unref(&adev->mman.bo_global_ref.ref); 132 drm_global_item_unref(&adev->mman.bo_global_ref.ref);
117 drm_global_item_unref(&adev->mman.mem_global_ref); 133 drm_global_item_unref(&adev->mman.mem_global_ref);
118 adev->mman.mem_global_referenced = false; 134 adev->mman.mem_global_referenced = false;
@@ -499,9 +515,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
499 enum dma_data_direction direction = write ? 515 enum dma_data_direction direction = write ?
500 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 516 DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
501 517
502 if (current->mm != gtt->usermm)
503 return -EPERM;
504
505 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { 518 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
506 /* check that we only pin down anonymous memory 519 /* check that we only pin down anonymous memory
507 to prevent problems with writeback */ 520 to prevent problems with writeback */
@@ -773,14 +786,33 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
773 return 0; 786 return 0;
774} 787}
775 788
776bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm) 789struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
790{
791 struct amdgpu_ttm_tt *gtt = (void *)ttm;
792
793 if (gtt == NULL)
794 return NULL;
795
796 return gtt->usermm;
797}
798
799bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
800 unsigned long end)
777{ 801{
778 struct amdgpu_ttm_tt *gtt = (void *)ttm; 802 struct amdgpu_ttm_tt *gtt = (void *)ttm;
803 unsigned long size;
779 804
780 if (gtt == NULL) 805 if (gtt == NULL)
781 return false; 806 return false;
782 807
783 return !!gtt->userptr; 808 if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr)
809 return false;
810
811 size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
812 if (gtt->userptr > end || gtt->userptr + size <= start)
813 return false;
814
815 return true;
784} 816}
785 817
786bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) 818bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
@@ -996,9 +1028,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
996 struct fence **fence) 1028 struct fence **fence)
997{ 1029{
998 struct amdgpu_device *adev = ring->adev; 1030 struct amdgpu_device *adev = ring->adev;
1031 struct amdgpu_job *job;
1032
999 uint32_t max_bytes; 1033 uint32_t max_bytes;
1000 unsigned num_loops, num_dw; 1034 unsigned num_loops, num_dw;
1001 struct amdgpu_ib *ib;
1002 unsigned i; 1035 unsigned i;
1003 int r; 1036 int r;
1004 1037
@@ -1010,20 +1043,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
1010 while (num_dw & 0x7) 1043 while (num_dw & 0x7)
1011 num_dw++; 1044 num_dw++;
1012 1045
1013 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); 1046 r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
1014 if (!ib) 1047 if (r)
1015 return -ENOMEM;
1016
1017 r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
1018 if (r) {
1019 kfree(ib);
1020 return r; 1048 return r;
1021 }
1022
1023 ib->length_dw = 0;
1024 1049
1025 if (resv) { 1050 if (resv) {
1026 r = amdgpu_sync_resv(adev, &ib->sync, resv, 1051 r = amdgpu_sync_resv(adev, &job->sync, resv,
1027 AMDGPU_FENCE_OWNER_UNDEFINED); 1052 AMDGPU_FENCE_OWNER_UNDEFINED);
1028 if (r) { 1053 if (r) {
1029 DRM_ERROR("sync failed (%d).\n", r); 1054 DRM_ERROR("sync failed (%d).\n", r);
@@ -1034,31 +1059,25 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
1034 for (i = 0; i < num_loops; i++) { 1059 for (i = 0; i < num_loops; i++) {
1035 uint32_t cur_size_in_bytes = min(byte_count, max_bytes); 1060 uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1036 1061
1037 amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, 1062 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
1038 cur_size_in_bytes); 1063 dst_offset, cur_size_in_bytes);
1039 1064
1040 src_offset += cur_size_in_bytes; 1065 src_offset += cur_size_in_bytes;
1041 dst_offset += cur_size_in_bytes; 1066 dst_offset += cur_size_in_bytes;
1042 byte_count -= cur_size_in_bytes; 1067 byte_count -= cur_size_in_bytes;
1043 } 1068 }
1044 1069
1045 amdgpu_vm_pad_ib(adev, ib); 1070 amdgpu_ring_pad_ib(ring, &job->ibs[0]);
1046 WARN_ON(ib->length_dw > num_dw); 1071 WARN_ON(job->ibs[0].length_dw > num_dw);
1047 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, 1072 r = amdgpu_job_submit(job, ring, &adev->mman.entity,
1048 &amdgpu_vm_free_job, 1073 AMDGPU_FENCE_OWNER_UNDEFINED, fence);
1049 AMDGPU_FENCE_OWNER_UNDEFINED,
1050 fence);
1051 if (r) 1074 if (r)
1052 goto error_free; 1075 goto error_free;
1053 1076
1054 if (!amdgpu_enable_scheduler) {
1055 amdgpu_ib_free(adev, ib);
1056 kfree(ib);
1057 }
1058 return 0; 1077 return 0;
1078
1059error_free: 1079error_free:
1060 amdgpu_ib_free(adev, ib); 1080 amdgpu_job_free(job);
1061 kfree(ib);
1062 return r; 1081 return r;
1063} 1082}
1064 1083
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 53f987aeeacf..1de82bf4fc79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -91,6 +91,8 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
91 91
92int amdgpu_uvd_sw_init(struct amdgpu_device *adev) 92int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
93{ 93{
94 struct amdgpu_ring *ring;
95 struct amd_sched_rq *rq;
94 unsigned long bo_size; 96 unsigned long bo_size;
95 const char *fw_name; 97 const char *fw_name;
96 const struct common_firmware_header *hdr; 98 const struct common_firmware_header *hdr;
@@ -191,6 +193,15 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
191 193
192 amdgpu_bo_unreserve(adev->uvd.vcpu_bo); 194 amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
193 195
196 ring = &adev->uvd.ring;
197 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
198 r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
199 rq, amdgpu_sched_jobs);
200 if (r != 0) {
201 DRM_ERROR("Failed setting up UVD run queue.\n");
202 return r;
203 }
204
194 for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { 205 for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
195 atomic_set(&adev->uvd.handles[i], 0); 206 atomic_set(&adev->uvd.handles[i], 0);
196 adev->uvd.filp[i] = NULL; 207 adev->uvd.filp[i] = NULL;
@@ -210,6 +221,8 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
210 if (adev->uvd.vcpu_bo == NULL) 221 if (adev->uvd.vcpu_bo == NULL)
211 return 0; 222 return 0;
212 223
224 amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
225
213 r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); 226 r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
214 if (!r) { 227 if (!r) {
215 amdgpu_bo_kunmap(adev->uvd.vcpu_bo); 228 amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
@@ -241,7 +254,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
241 254
242 amdgpu_uvd_note_usage(adev); 255 amdgpu_uvd_note_usage(adev);
243 256
244 r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence); 257 r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence);
245 if (r) { 258 if (r) {
246 DRM_ERROR("Error destroying UVD (%d)!\n", r); 259 DRM_ERROR("Error destroying UVD (%d)!\n", r);
247 continue; 260 continue;
@@ -295,7 +308,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
295 308
296 amdgpu_uvd_note_usage(adev); 309 amdgpu_uvd_note_usage(adev);
297 310
298 r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence); 311 r = amdgpu_uvd_get_destroy_msg(ring, handle,
312 false, &fence);
299 if (r) { 313 if (r) {
300 DRM_ERROR("Error destroying UVD (%d)!\n", r); 314 DRM_ERROR("Error destroying UVD (%d)!\n", r);
301 continue; 315 continue;
@@ -616,7 +630,6 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
616{ 630{
617 struct amdgpu_bo_va_mapping *mapping; 631 struct amdgpu_bo_va_mapping *mapping;
618 struct amdgpu_bo *bo; 632 struct amdgpu_bo *bo;
619 struct amdgpu_ib *ib;
620 uint32_t cmd, lo, hi; 633 uint32_t cmd, lo, hi;
621 uint64_t start, end; 634 uint64_t start, end;
622 uint64_t addr; 635 uint64_t addr;
@@ -638,9 +651,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
638 addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; 651 addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
639 start += addr; 652 start += addr;
640 653
641 ib = &ctx->parser->ibs[ctx->ib_idx]; 654 amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
642 ib->ptr[ctx->data0] = start & 0xFFFFFFFF; 655 lower_32_bits(start));
643 ib->ptr[ctx->data1] = start >> 32; 656 amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
657 upper_32_bits(start));
644 658
645 cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1; 659 cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
646 if (cmd < 0x4) { 660 if (cmd < 0x4) {
@@ -702,7 +716,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
702static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, 716static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
703 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) 717 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
704{ 718{
705 struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx]; 719 struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
706 int i, r; 720 int i, r;
707 721
708 ctx->idx++; 722 ctx->idx++;
@@ -748,7 +762,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
748static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, 762static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
749 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) 763 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
750{ 764{
751 struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx]; 765 struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
752 int r; 766 int r;
753 767
754 for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) { 768 for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
@@ -790,7 +804,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
790 [0x00000003] = 2048, 804 [0x00000003] = 2048,
791 [0x00000004] = 0xFFFFFFFF, 805 [0x00000004] = 0xFFFFFFFF,
792 }; 806 };
793 struct amdgpu_ib *ib = &parser->ibs[ib_idx]; 807 struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
794 int r; 808 int r;
795 809
796 if (ib->length_dw % 16) { 810 if (ib->length_dw % 16) {
@@ -823,22 +837,14 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
823 return 0; 837 return 0;
824} 838}
825 839
826static int amdgpu_uvd_free_job( 840static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
827 struct amdgpu_job *job) 841 bool direct, struct fence **fence)
828{
829 amdgpu_ib_free(job->adev, job->ibs);
830 kfree(job->ibs);
831 return 0;
832}
833
834static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
835 struct amdgpu_bo *bo,
836 struct fence **fence)
837{ 842{
838 struct ttm_validate_buffer tv; 843 struct ttm_validate_buffer tv;
839 struct ww_acquire_ctx ticket; 844 struct ww_acquire_ctx ticket;
840 struct list_head head; 845 struct list_head head;
841 struct amdgpu_ib *ib = NULL; 846 struct amdgpu_job *job;
847 struct amdgpu_ib *ib;
842 struct fence *f = NULL; 848 struct fence *f = NULL;
843 struct amdgpu_device *adev = ring->adev; 849 struct amdgpu_device *adev = ring->adev;
844 uint64_t addr; 850 uint64_t addr;
@@ -862,15 +868,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
862 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); 868 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
863 if (r) 869 if (r)
864 goto err; 870 goto err;
865 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); 871
866 if (!ib) { 872 r = amdgpu_job_alloc_with_ib(adev, 64, &job);
867 r = -ENOMEM;
868 goto err;
869 }
870 r = amdgpu_ib_get(ring, NULL, 64, ib);
871 if (r) 873 if (r)
872 goto err1; 874 goto err;
873 875
876 ib = &job->ibs[0];
874 addr = amdgpu_bo_gpu_offset(bo); 877 addr = amdgpu_bo_gpu_offset(bo);
875 ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); 878 ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
876 ib->ptr[1] = addr; 879 ib->ptr[1] = addr;
@@ -882,12 +885,19 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
882 ib->ptr[i] = PACKET2(0); 885 ib->ptr[i] = PACKET2(0);
883 ib->length_dw = 16; 886 ib->length_dw = 16;
884 887
885 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, 888 if (direct) {
886 &amdgpu_uvd_free_job, 889 r = amdgpu_ib_schedule(ring, 1, ib,
887 AMDGPU_FENCE_OWNER_UNDEFINED, 890 AMDGPU_FENCE_OWNER_UNDEFINED, NULL, &f);
888 &f); 891 if (r)
889 if (r) 892 goto err_free;
890 goto err2; 893
894 amdgpu_job_free(job);
895 } else {
896 r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
897 AMDGPU_FENCE_OWNER_UNDEFINED, &f);
898 if (r)
899 goto err_free;
900 }
891 901
892 ttm_eu_fence_buffer_objects(&ticket, &head, f); 902 ttm_eu_fence_buffer_objects(&ticket, &head, f);
893 903
@@ -895,16 +905,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
895 *fence = fence_get(f); 905 *fence = fence_get(f);
896 amdgpu_bo_unref(&bo); 906 amdgpu_bo_unref(&bo);
897 fence_put(f); 907 fence_put(f);
898 if (amdgpu_enable_scheduler)
899 return 0;
900 908
901 amdgpu_ib_free(ring->adev, ib);
902 kfree(ib);
903 return 0; 909 return 0;
904err2: 910
905 amdgpu_ib_free(ring->adev, ib); 911err_free:
906err1: 912 amdgpu_job_free(job);
907 kfree(ib); 913
908err: 914err:
909 ttm_eu_backoff_reservation(&ticket, &head); 915 ttm_eu_backoff_reservation(&ticket, &head);
910 return r; 916 return r;
@@ -959,11 +965,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
959 amdgpu_bo_kunmap(bo); 965 amdgpu_bo_kunmap(bo);
960 amdgpu_bo_unreserve(bo); 966 amdgpu_bo_unreserve(bo);
961 967
962 return amdgpu_uvd_send_msg(ring, bo, fence); 968 return amdgpu_uvd_send_msg(ring, bo, true, fence);
963} 969}
964 970
965int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, 971int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
966 struct fence **fence) 972 bool direct, struct fence **fence)
967{ 973{
968 struct amdgpu_device *adev = ring->adev; 974 struct amdgpu_device *adev = ring->adev;
969 struct amdgpu_bo *bo; 975 struct amdgpu_bo *bo;
@@ -1001,7 +1007,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
1001 amdgpu_bo_kunmap(bo); 1007 amdgpu_bo_kunmap(bo);
1002 amdgpu_bo_unreserve(bo); 1008 amdgpu_bo_unreserve(bo);
1003 1009
1004 return amdgpu_uvd_send_msg(ring, bo, fence); 1010 return amdgpu_uvd_send_msg(ring, bo, direct, fence);
1005} 1011}
1006 1012
1007static void amdgpu_uvd_idle_work_handler(struct work_struct *work) 1013static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
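For reference, the amdgpu_uvd.c hunks above replace the kzalloc'd amdgpu_ib plus amdgpu_sched_ib_submit_kernel_helper() path with the new amdgpu_job API and thread a "direct" flag through the message helpers, so suspend and teardown can bypass the scheduler. A minimal consolidated sketch of the resulting flow, using only helpers that appear in this diff (the function name submit_msg_sketch is made up for illustration; IB setup and buffer handling are trimmed):

static int submit_msg_sketch(struct amdgpu_device *adev, struct amdgpu_ring *ring,
			     bool direct, struct fence **fence)
{
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct fence *f = NULL;
	int r;

	/* allocate a job carrying a single 64-dword IB */
	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	/* ... fill ib->ptr[] and ib->length_dw as amdgpu_uvd_send_msg() does ... */

	if (direct) {
		/* push the IB to the ring immediately, then drop the job */
		r = amdgpu_ib_schedule(ring, 1, ib,
				       AMDGPU_FENCE_OWNER_UNDEFINED, NULL, &f);
		if (r)
			goto err_free;
		amdgpu_job_free(job);
	} else {
		/* hand the job to the GPU scheduler through the UVD entity;
		 * on success the scheduler owns and frees the job */
		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
		if (r)
			goto err_free;
	}

	if (fence)
		*fence = fence_get(f);
	fence_put(f);
	return 0;

err_free:
	amdgpu_job_free(job);
	return r;
}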
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 1724c2c86151..9a3b449081a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -31,7 +31,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev);
31int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, 31int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
32 struct fence **fence); 32 struct fence **fence);
33int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, 33int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
34 struct fence **fence); 34 bool direct, struct fence **fence);
35void amdgpu_uvd_free_handles(struct amdgpu_device *adev, 35void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
36 struct drm_file *filp); 36 struct drm_file *filp);
37int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx); 37int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a745eeeb5d82..39c3aa60381a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -74,6 +74,8 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work);
74 */ 74 */
75int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) 75int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
76{ 76{
77 struct amdgpu_ring *ring;
78 struct amd_sched_rq *rq;
77 const char *fw_name; 79 const char *fw_name;
78 const struct common_firmware_header *hdr; 80 const struct common_firmware_header *hdr;
79 unsigned ucode_version, version_major, version_minor, binary_id; 81 unsigned ucode_version, version_major, version_minor, binary_id;
@@ -170,6 +172,16 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
170 return r; 172 return r;
171 } 173 }
172 174
175
176 ring = &adev->vce.ring[0];
177 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
178 r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
179 rq, amdgpu_sched_jobs);
180 if (r != 0) {
181 DRM_ERROR("Failed setting up VCE run queue.\n");
182 return r;
183 }
184
173 for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { 185 for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
174 atomic_set(&adev->vce.handles[i], 0); 186 atomic_set(&adev->vce.handles[i], 0);
175 adev->vce.filp[i] = NULL; 187 adev->vce.filp[i] = NULL;
@@ -190,6 +202,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
190 if (adev->vce.vcpu_bo == NULL) 202 if (adev->vce.vcpu_bo == NULL)
191 return 0; 203 return 0;
192 204
205 amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
206
193 amdgpu_bo_unref(&adev->vce.vcpu_bo); 207 amdgpu_bo_unref(&adev->vce.vcpu_bo);
194 208
195 amdgpu_ring_fini(&adev->vce.ring[0]); 209 amdgpu_ring_fini(&adev->vce.ring[0]);
@@ -337,7 +351,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
337 351
338 amdgpu_vce_note_usage(adev); 352 amdgpu_vce_note_usage(adev);
339 353
340 r = amdgpu_vce_get_destroy_msg(ring, handle, NULL); 354 r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
341 if (r) 355 if (r)
342 DRM_ERROR("Error destroying VCE handle (%d)!\n", r); 356 DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
343 357
@@ -346,14 +360,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
346 } 360 }
347} 361}
348 362
349static int amdgpu_vce_free_job(
350 struct amdgpu_job *job)
351{
352 amdgpu_ib_free(job->adev, job->ibs);
353 kfree(job->ibs);
354 return 0;
355}
356
357/** 363/**
358 * amdgpu_vce_get_create_msg - generate a VCE create msg 364 * amdgpu_vce_get_create_msg - generate a VCE create msg
359 * 365 *
@@ -368,21 +374,17 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
368 struct fence **fence) 374 struct fence **fence)
369{ 375{
370 const unsigned ib_size_dw = 1024; 376 const unsigned ib_size_dw = 1024;
371 struct amdgpu_ib *ib = NULL; 377 struct amdgpu_job *job;
378 struct amdgpu_ib *ib;
372 struct fence *f = NULL; 379 struct fence *f = NULL;
373 struct amdgpu_device *adev = ring->adev;
374 uint64_t dummy; 380 uint64_t dummy;
375 int i, r; 381 int i, r;
376 382
377 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); 383 r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
378 if (!ib) 384 if (r)
379 return -ENOMEM;
380 r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
381 if (r) {
382 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
383 kfree(ib);
384 return r; 385 return r;
385 } 386
387 ib = &job->ibs[0];
386 388
387 dummy = ib->gpu_addr + 1024; 389 dummy = ib->gpu_addr + 1024;
388 390
@@ -423,20 +425,19 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
423 for (i = ib->length_dw; i < ib_size_dw; ++i) 425 for (i = ib->length_dw; i < ib_size_dw; ++i)
424 ib->ptr[i] = 0x0; 426 ib->ptr[i] = 0x0;
425 427
426 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, 428 r = amdgpu_ib_schedule(ring, 1, ib, AMDGPU_FENCE_OWNER_UNDEFINED,
427 &amdgpu_vce_free_job, 429 NULL, &f);
428 AMDGPU_FENCE_OWNER_UNDEFINED,
429 &f);
430 if (r) 430 if (r)
431 goto err; 431 goto err;
432
433 amdgpu_job_free(job);
432 if (fence) 434 if (fence)
433 *fence = fence_get(f); 435 *fence = fence_get(f);
434 fence_put(f); 436 fence_put(f);
435 if (amdgpu_enable_scheduler) 437 return 0;
436 return 0; 438
437err: 439err:
438 amdgpu_ib_free(adev, ib); 440 amdgpu_job_free(job);
439 kfree(ib);
440 return r; 441 return r;
441} 442}
442 443
@@ -451,26 +452,20 @@ err:
451 * Close up a stream for HW test or if userspace failed to do so 452 * Close up a stream for HW test or if userspace failed to do so
452 */ 453 */
453int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, 454int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
454 struct fence **fence) 455 bool direct, struct fence **fence)
455{ 456{
456 const unsigned ib_size_dw = 1024; 457 const unsigned ib_size_dw = 1024;
457 struct amdgpu_ib *ib = NULL; 458 struct amdgpu_job *job;
459 struct amdgpu_ib *ib;
458 struct fence *f = NULL; 460 struct fence *f = NULL;
459 struct amdgpu_device *adev = ring->adev;
460 uint64_t dummy; 461 uint64_t dummy;
461 int i, r; 462 int i, r;
462 463
463 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); 464 r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
464 if (!ib) 465 if (r)
465 return -ENOMEM;
466
467 r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
468 if (r) {
469 kfree(ib);
470 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
471 return r; 466 return r;
472 }
473 467
468 ib = &job->ibs[0];
474 dummy = ib->gpu_addr + 1024; 469 dummy = ib->gpu_addr + 1024;
475 470
476 /* stitch together an VCE destroy msg */ 471 /* stitch together an VCE destroy msg */
@@ -490,20 +485,29 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
490 485
491 for (i = ib->length_dw; i < ib_size_dw; ++i) 486 for (i = ib->length_dw; i < ib_size_dw; ++i)
492 ib->ptr[i] = 0x0; 487 ib->ptr[i] = 0x0;
493 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, 488
494 &amdgpu_vce_free_job, 489 if (direct) {
495 AMDGPU_FENCE_OWNER_UNDEFINED, 490 r = amdgpu_ib_schedule(ring, 1, ib,
496 &f); 491 AMDGPU_FENCE_OWNER_UNDEFINED,
497 if (r) 492 NULL, &f);
498 goto err; 493 if (r)
494 goto err;
495
496 amdgpu_job_free(job);
497 } else {
498 r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
499 AMDGPU_FENCE_OWNER_UNDEFINED, &f);
500 if (r)
501 goto err;
502 }
503
499 if (fence) 504 if (fence)
500 *fence = fence_get(f); 505 *fence = fence_get(f);
501 fence_put(f); 506 fence_put(f);
502 if (amdgpu_enable_scheduler) 507 return 0;
503 return 0; 508
504err: 509err:
505 amdgpu_ib_free(adev, ib); 510 amdgpu_job_free(job);
506 kfree(ib);
507 return r; 511 return r;
508} 512}
509 513
@@ -521,7 +525,6 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
521 int lo, int hi, unsigned size, uint32_t index) 525 int lo, int hi, unsigned size, uint32_t index)
522{ 526{
523 struct amdgpu_bo_va_mapping *mapping; 527 struct amdgpu_bo_va_mapping *mapping;
524 struct amdgpu_ib *ib = &p->ibs[ib_idx];
525 struct amdgpu_bo *bo; 528 struct amdgpu_bo *bo;
526 uint64_t addr; 529 uint64_t addr;
527 530
@@ -550,8 +553,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
550 addr += amdgpu_bo_gpu_offset(bo); 553 addr += amdgpu_bo_gpu_offset(bo);
551 addr -= ((uint64_t)size) * ((uint64_t)index); 554 addr -= ((uint64_t)size) * ((uint64_t)index);
552 555
553 ib->ptr[lo] = addr & 0xFFFFFFFF; 556 amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
554 ib->ptr[hi] = addr >> 32; 557 amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
555 558
556 return 0; 559 return 0;
557} 560}
@@ -606,7 +609,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
606 */ 609 */
607int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) 610int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
608{ 611{
609 struct amdgpu_ib *ib = &p->ibs[ib_idx]; 612 struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
610 unsigned fb_idx = 0, bs_idx = 0; 613 unsigned fb_idx = 0, bs_idx = 0;
611 int session_idx = -1; 614 int session_idx = -1;
612 bool destroyed = false; 615 bool destroyed = false;
@@ -743,30 +746,6 @@ out:
743} 746}
744 747
745/** 748/**
746 * amdgpu_vce_ring_emit_semaphore - emit a semaphore command
747 *
748 * @ring: engine to use
749 * @semaphore: address of semaphore
750 * @emit_wait: true=emit wait, false=emit signal
751 *
752 */
753bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
754 struct amdgpu_semaphore *semaphore,
755 bool emit_wait)
756{
757 uint64_t addr = semaphore->gpu_addr;
758
759 amdgpu_ring_write(ring, VCE_CMD_SEMAPHORE);
760 amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
761 amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
762 amdgpu_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0));
763 if (!emit_wait)
764 amdgpu_ring_write(ring, VCE_CMD_END);
765
766 return true;
767}
768
769/**
770 * amdgpu_vce_ring_emit_ib - execute indirect buffer 749 * amdgpu_vce_ring_emit_ib - execute indirect buffer
771 * 750 *
772 * @ring: engine to use 751 * @ring: engine to use
@@ -814,14 +793,14 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
814 unsigned i; 793 unsigned i;
815 int r; 794 int r;
816 795
817 r = amdgpu_ring_lock(ring, 16); 796 r = amdgpu_ring_alloc(ring, 16);
818 if (r) { 797 if (r) {
819 DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", 798 DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
820 ring->idx, r); 799 ring->idx, r);
821 return r; 800 return r;
822 } 801 }
823 amdgpu_ring_write(ring, VCE_CMD_END); 802 amdgpu_ring_write(ring, VCE_CMD_END);
824 amdgpu_ring_unlock_commit(ring); 803 amdgpu_ring_commit(ring);
825 804
826 for (i = 0; i < adev->usec_timeout; i++) { 805 for (i = 0; i < adev->usec_timeout; i++) {
827 if (amdgpu_ring_get_rptr(ring) != rptr) 806 if (amdgpu_ring_get_rptr(ring) != rptr)
@@ -862,7 +841,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
862 goto error; 841 goto error;
863 } 842 }
864 843
865 r = amdgpu_vce_get_destroy_msg(ring, 1, &fence); 844 r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
866 if (r) { 845 if (r) {
867 DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); 846 DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
868 goto error; 847 goto error;
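Both UVD and VCE now own a GPU-scheduler entity that is created in the *_sw_init() path and torn down again in *_sw_fini(); the VM code further below does the same per VM. A consolidated sketch of that pairing, restating the VCE hunks above (illustrative only, not part of the patch):

	/* in amdgpu_vce_sw_init(): attach an entity to the first VCE ring's
	 * normal-priority run queue */
	struct amdgpu_ring *ring = &adev->vce.ring[0];
	struct amd_sched_rq *rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
	int r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
				      rq, amdgpu_sched_jobs);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCE run queue.\n");
		return r;
	}

	/* in amdgpu_vce_sw_fini(): tear the entity down before freeing the
	 * firmware BO */
	amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);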
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index ba2da8ee5906..ef99d2370182 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -31,12 +31,9 @@ int amdgpu_vce_resume(struct amdgpu_device *adev);
31int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, 31int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
32 struct fence **fence); 32 struct fence **fence);
33int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, 33int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
34 struct fence **fence); 34 bool direct, struct fence **fence);
35void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); 35void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
36int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); 36int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
37bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
38 struct amdgpu_semaphore *semaphore,
39 bool emit_wait);
40void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); 37void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
41void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 38void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
42 unsigned flags); 39 unsigned flags);
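The amdgpu_vm.c diff that follows adds amdgpu_vm_bo_split_mapping(), which caps each page-table update at 64MB worth of GPU pages so that the generated commands always fit into a single IB. The core chunking loop, excerpted from the hunks below (illustrative; flag filtering and the GTT fast path are omitted):

	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
	uint64_t start = mapping->it.start;
	int r;

	while (start != mapping->it.last + 1) {
		uint64_t last;

		/* update at most max_size page-table entries per submission */
		last = min((uint64_t)mapping->it.last, start + max_size);
		r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
						start, last, flags, addr,
						fence);
		if (r)
			return r;

		/* advance to the next chunk */
		start = last + 1;
		addr += max_size;
	}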
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9599f7559b3d..264c5968a1d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -55,7 +55,7 @@
55 * 55 *
56 * @adev: amdgpu_device pointer 56 * @adev: amdgpu_device pointer
57 * 57 *
58 * Calculate the number of page directory entries (cayman+). 58 * Calculate the number of page directory entries.
59 */ 59 */
60static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev) 60static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
61{ 61{
@@ -67,7 +67,7 @@ static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
67 * 67 *
68 * @adev: amdgpu_device pointer 68 * @adev: amdgpu_device pointer
69 * 69 *
70 * Calculate the size of the page directory in bytes (cayman+). 70 * Calculate the size of the page directory in bytes.
71 */ 71 */
72static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev) 72static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
73{ 73{
@@ -89,8 +89,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
89 struct amdgpu_bo_list_entry *entry) 89 struct amdgpu_bo_list_entry *entry)
90{ 90{
91 entry->robj = vm->page_directory; 91 entry->robj = vm->page_directory;
92 entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
93 entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
94 entry->priority = 0; 92 entry->priority = 0;
95 entry->tv.bo = &vm->page_directory->tbo; 93 entry->tv.bo = &vm->page_directory->tbo;
96 entry->tv.shared = true; 94 entry->tv.shared = true;
@@ -154,29 +152,34 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
154 * @vm: vm to allocate id for 152 * @vm: vm to allocate id for
155 * @ring: ring we want to submit job to 153 * @ring: ring we want to submit job to
156 * @sync: sync object where we add dependencies 154 * @sync: sync object where we add dependencies
155 * @fence: fence protecting ID from reuse
157 * 156 *
158 * Allocate an id for the vm, adding fences to the sync obj as necessary. 157 * Allocate an id for the vm, adding fences to the sync obj as necessary.
159 *
160 * Global mutex must be locked!
161 */ 158 */
162int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, 159int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
163 struct amdgpu_sync *sync) 160 struct amdgpu_sync *sync, struct fence *fence)
164{ 161{
165 struct fence *best[AMDGPU_MAX_RINGS] = {};
166 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; 162 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
167 struct amdgpu_device *adev = ring->adev; 163 struct amdgpu_device *adev = ring->adev;
164 struct amdgpu_vm_manager_id *id;
165 int r;
168 166
169 unsigned choices[2] = {}; 167 mutex_lock(&adev->vm_manager.lock);
170 unsigned i;
171 168
172 /* check if the id is still valid */ 169 /* check if the id is still valid */
173 if (vm_id->id) { 170 if (vm_id->id) {
174 unsigned id = vm_id->id;
175 long owner; 171 long owner;
176 172
177 owner = atomic_long_read(&adev->vm_manager.ids[id].owner); 173 id = &adev->vm_manager.ids[vm_id->id];
174 owner = atomic_long_read(&id->owner);
178 if (owner == (long)vm) { 175 if (owner == (long)vm) {
179 trace_amdgpu_vm_grab_id(vm_id->id, ring->idx); 176 list_move_tail(&id->list, &adev->vm_manager.ids_lru);
177 trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);
178
179 fence_put(id->active);
180 id->active = fence_get(fence);
181
182 mutex_unlock(&adev->vm_manager.lock);
180 return 0; 183 return 0;
181 } 184 }
182 } 185 }
@@ -184,41 +187,24 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
184 /* we definately need to flush */ 187 /* we definately need to flush */
185 vm_id->pd_gpu_addr = ~0ll; 188 vm_id->pd_gpu_addr = ~0ll;
186 189
187 /* skip over VMID 0, since it is the system VM */ 190 id = list_first_entry(&adev->vm_manager.ids_lru,
188 for (i = 1; i < adev->vm_manager.nvm; ++i) { 191 struct amdgpu_vm_manager_id,
189 struct fence *fence = adev->vm_manager.ids[i].active; 192 list);
190 struct amdgpu_ring *fring; 193 list_move_tail(&id->list, &adev->vm_manager.ids_lru);
191 194 atomic_long_set(&id->owner, (long)vm);
192 if (fence == NULL) {
193 /* found a free one */
194 vm_id->id = i;
195 trace_amdgpu_vm_grab_id(i, ring->idx);
196 return 0;
197 }
198
199 fring = amdgpu_ring_from_fence(fence);
200 if (best[fring->idx] == NULL ||
201 fence_is_later(best[fring->idx], fence)) {
202 best[fring->idx] = fence;
203 choices[fring == ring ? 0 : 1] = i;
204 }
205 }
206 195
207 for (i = 0; i < 2; ++i) { 196 vm_id->id = id - adev->vm_manager.ids;
208 if (choices[i]) { 197 trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);
209 struct fence *fence;
210 198
211 fence = adev->vm_manager.ids[choices[i]].active; 199 r = amdgpu_sync_fence(ring->adev, sync, id->active);
212 vm_id->id = choices[i];
213 200
214 trace_amdgpu_vm_grab_id(choices[i], ring->idx); 201 if (!r) {
215 return amdgpu_sync_fence(ring->adev, sync, fence); 202 fence_put(id->active);
216 } 203 id->active = fence_get(fence);
217 } 204 }
218 205
219 /* should never happen */ 206 mutex_unlock(&adev->vm_manager.lock);
220 BUG(); 207 return r;
221 return -EINVAL;
222} 208}
223 209
224/** 210/**
@@ -228,9 +214,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
228 * @vm: vm we want to flush 214 * @vm: vm we want to flush
229 * @updates: last vm update that we waited for 215 * @updates: last vm update that we waited for
230 * 216 *
231 * Flush the vm (cayman+). 217 * Flush the vm.
232 *
233 * Global and local mutex must be locked!
234 */ 218 */
235void amdgpu_vm_flush(struct amdgpu_ring *ring, 219void amdgpu_vm_flush(struct amdgpu_ring *ring,
236 struct amdgpu_vm *vm, 220 struct amdgpu_vm *vm,
@@ -260,36 +244,12 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
260} 244}
261 245
262/** 246/**
263 * amdgpu_vm_fence - remember fence for vm
264 *
265 * @adev: amdgpu_device pointer
266 * @vm: vm we want to fence
267 * @fence: fence to remember
268 *
269 * Fence the vm (cayman+).
270 * Set the fence used to protect page table and id.
271 *
272 * Global and local mutex must be locked!
273 */
274void amdgpu_vm_fence(struct amdgpu_device *adev,
275 struct amdgpu_vm *vm,
276 struct fence *fence)
277{
278 struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
279 unsigned vm_id = vm->ids[ring->idx].id;
280
281 fence_put(adev->vm_manager.ids[vm_id].active);
282 adev->vm_manager.ids[vm_id].active = fence_get(fence);
283 atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
284}
285
286/**
287 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo 247 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
288 * 248 *
289 * @vm: requested vm 249 * @vm: requested vm
290 * @bo: requested buffer object 250 * @bo: requested buffer object
291 * 251 *
292 * Find @bo inside the requested vm (cayman+). 252 * Find @bo inside the requested vm.
293 * Search inside the @bos vm list for the requested vm 253 * Search inside the @bos vm list for the requested vm
294 * Returns the found bo_va or NULL if none is found 254 * Returns the found bo_va or NULL if none is found
295 * 255 *
@@ -312,32 +272,40 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
312 * amdgpu_vm_update_pages - helper to call the right asic function 272 * amdgpu_vm_update_pages - helper to call the right asic function
313 * 273 *
314 * @adev: amdgpu_device pointer 274 * @adev: amdgpu_device pointer
275 * @gtt: GART instance to use for mapping
276 * @gtt_flags: GTT hw access flags
315 * @ib: indirect buffer to fill with commands 277 * @ib: indirect buffer to fill with commands
316 * @pe: addr of the page entry 278 * @pe: addr of the page entry
317 * @addr: dst addr to write into pe 279 * @addr: dst addr to write into pe
318 * @count: number of page entries to update 280 * @count: number of page entries to update
319 * @incr: increase next addr by incr bytes 281 * @incr: increase next addr by incr bytes
320 * @flags: hw access flags 282 * @flags: hw access flags
321 * @gtt_flags: GTT hw access flags
322 * 283 *
323 * Traces the parameters and calls the right asic functions 284 * Traces the parameters and calls the right asic functions
324 * to setup the page table using the DMA. 285 * to setup the page table using the DMA.
325 */ 286 */
326static void amdgpu_vm_update_pages(struct amdgpu_device *adev, 287static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
288 struct amdgpu_gart *gtt,
289 uint32_t gtt_flags,
327 struct amdgpu_ib *ib, 290 struct amdgpu_ib *ib,
328 uint64_t pe, uint64_t addr, 291 uint64_t pe, uint64_t addr,
329 unsigned count, uint32_t incr, 292 unsigned count, uint32_t incr,
330 uint32_t flags, uint32_t gtt_flags) 293 uint32_t flags)
331{ 294{
332 trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); 295 trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
333 296
334 if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) { 297 if ((gtt == &adev->gart) && (flags == gtt_flags)) {
335 uint64_t src = adev->gart.table_addr + (addr >> 12) * 8; 298 uint64_t src = gtt->table_addr + (addr >> 12) * 8;
336 amdgpu_vm_copy_pte(adev, ib, pe, src, count); 299 amdgpu_vm_copy_pte(adev, ib, pe, src, count);
337 300
338 } else if ((flags & AMDGPU_PTE_SYSTEM) || (count < 3)) { 301 } else if (gtt) {
339 amdgpu_vm_write_pte(adev, ib, pe, addr, 302 dma_addr_t *pages_addr = gtt->pages_addr;
340 count, incr, flags); 303 amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
304 count, incr, flags);
305
306 } else if (count < 3) {
307 amdgpu_vm_write_pte(adev, ib, NULL, pe, addr,
308 count, incr, flags);
341 309
342 } else { 310 } else {
343 amdgpu_vm_set_pte_pde(adev, ib, pe, addr, 311 amdgpu_vm_set_pte_pde(adev, ib, pe, addr,
@@ -345,15 +313,6 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
345 } 313 }
346} 314}
347 315
348int amdgpu_vm_free_job(struct amdgpu_job *job)
349{
350 int i;
351 for (i = 0; i < job->num_ibs; i++)
352 amdgpu_ib_free(job->adev, &job->ibs[i]);
353 kfree(job->ibs);
354 return 0;
355}
356
357/** 316/**
358 * amdgpu_vm_clear_bo - initially clear the page dir/table 317 * amdgpu_vm_clear_bo - initially clear the page dir/table
359 * 318 *
@@ -363,15 +322,18 @@ int amdgpu_vm_free_job(struct amdgpu_job *job)
363 * need to reserve bo first before calling it. 322 * need to reserve bo first before calling it.
364 */ 323 */
365static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, 324static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
325 struct amdgpu_vm *vm,
366 struct amdgpu_bo *bo) 326 struct amdgpu_bo *bo)
367{ 327{
368 struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; 328 struct amdgpu_ring *ring;
369 struct fence *fence = NULL; 329 struct fence *fence = NULL;
370 struct amdgpu_ib *ib; 330 struct amdgpu_job *job;
371 unsigned entries; 331 unsigned entries;
372 uint64_t addr; 332 uint64_t addr;
373 int r; 333 int r;
374 334
335 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
336
375 r = reservation_object_reserve_shared(bo->tbo.resv); 337 r = reservation_object_reserve_shared(bo->tbo.resv);
376 if (r) 338 if (r)
377 return r; 339 return r;
@@ -383,56 +345,57 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
383 addr = amdgpu_bo_gpu_offset(bo); 345 addr = amdgpu_bo_gpu_offset(bo);
384 entries = amdgpu_bo_size(bo) / 8; 346 entries = amdgpu_bo_size(bo) / 8;
385 347
386 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); 348 r = amdgpu_job_alloc_with_ib(adev, 64, &job);
387 if (!ib) 349 if (r)
388 goto error; 350 goto error;
389 351
390 r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib); 352 amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries,
353 0, 0);
354 amdgpu_ring_pad_ib(ring, &job->ibs[0]);
355
356 WARN_ON(job->ibs[0].length_dw > 64);
357 r = amdgpu_job_submit(job, ring, &vm->entity,
358 AMDGPU_FENCE_OWNER_VM, &fence);
391 if (r) 359 if (r)
392 goto error_free; 360 goto error_free;
393 361
394 ib->length_dw = 0; 362 amdgpu_bo_fence(bo, fence, true);
395
396 amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
397 amdgpu_vm_pad_ib(adev, ib);
398 WARN_ON(ib->length_dw > 64);
399 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
400 &amdgpu_vm_free_job,
401 AMDGPU_FENCE_OWNER_VM,
402 &fence);
403 if (!r)
404 amdgpu_bo_fence(bo, fence, true);
405 fence_put(fence); 363 fence_put(fence);
406 if (amdgpu_enable_scheduler) 364 return 0;
407 return 0;
408 365
409error_free: 366error_free:
410 amdgpu_ib_free(adev, ib); 367 amdgpu_job_free(job);
411 kfree(ib);
412 368
413error: 369error:
414 return r; 370 return r;
415} 371}
416 372
417/** 373/**
418 * amdgpu_vm_map_gart - get the physical address of a gart page 374 * amdgpu_vm_map_gart - Resolve gart mapping of addr
419 * 375 *
420 * @adev: amdgpu_device pointer 376 * @pages_addr: optional DMA address to use for lookup
421 * @addr: the unmapped addr 377 * @addr: the unmapped addr
422 * 378 *
423 * Look up the physical address of the page that the pte resolves 379 * Look up the physical address of the page that the pte resolves
424 * to (cayman+). 380 * to and return the pointer for the page table entry.
425 * Returns the physical address of the page.
426 */ 381 */
427uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr) 382uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
428{ 383{
429 uint64_t result; 384 uint64_t result;
430 385
431 /* page table offset */ 386 if (pages_addr) {
432 result = adev->gart.pages_addr[addr >> PAGE_SHIFT]; 387 /* page table offset */
388 result = pages_addr[addr >> PAGE_SHIFT];
433 389
434 /* in case cpu page size != gpu page size*/ 390 /* in case cpu page size != gpu page size*/
435 result |= addr & (~PAGE_MASK); 391 result |= addr & (~PAGE_MASK);
392
393 } else {
394 /* No mapping required */
395 result = addr;
396 }
397
398 result &= 0xFFFFFFFFFFFFF000ULL;
436 399
437 return result; 400 return result;
438} 401}
@@ -446,45 +409,37 @@ uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr)
446 * @end: end of GPU address range 409 * @end: end of GPU address range
447 * 410 *
448 * Allocates new page tables if necessary 411 * Allocates new page tables if necessary
449 * and updates the page directory (cayman+). 412 * and updates the page directory.
450 * Returns 0 for success, error for failure. 413 * Returns 0 for success, error for failure.
451 *
452 * Global and local mutex must be locked!
453 */ 414 */
454int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, 415int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
455 struct amdgpu_vm *vm) 416 struct amdgpu_vm *vm)
456{ 417{
457 struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; 418 struct amdgpu_ring *ring;
458 struct amdgpu_bo *pd = vm->page_directory; 419 struct amdgpu_bo *pd = vm->page_directory;
459 uint64_t pd_addr = amdgpu_bo_gpu_offset(pd); 420 uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
460 uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; 421 uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
461 uint64_t last_pde = ~0, last_pt = ~0; 422 uint64_t last_pde = ~0, last_pt = ~0;
462 unsigned count = 0, pt_idx, ndw; 423 unsigned count = 0, pt_idx, ndw;
424 struct amdgpu_job *job;
463 struct amdgpu_ib *ib; 425 struct amdgpu_ib *ib;
464 struct fence *fence = NULL; 426 struct fence *fence = NULL;
465 427
466 int r; 428 int r;
467 429
430 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
431
468 /* padding, etc. */ 432 /* padding, etc. */
469 ndw = 64; 433 ndw = 64;
470 434
471 /* assume the worst case */ 435 /* assume the worst case */
472 ndw += vm->max_pde_used * 6; 436 ndw += vm->max_pde_used * 6;
473 437
474 /* update too big for an IB */ 438 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
475 if (ndw > 0xfffff) 439 if (r)
476 return -ENOMEM;
477
478 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
479 if (!ib)
480 return -ENOMEM;
481
482 r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
483 if (r) {
484 kfree(ib);
485 return r; 440 return r;
486 } 441
487 ib->length_dw = 0; 442 ib = &job->ibs[0];
488 443
489 /* walk over the address space and update the page directory */ 444 /* walk over the address space and update the page directory */
490 for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { 445 for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -504,9 +459,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
504 ((last_pt + incr * count) != pt)) { 459 ((last_pt + incr * count) != pt)) {
505 460
506 if (count) { 461 if (count) {
507 amdgpu_vm_update_pages(adev, ib, last_pde, 462 amdgpu_vm_update_pages(adev, NULL, 0, ib,
508 last_pt, count, incr, 463 last_pde, last_pt,
509 AMDGPU_PTE_VALID, 0); 464 count, incr,
465 AMDGPU_PTE_VALID);
510 } 466 }
511 467
512 count = 1; 468 count = 1;
@@ -518,17 +474,16 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
518 } 474 }
519 475
520 if (count) 476 if (count)
521 amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count, 477 amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
522 incr, AMDGPU_PTE_VALID, 0); 478 count, incr, AMDGPU_PTE_VALID);
523 479
524 if (ib->length_dw != 0) { 480 if (ib->length_dw != 0) {
525 amdgpu_vm_pad_ib(adev, ib); 481 amdgpu_ring_pad_ib(ring, ib);
526 amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM); 482 amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
483 AMDGPU_FENCE_OWNER_VM);
527 WARN_ON(ib->length_dw > ndw); 484 WARN_ON(ib->length_dw > ndw);
528 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, 485 r = amdgpu_job_submit(job, ring, &vm->entity,
529 &amdgpu_vm_free_job, 486 AMDGPU_FENCE_OWNER_VM, &fence);
530 AMDGPU_FENCE_OWNER_VM,
531 &fence);
532 if (r) 487 if (r)
533 goto error_free; 488 goto error_free;
534 489
@@ -536,18 +491,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
536 fence_put(vm->page_directory_fence); 491 fence_put(vm->page_directory_fence);
537 vm->page_directory_fence = fence_get(fence); 492 vm->page_directory_fence = fence_get(fence);
538 fence_put(fence); 493 fence_put(fence);
539 }
540 494
541 if (!amdgpu_enable_scheduler || ib->length_dw == 0) { 495 } else {
542 amdgpu_ib_free(adev, ib); 496 amdgpu_job_free(job);
543 kfree(ib);
544 } 497 }
545 498
546 return 0; 499 return 0;
547 500
548error_free: 501error_free:
549 amdgpu_ib_free(adev, ib); 502 amdgpu_job_free(job);
550 kfree(ib);
551 return r; 503 return r;
552} 504}
553 505
@@ -555,20 +507,20 @@ error_free:
555 * amdgpu_vm_frag_ptes - add fragment information to PTEs 507 * amdgpu_vm_frag_ptes - add fragment information to PTEs
556 * 508 *
557 * @adev: amdgpu_device pointer 509 * @adev: amdgpu_device pointer
510 * @gtt: GART instance to use for mapping
511 * @gtt_flags: GTT hw mapping flags
558 * @ib: IB for the update 512 * @ib: IB for the update
559 * @pe_start: first PTE to handle 513 * @pe_start: first PTE to handle
560 * @pe_end: last PTE to handle 514 * @pe_end: last PTE to handle
561 * @addr: addr those PTEs should point to 515 * @addr: addr those PTEs should point to
562 * @flags: hw mapping flags 516 * @flags: hw mapping flags
563 * @gtt_flags: GTT hw mapping flags
564 *
565 * Global and local mutex must be locked!
566 */ 517 */
567static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, 518static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
519 struct amdgpu_gart *gtt,
520 uint32_t gtt_flags,
568 struct amdgpu_ib *ib, 521 struct amdgpu_ib *ib,
569 uint64_t pe_start, uint64_t pe_end, 522 uint64_t pe_start, uint64_t pe_end,
570 uint64_t addr, uint32_t flags, 523 uint64_t addr, uint32_t flags)
571 uint32_t gtt_flags)
572{ 524{
573 /** 525 /**
574 * The MC L1 TLB supports variable sized pages, based on a fragment 526 * The MC L1 TLB supports variable sized pages, based on a fragment
@@ -598,36 +550,39 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
598 550
599 unsigned count; 551 unsigned count;
600 552
553 /* Abort early if there isn't anything to do */
554 if (pe_start == pe_end)
555 return;
556
601 /* system pages are non continuously */ 557 /* system pages are non continuously */
602 if ((flags & AMDGPU_PTE_SYSTEM) || !(flags & AMDGPU_PTE_VALID) || 558 if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
603 (frag_start >= frag_end)) {
604 559
605 count = (pe_end - pe_start) / 8; 560 count = (pe_end - pe_start) / 8;
606 amdgpu_vm_update_pages(adev, ib, pe_start, addr, count, 561 amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
607 AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); 562 addr, count, AMDGPU_GPU_PAGE_SIZE,
563 flags);
608 return; 564 return;
609 } 565 }
610 566
611 /* handle the 4K area at the beginning */ 567 /* handle the 4K area at the beginning */
612 if (pe_start != frag_start) { 568 if (pe_start != frag_start) {
613 count = (frag_start - pe_start) / 8; 569 count = (frag_start - pe_start) / 8;
614 amdgpu_vm_update_pages(adev, ib, pe_start, addr, count, 570 amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
615 AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); 571 count, AMDGPU_GPU_PAGE_SIZE, flags);
616 addr += AMDGPU_GPU_PAGE_SIZE * count; 572 addr += AMDGPU_GPU_PAGE_SIZE * count;
617 } 573 }
618 574
619 /* handle the area in the middle */ 575 /* handle the area in the middle */
620 count = (frag_end - frag_start) / 8; 576 count = (frag_end - frag_start) / 8;
621 amdgpu_vm_update_pages(adev, ib, frag_start, addr, count, 577 amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
622 AMDGPU_GPU_PAGE_SIZE, flags | frag_flags, 578 AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
623 gtt_flags);
624 579
625 /* handle the 4K area at the end */ 580 /* handle the 4K area at the end */
626 if (frag_end != pe_end) { 581 if (frag_end != pe_end) {
627 addr += AMDGPU_GPU_PAGE_SIZE * count; 582 addr += AMDGPU_GPU_PAGE_SIZE * count;
628 count = (pe_end - frag_end) / 8; 583 count = (pe_end - frag_end) / 8;
629 amdgpu_vm_update_pages(adev, ib, frag_end, addr, count, 584 amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
630 AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); 585 count, AMDGPU_GPU_PAGE_SIZE, flags);
631 } 586 }
632} 587}
633 588
@@ -635,122 +590,105 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
635 * amdgpu_vm_update_ptes - make sure that page tables are valid 590 * amdgpu_vm_update_ptes - make sure that page tables are valid
636 * 591 *
637 * @adev: amdgpu_device pointer 592 * @adev: amdgpu_device pointer
593 * @gtt: GART instance to use for mapping
594 * @gtt_flags: GTT hw mapping flags
638 * @vm: requested vm 595 * @vm: requested vm
639 * @start: start of GPU address range 596 * @start: start of GPU address range
640 * @end: end of GPU address range 597 * @end: end of GPU address range
641 * @dst: destination address to map to 598 * @dst: destination address to map to
642 * @flags: mapping flags 599 * @flags: mapping flags
643 * 600 *
644 * Update the page tables in the range @start - @end (cayman+). 601 * Update the page tables in the range @start - @end.
645 *
646 * Global and local mutex must be locked!
647 */ 602 */
648static int amdgpu_vm_update_ptes(struct amdgpu_device *adev, 603static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
649 struct amdgpu_vm *vm, 604 struct amdgpu_gart *gtt,
650 struct amdgpu_ib *ib, 605 uint32_t gtt_flags,
651 uint64_t start, uint64_t end, 606 struct amdgpu_vm *vm,
652 uint64_t dst, uint32_t flags, 607 struct amdgpu_ib *ib,
653 uint32_t gtt_flags) 608 uint64_t start, uint64_t end,
609 uint64_t dst, uint32_t flags)
654{ 610{
655 uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; 611 const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
656 uint64_t last_pte = ~0, last_dst = ~0;
657 void *owner = AMDGPU_FENCE_OWNER_VM;
658 unsigned count = 0;
659 uint64_t addr;
660 612
661 /* sync to everything on unmapping */ 613 uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
662 if (!(flags & AMDGPU_PTE_VALID)) 614 uint64_t addr;
663 owner = AMDGPU_FENCE_OWNER_UNDEFINED;
664 615
665 /* walk over the address space and update the page tables */ 616 /* walk over the address space and update the page tables */
666 for (addr = start; addr < end; ) { 617 for (addr = start; addr < end; ) {
667 uint64_t pt_idx = addr >> amdgpu_vm_block_size; 618 uint64_t pt_idx = addr >> amdgpu_vm_block_size;
668 struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj; 619 struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
669 unsigned nptes; 620 unsigned nptes;
670 uint64_t pte; 621 uint64_t pe_start;
671 int r;
672
673 amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
674 r = reservation_object_reserve_shared(pt->tbo.resv);
675 if (r)
676 return r;
677 622
678 if ((addr & ~mask) == (end & ~mask)) 623 if ((addr & ~mask) == (end & ~mask))
679 nptes = end - addr; 624 nptes = end - addr;
680 else 625 else
681 nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); 626 nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
682 627
683 pte = amdgpu_bo_gpu_offset(pt); 628 pe_start = amdgpu_bo_gpu_offset(pt);
684 pte += (addr & mask) * 8; 629 pe_start += (addr & mask) * 8;
685 630
686 if ((last_pte + 8 * count) != pte) { 631 if (last_pe_end != pe_start) {
687 632
688 if (count) { 633 amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
689 amdgpu_vm_frag_ptes(adev, ib, last_pte, 634 last_pe_start, last_pe_end,
690 last_pte + 8 * count, 635 last_dst, flags);
691 last_dst, flags,
692 gtt_flags);
693 }
694 636
695 count = nptes; 637 last_pe_start = pe_start;
696 last_pte = pte; 638 last_pe_end = pe_start + 8 * nptes;
697 last_dst = dst; 639 last_dst = dst;
698 } else { 640 } else {
699 count += nptes; 641 last_pe_end += 8 * nptes;
700 } 642 }
701 643
702 addr += nptes; 644 addr += nptes;
703 dst += nptes * AMDGPU_GPU_PAGE_SIZE; 645 dst += nptes * AMDGPU_GPU_PAGE_SIZE;
704 } 646 }
705 647
706 if (count) { 648 amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
707 amdgpu_vm_frag_ptes(adev, ib, last_pte, 649 last_pe_start, last_pe_end,
708 last_pte + 8 * count, 650 last_dst, flags);
709 last_dst, flags, gtt_flags);
710 }
711
712 return 0;
713} 651}
714 652
715/** 653/**
716 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table 654 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
717 * 655 *
718 * @adev: amdgpu_device pointer 656 * @adev: amdgpu_device pointer
657 * @gtt: GART instance to use for mapping
658 * @gtt_flags: flags as they are used for GTT
719 * @vm: requested vm 659 * @vm: requested vm
720 * @mapping: mapped range and flags to use for the update 660 * @start: start of mapped range
661 * @last: last mapped entry
662 * @flags: flags for the entries
721 * @addr: addr to set the area to 663 * @addr: addr to set the area to
722 * @gtt_flags: flags as they are used for GTT
723 * @fence: optional resulting fence 664 * @fence: optional resulting fence
724 * 665 *
725 * Fill in the page table entries for @mapping. 666 * Fill in the page table entries between @start and @last.
726 * Returns 0 for success, -EINVAL for failure. 667 * Returns 0 for success, -EINVAL for failure.
727 *
728 * Object have to be reserved and mutex must be locked!
729 */ 668 */
730static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, 669static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
670 struct amdgpu_gart *gtt,
671 uint32_t gtt_flags,
731 struct amdgpu_vm *vm, 672 struct amdgpu_vm *vm,
732 struct amdgpu_bo_va_mapping *mapping, 673 uint64_t start, uint64_t last,
733 uint64_t addr, uint32_t gtt_flags, 674 uint32_t flags, uint64_t addr,
734 struct fence **fence) 675 struct fence **fence)
735{ 676{
736 struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; 677 struct amdgpu_ring *ring;
678 void *owner = AMDGPU_FENCE_OWNER_VM;
737 unsigned nptes, ncmds, ndw; 679 unsigned nptes, ncmds, ndw;
738 uint32_t flags = gtt_flags; 680 struct amdgpu_job *job;
739 struct amdgpu_ib *ib; 681 struct amdgpu_ib *ib;
740 struct fence *f = NULL; 682 struct fence *f = NULL;
741 int r; 683 int r;
742 684
743 /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here 685 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
744 * but in case of something, we filter the flags in first place
745 */
746 if (!(mapping->flags & AMDGPU_PTE_READABLE))
747 flags &= ~AMDGPU_PTE_READABLE;
748 if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
749 flags &= ~AMDGPU_PTE_WRITEABLE;
750 686
751 trace_amdgpu_vm_bo_update(mapping); 687 /* sync to everything on unmapping */
688 if (!(flags & AMDGPU_PTE_VALID))
689 owner = AMDGPU_FENCE_OWNER_UNDEFINED;
752 690
753 nptes = mapping->it.last - mapping->it.start + 1; 691 nptes = last - start + 1;
754 692
755 /* 693 /*
756 * reserve space for one command every (1 << BLOCK_SIZE) 694 * reserve space for one command every (1 << BLOCK_SIZE)
@@ -761,11 +699,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
761 /* padding, etc. */ 699 /* padding, etc. */
762 ndw = 64; 700 ndw = 64;
763 701
764 if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) { 702 if ((gtt == &adev->gart) && (flags == gtt_flags)) {
765 /* only copy commands needed */ 703 /* only copy commands needed */
766 ndw += ncmds * 7; 704 ndw += ncmds * 7;
767 705
768 } else if (flags & AMDGPU_PTE_SYSTEM) { 706 } else if (gtt) {
769 /* header for write data commands */ 707 /* header for write data commands */
770 ndw += ncmds * 4; 708 ndw += ncmds * 4;
771 709
@@ -780,38 +718,28 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
780 ndw += 2 * 10; 718 ndw += 2 * 10;
781 } 719 }
782 720
783 /* update too big for an IB */ 721 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
784 if (ndw > 0xfffff) 722 if (r)
785 return -ENOMEM;
786
787 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
788 if (!ib)
789 return -ENOMEM;
790
791 r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
792 if (r) {
793 kfree(ib);
794 return r; 723 return r;
795 }
796 724
797 ib->length_dw = 0; 725 ib = &job->ibs[0];
798 726
799 r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start, 727 r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
800 mapping->it.last + 1, addr + mapping->offset, 728 owner);
801 flags, gtt_flags); 729 if (r)
730 goto error_free;
802 731
803 if (r) { 732 r = reservation_object_reserve_shared(vm->page_directory->tbo.resv);
804 amdgpu_ib_free(adev, ib); 733 if (r)
805 kfree(ib); 734 goto error_free;
806 return r; 735
807 } 736 amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
737 addr, flags);
808 738
809 amdgpu_vm_pad_ib(adev, ib); 739 amdgpu_ring_pad_ib(ring, ib);
810 WARN_ON(ib->length_dw > ndw); 740 WARN_ON(ib->length_dw > ndw);
811 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, 741 r = amdgpu_job_submit(job, ring, &vm->entity,
812 &amdgpu_vm_free_job, 742 AMDGPU_FENCE_OWNER_VM, &f);
813 AMDGPU_FENCE_OWNER_VM,
814 &f);
815 if (r) 743 if (r)
816 goto error_free; 744 goto error_free;
817 745
@@ -821,19 +749,76 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
821 *fence = fence_get(f); 749 *fence = fence_get(f);
822 } 750 }
823 fence_put(f); 751 fence_put(f);
824 if (!amdgpu_enable_scheduler) {
825 amdgpu_ib_free(adev, ib);
826 kfree(ib);
827 }
828 return 0; 752 return 0;
829 753
830error_free: 754error_free:
831 amdgpu_ib_free(adev, ib); 755 amdgpu_job_free(job);
832 kfree(ib);
833 return r; 756 return r;
834} 757}
835 758
836/** 759/**
760 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
761 *
762 * @adev: amdgpu_device pointer
763 * @gtt: GART instance to use for mapping
764 * @vm: requested vm
765 * @mapping: mapped range and flags to use for the update
766 * @addr: addr to set the area to
767 * @gtt_flags: flags as they are used for GTT
768 * @fence: optional resulting fence
769 *
770 * Split the mapping into smaller chunks so that each update fits
771 * into a SDMA IB.
772 * Returns 0 for success, -EINVAL for failure.
773 */
774static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
775 struct amdgpu_gart *gtt,
776 uint32_t gtt_flags,
777 struct amdgpu_vm *vm,
778 struct amdgpu_bo_va_mapping *mapping,
779 uint64_t addr, struct fence **fence)
780{
781 const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
782
783 uint64_t start = mapping->it.start;
784 uint32_t flags = gtt_flags;
785 int r;
786
787 /* normally, bo_va->flags only contains the READABLE and WRITEABLE bits,
788 * but filter the flags here first just in case
789 */
790 if (!(mapping->flags & AMDGPU_PTE_READABLE))
791 flags &= ~AMDGPU_PTE_READABLE;
792 if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
793 flags &= ~AMDGPU_PTE_WRITEABLE;
794
795 trace_amdgpu_vm_bo_update(mapping);
796
797 addr += mapping->offset;
798
799 if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags)))
800 return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
801 start, mapping->it.last,
802 flags, addr, fence);
803
804 while (start != mapping->it.last + 1) {
805 uint64_t last;
806
807 last = min((uint64_t)mapping->it.last, start + max_size);
808 r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
809 start, last, flags, addr,
810 fence);
811 if (r)
812 return r;
813
814 start = last + 1;
815 addr += max_size;
816 }
817
818 return 0;
819}
820
821/**
837 * amdgpu_vm_bo_update - update all BO mappings in the vm page table 822 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
838 * 823 *
839 * @adev: amdgpu_device pointer 824 * @adev: amdgpu_device pointer
@@ -851,14 +836,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
851{ 836{
852 struct amdgpu_vm *vm = bo_va->vm; 837 struct amdgpu_vm *vm = bo_va->vm;
853 struct amdgpu_bo_va_mapping *mapping; 838 struct amdgpu_bo_va_mapping *mapping;
839 struct amdgpu_gart *gtt = NULL;
854 uint32_t flags; 840 uint32_t flags;
855 uint64_t addr; 841 uint64_t addr;
856 int r; 842 int r;
857 843
858 if (mem) { 844 if (mem) {
859 addr = (u64)mem->start << PAGE_SHIFT; 845 addr = (u64)mem->start << PAGE_SHIFT;
860 if (mem->mem_type != TTM_PL_TT) 846 switch (mem->mem_type) {
847 case TTM_PL_TT:
848 gtt = &bo_va->bo->adev->gart;
849 break;
850
851 case TTM_PL_VRAM:
861 addr += adev->vm_manager.vram_base_offset; 852 addr += adev->vm_manager.vram_base_offset;
853 break;
854
855 default:
856 break;
857 }
862 } else { 858 } else {
863 addr = 0; 859 addr = 0;
864 } 860 }
@@ -871,8 +867,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
871 spin_unlock(&vm->status_lock); 867 spin_unlock(&vm->status_lock);
872 868
873 list_for_each_entry(mapping, &bo_va->invalids, list) { 869 list_for_each_entry(mapping, &bo_va->invalids, list) {
874 r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr, 870 r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr,
875 flags, &bo_va->last_pt_update); 871 &bo_va->last_pt_update);
876 if (r) 872 if (r)
877 return r; 873 return r;
878 } 874 }
@@ -918,7 +914,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
918 struct amdgpu_bo_va_mapping, list); 914 struct amdgpu_bo_va_mapping, list);
919 list_del(&mapping->list); 915 list_del(&mapping->list);
920 spin_unlock(&vm->freed_lock); 916 spin_unlock(&vm->freed_lock);
921 r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL); 917 r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
918 0, NULL);
922 kfree(mapping); 919 kfree(mapping);
923 if (r) 920 if (r)
924 return r; 921 return r;
@@ -976,7 +973,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
976 * @vm: requested vm 973 * @vm: requested vm
977 * @bo: amdgpu buffer object 974 * @bo: amdgpu buffer object
978 * 975 *
979 * Add @bo into the requested vm (cayman+). 976 * Add @bo into the requested vm.
980 * Add @bo to the list of bos associated with the vm 977 * Add @bo to the list of bos associated with the vm
981 * Returns newly added bo_va or NULL for failure 978 * Returns newly added bo_va or NULL for failure
982 * 979 *
@@ -1117,15 +1114,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1117 */ 1114 */
1118 pt->parent = amdgpu_bo_ref(vm->page_directory); 1115 pt->parent = amdgpu_bo_ref(vm->page_directory);
1119 1116
1120 r = amdgpu_vm_clear_bo(adev, pt); 1117 r = amdgpu_vm_clear_bo(adev, vm, pt);
1121 if (r) { 1118 if (r) {
1122 amdgpu_bo_unref(&pt); 1119 amdgpu_bo_unref(&pt);
1123 goto error_free; 1120 goto error_free;
1124 } 1121 }
1125 1122
1126 entry->robj = pt; 1123 entry->robj = pt;
1127 entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
1128 entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
1129 entry->priority = 0; 1124 entry->priority = 0;
1130 entry->tv.bo = &entry->robj->tbo; 1125 entry->tv.bo = &entry->robj->tbo;
1131 entry->tv.shared = true; 1126 entry->tv.shared = true;
@@ -1210,7 +1205,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1210 * @adev: amdgpu_device pointer 1205 * @adev: amdgpu_device pointer
1211 * @bo_va: requested bo_va 1206 * @bo_va: requested bo_va
1212 * 1207 *
1213 * Remove @bo_va->bo from the requested vm (cayman+). 1208 * Remove @bo_va->bo from the requested vm.
1214 * 1209 *
1215 * Object have to be reserved! 1210 * Object have to be reserved!
1216 */ 1211 */
@@ -1255,7 +1250,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
1255 * @vm: requested vm 1250 * @vm: requested vm
1256 * @bo: amdgpu buffer object 1251 * @bo: amdgpu buffer object
1257 * 1252 *
1258 * Mark @bo as invalid (cayman+). 1253 * Mark @bo as invalid.
1259 */ 1254 */
1260void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, 1255void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
1261 struct amdgpu_bo *bo) 1256 struct amdgpu_bo *bo)
@@ -1276,13 +1271,16 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
1276 * @adev: amdgpu_device pointer 1271 * @adev: amdgpu_device pointer
1277 * @vm: requested vm 1272 * @vm: requested vm
1278 * 1273 *
1279 * Init @vm fields (cayman+). 1274 * Init @vm fields.
1280 */ 1275 */
1281int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) 1276int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1282{ 1277{
1283 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, 1278 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
1284 AMDGPU_VM_PTE_COUNT * 8); 1279 AMDGPU_VM_PTE_COUNT * 8);
1285 unsigned pd_size, pd_entries; 1280 unsigned pd_size, pd_entries;
1281 unsigned ring_instance;
1282 struct amdgpu_ring *ring;
1283 struct amd_sched_rq *rq;
1286 int i, r; 1284 int i, r;
1287 1285
1288 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 1286 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1306,6 +1304,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1306 return -ENOMEM; 1304 return -ENOMEM;
1307 } 1305 }
1308 1306
1307 /* create scheduler entity for page table updates */
1308
1309 ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
1310 ring_instance %= adev->vm_manager.vm_pte_num_rings;
1311 ring = adev->vm_manager.vm_pte_rings[ring_instance];
1312 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
1313 r = amd_sched_entity_init(&ring->sched, &vm->entity,
1314 rq, amdgpu_sched_jobs);
1315 if (r)
1316 return r;
1317
1309 vm->page_directory_fence = NULL; 1318 vm->page_directory_fence = NULL;
1310 1319
1311 r = amdgpu_bo_create(adev, pd_size, align, true, 1320 r = amdgpu_bo_create(adev, pd_size, align, true,
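amdgpu_vm_init() now creates a per-VM scheduler entity for page-table updates and spreads VMs across the available PTE rings with a simple atomic round-robin. Condensed from the hunk above, with comments added:

	/* pick the next PTE ring round-robin so page-table work is
	 * distributed over all SDMA instances */
	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
	ring_instance %= adev->vm_manager.vm_pte_num_rings;
	ring = adev->vm_manager.vm_pte_rings[ring_instance];

	/* page-table updates run on the kernel-priority run queue of
	 * that ring's scheduler */
	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
	r = amd_sched_entity_init(&ring->sched, &vm->entity,
				  rq, amdgpu_sched_jobs);
	if (r)
		return r;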
@@ -1313,22 +1322,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1313 AMDGPU_GEM_CREATE_NO_CPU_ACCESS, 1322 AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
1314 NULL, NULL, &vm->page_directory); 1323 NULL, NULL, &vm->page_directory);
1315 if (r) 1324 if (r)
1316 return r; 1325 goto error_free_sched_entity;
1326
1317 r = amdgpu_bo_reserve(vm->page_directory, false); 1327 r = amdgpu_bo_reserve(vm->page_directory, false);
1318 if (r) { 1328 if (r)
1319 amdgpu_bo_unref(&vm->page_directory); 1329 goto error_free_page_directory;
1320 vm->page_directory = NULL; 1330
1321 return r; 1331 r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory);
1322 }
1323 r = amdgpu_vm_clear_bo(adev, vm->page_directory);
1324 amdgpu_bo_unreserve(vm->page_directory); 1332 amdgpu_bo_unreserve(vm->page_directory);
1325 if (r) { 1333 if (r)
1326 amdgpu_bo_unref(&vm->page_directory); 1334 goto error_free_page_directory;
1327 vm->page_directory = NULL;
1328 return r;
1329 }
1330 1335
1331 return 0; 1336 return 0;
1337
1338error_free_page_directory:
1339 amdgpu_bo_unref(&vm->page_directory);
1340 vm->page_directory = NULL;
1341
1342error_free_sched_entity:
1343 amd_sched_entity_fini(&ring->sched, &vm->entity);
1344
1345 return r;
1332} 1346}
1333 1347
1334/** 1348/**
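With the entity created first, amdgpu_vm_init() switches to goto-based error unwinding: the page directory is released before the scheduler entity, i.e. in reverse order of setup, and amdgpu_vm_fini() (in its hunk further down) gains the matching amd_sched_entity_fini() call. A condensed sketch of the resulting shape, BO-creation arguments elided:

	r = amdgpu_bo_create(adev, pd_size, align, true, ...); /* args as above */
	if (r)
		goto error_free_sched_entity;

	r = amdgpu_bo_reserve(vm->page_directory, false);
	if (r)
		goto error_free_page_directory;

	r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory);
	amdgpu_bo_unreserve(vm->page_directory);
	if (r)
		goto error_free_page_directory;

	return 0;

error_free_page_directory:
	amdgpu_bo_unref(&vm->page_directory);
	vm->page_directory = NULL;

error_free_sched_entity:
	amd_sched_entity_fini(&ring->sched, &vm->entity);
	return r;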
@@ -1337,7 +1351,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1337 * @adev: amdgpu_device pointer 1351 * @adev: amdgpu_device pointer
1338 * @vm: requested vm 1352 * @vm: requested vm
1339 * 1353 *
1340 * Tear down @vm (cayman+). 1354 * Tear down @vm.
1341 * Unbind the VM and remove all bos from the vm bo list 1355 * Unbind the VM and remove all bos from the vm bo list
1342 */ 1356 */
1343void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 1357void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
@@ -1345,6 +1359,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1345 struct amdgpu_bo_va_mapping *mapping, *tmp; 1359 struct amdgpu_bo_va_mapping *mapping, *tmp;
1346 int i; 1360 int i;
1347 1361
1362 amd_sched_entity_fini(vm->entity.sched, &vm->entity);
1363
1348 if (!RB_EMPTY_ROOT(&vm->va)) { 1364 if (!RB_EMPTY_ROOT(&vm->va)) {
1349 dev_err(adev->dev, "still active bo inside vm\n"); 1365 dev_err(adev->dev, "still active bo inside vm\n");
1350 } 1366 }
@@ -1375,6 +1391,27 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1375} 1391}
1376 1392
1377/** 1393/**
1394 * amdgpu_vm_manager_init - init the VM manager
1395 *
1396 * @adev: amdgpu_device pointer
1397 *
1398 * Initialize the VM manager structures
1399 */
1400void amdgpu_vm_manager_init(struct amdgpu_device *adev)
1401{
1402 unsigned i;
1403
1404 INIT_LIST_HEAD(&adev->vm_manager.ids_lru);
1405
1406 /* skip over VMID 0, since it is the system VM */
1407 for (i = 1; i < adev->vm_manager.num_ids; ++i)
1408 list_add_tail(&adev->vm_manager.ids[i].list,
1409 &adev->vm_manager.ids_lru);
1410
1411 atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
1412}
1413
1414/**
1378 * amdgpu_vm_manager_fini - cleanup VM manager 1415 * amdgpu_vm_manager_fini - cleanup VM manager
1379 * 1416 *
1380 * @adev: amdgpu_device pointer 1417 * @adev: amdgpu_device pointer
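The new amdgpu_vm_manager_init() concentrates the per-device state the code above relies on: the LRU of hardware VMIDs (VMID 0 is skipped because it is the system VM) and the counter behind the round-robin PTE-ring selection. Annotated, from the hunk above:

	INIT_LIST_HEAD(&adev->vm_manager.ids_lru);

	/* VMID 0 stays reserved for the system VM; the rest go on the
	 * LRU that VMID allocation presumably recycles from */
	for (i = 1; i < adev->vm_manager.num_ids; ++i)
		list_add_tail(&adev->vm_manager.ids[i].list,
			      &adev->vm_manager.ids_lru);

	/* start the round-robin used in amdgpu_vm_init() at ring 0 */
	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);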
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index fd9c9588ef46..6b1f0539ce9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1059,257 +1059,6 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num,
1059 return -EINVAL; 1059 return -EINVAL;
1060} 1060}
1061 1061
1062static void cik_print_gpu_status_regs(struct amdgpu_device *adev)
1063{
1064 dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
1065 RREG32(mmGRBM_STATUS));
1066 dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
1067 RREG32(mmGRBM_STATUS2));
1068 dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
1069 RREG32(mmGRBM_STATUS_SE0));
1070 dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
1071 RREG32(mmGRBM_STATUS_SE1));
1072 dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
1073 RREG32(mmGRBM_STATUS_SE2));
1074 dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
1075 RREG32(mmGRBM_STATUS_SE3));
1076 dev_info(adev->dev, " SRBM_STATUS=0x%08X\n",
1077 RREG32(mmSRBM_STATUS));
1078 dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
1079 RREG32(mmSRBM_STATUS2));
1080 dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
1081 RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
1082 dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
1083 RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
1084 dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
1085 dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
1086 RREG32(mmCP_STALLED_STAT1));
1087 dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
1088 RREG32(mmCP_STALLED_STAT2));
1089 dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
1090 RREG32(mmCP_STALLED_STAT3));
1091 dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
1092 RREG32(mmCP_CPF_BUSY_STAT));
1093 dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
1094 RREG32(mmCP_CPF_STALLED_STAT1));
1095 dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
1096 dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
1097 dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
1098 RREG32(mmCP_CPC_STALLED_STAT1));
1099 dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
1100}
1101
1102/**
1103 * cik_gpu_check_soft_reset - check which blocks are busy
1104 *
1105 * @adev: amdgpu_device pointer
1106 *
1107 * Check which blocks are busy and return the relevant reset
1108 * mask to be used by cik_gpu_soft_reset().
1109 * Returns a mask of the blocks to be reset.
1110 */
1111u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev)
1112{
1113 u32 reset_mask = 0;
1114 u32 tmp;
1115
1116 /* GRBM_STATUS */
1117 tmp = RREG32(mmGRBM_STATUS);
1118 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
1119 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
1120 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
1121 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
1122 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
1123 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
1124 reset_mask |= AMDGPU_RESET_GFX;
1125
1126 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK))
1127 reset_mask |= AMDGPU_RESET_CP;
1128
1129 /* GRBM_STATUS2 */
1130 tmp = RREG32(mmGRBM_STATUS2);
1131 if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
1132 reset_mask |= AMDGPU_RESET_RLC;
1133
1134 /* SDMA0_STATUS_REG */
1135 tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
1136 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1137 reset_mask |= AMDGPU_RESET_DMA;
1138
1139 /* SDMA1_STATUS_REG */
1140 tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
1141 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1142 reset_mask |= AMDGPU_RESET_DMA1;
1143
1144 /* SRBM_STATUS2 */
1145 tmp = RREG32(mmSRBM_STATUS2);
1146 if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK)
1147 reset_mask |= AMDGPU_RESET_DMA;
1148
1149 if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)
1150 reset_mask |= AMDGPU_RESET_DMA1;
1151
1152 /* SRBM_STATUS */
1153 tmp = RREG32(mmSRBM_STATUS);
1154
1155 if (tmp & SRBM_STATUS__IH_BUSY_MASK)
1156 reset_mask |= AMDGPU_RESET_IH;
1157
1158 if (tmp & SRBM_STATUS__SEM_BUSY_MASK)
1159 reset_mask |= AMDGPU_RESET_SEM;
1160
1161 if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
1162 reset_mask |= AMDGPU_RESET_GRBM;
1163
1164 if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
1165 reset_mask |= AMDGPU_RESET_VMC;
1166
1167 if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
1168 SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK))
1169 reset_mask |= AMDGPU_RESET_MC;
1170
1171 if (amdgpu_display_is_display_hung(adev))
1172 reset_mask |= AMDGPU_RESET_DISPLAY;
1173
1174 /* Skip MC reset as it's mostly likely not hung, just busy */
1175 if (reset_mask & AMDGPU_RESET_MC) {
1176 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
1177 reset_mask &= ~AMDGPU_RESET_MC;
1178 }
1179
1180 return reset_mask;
1181}
1182
1183/**
1184 * cik_gpu_soft_reset - soft reset GPU
1185 *
1186 * @adev: amdgpu_device pointer
1187 * @reset_mask: mask of which blocks to reset
1188 *
1189 * Soft reset the blocks specified in @reset_mask.
1190 */
1191static void cik_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask)
1192{
1193 struct amdgpu_mode_mc_save save;
1194 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
1195 u32 tmp;
1196
1197 if (reset_mask == 0)
1198 return;
1199
1200 dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask);
1201
1202 cik_print_gpu_status_regs(adev);
1203 dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
1204 RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR));
1205 dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
1206 RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS));
1207
1208 /* disable CG/PG */
1209
1210 /* stop the rlc */
1211 gfx_v7_0_rlc_stop(adev);
1212
1213 /* Disable GFX parsing/prefetching */
1214 WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
1215
1216 /* Disable MEC parsing/prefetching */
1217 WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
1218
1219 if (reset_mask & AMDGPU_RESET_DMA) {
1220 /* sdma0 */
1221 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
1222 tmp |= SDMA0_F32_CNTL__HALT_MASK;
1223 WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
1224 }
1225 if (reset_mask & AMDGPU_RESET_DMA1) {
1226 /* sdma1 */
1227 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
1228 tmp |= SDMA0_F32_CNTL__HALT_MASK;
1229 WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
1230 }
1231
1232 gmc_v7_0_mc_stop(adev, &save);
1233 if (amdgpu_asic_wait_for_mc_idle(adev)) {
1234 dev_warn(adev->dev, "Wait for MC idle timedout !\n");
1235 }
1236
1237 if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP))
1238 grbm_soft_reset = GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
1239 GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
1240
1241 if (reset_mask & AMDGPU_RESET_CP) {
1242 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
1243
1244 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
1245 }
1246
1247 if (reset_mask & AMDGPU_RESET_DMA)
1248 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
1249
1250 if (reset_mask & AMDGPU_RESET_DMA1)
1251 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
1252
1253 if (reset_mask & AMDGPU_RESET_DISPLAY)
1254 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
1255
1256 if (reset_mask & AMDGPU_RESET_RLC)
1257 grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
1258
1259 if (reset_mask & AMDGPU_RESET_SEM)
1260 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SEM_MASK;
1261
1262 if (reset_mask & AMDGPU_RESET_IH)
1263 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
1264
1265 if (reset_mask & AMDGPU_RESET_GRBM)
1266 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
1267
1268 if (reset_mask & AMDGPU_RESET_VMC)
1269 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_VMC_MASK;
1270
1271 if (!(adev->flags & AMD_IS_APU)) {
1272 if (reset_mask & AMDGPU_RESET_MC)
1273 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_MC_MASK;
1274 }
1275
1276 if (grbm_soft_reset) {
1277 tmp = RREG32(mmGRBM_SOFT_RESET);
1278 tmp |= grbm_soft_reset;
1279 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
1280 WREG32(mmGRBM_SOFT_RESET, tmp);
1281 tmp = RREG32(mmGRBM_SOFT_RESET);
1282
1283 udelay(50);
1284
1285 tmp &= ~grbm_soft_reset;
1286 WREG32(mmGRBM_SOFT_RESET, tmp);
1287 tmp = RREG32(mmGRBM_SOFT_RESET);
1288 }
1289
1290 if (srbm_soft_reset) {
1291 tmp = RREG32(mmSRBM_SOFT_RESET);
1292 tmp |= srbm_soft_reset;
1293 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
1294 WREG32(mmSRBM_SOFT_RESET, tmp);
1295 tmp = RREG32(mmSRBM_SOFT_RESET);
1296
1297 udelay(50);
1298
1299 tmp &= ~srbm_soft_reset;
1300 WREG32(mmSRBM_SOFT_RESET, tmp);
1301 tmp = RREG32(mmSRBM_SOFT_RESET);
1302 }
1303
1304 /* Wait a little for things to settle down */
1305 udelay(50);
1306
1307 gmc_v7_0_mc_resume(adev, &save);
1308 udelay(50);
1309
1310 cik_print_gpu_status_regs(adev);
1311}
1312
1313struct kv_reset_save_regs { 1062struct kv_reset_save_regs {
1314 u32 gmcon_reng_execute; 1063 u32 gmcon_reng_execute;
1315 u32 gmcon_misc; 1064 u32 gmcon_misc;
@@ -1405,45 +1154,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
1405 1154
1406static void cik_gpu_pci_config_reset(struct amdgpu_device *adev) 1155static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
1407{ 1156{
1408 struct amdgpu_mode_mc_save save;
1409 struct kv_reset_save_regs kv_save = { 0 }; 1157 struct kv_reset_save_regs kv_save = { 0 };
1410 u32 tmp, i; 1158 u32 i;
1411 1159
1412 dev_info(adev->dev, "GPU pci config reset\n"); 1160 dev_info(adev->dev, "GPU pci config reset\n");
1413 1161
1414 /* disable dpm? */
1415
1416 /* disable cg/pg */
1417
1418 /* Disable GFX parsing/prefetching */
1419 WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK |
1420 CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
1421
1422 /* Disable MEC parsing/prefetching */
1423 WREG32(mmCP_MEC_CNTL,
1424 CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
1425
1426 /* sdma0 */
1427 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
1428 tmp |= SDMA0_F32_CNTL__HALT_MASK;
1429 WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
1430 /* sdma1 */
1431 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
1432 tmp |= SDMA0_F32_CNTL__HALT_MASK;
1433 WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
1434 /* XXX other engines? */
1435
1436 /* halt the rlc, disable cp internal ints */
1437 gfx_v7_0_rlc_stop(adev);
1438
1439 udelay(50);
1440
1441 /* disable mem access */
1442 gmc_v7_0_mc_stop(adev, &save);
1443 if (amdgpu_asic_wait_for_mc_idle(adev)) {
1444 dev_warn(adev->dev, "Wait for MC idle timed out !\n");
1445 }
1446
1447 if (adev->flags & AMD_IS_APU) 1162 if (adev->flags & AMD_IS_APU)
1448 kv_save_regs_for_reset(adev, &kv_save); 1163 kv_save_regs_for_reset(adev, &kv_save);
1449 1164
@@ -1489,26 +1204,11 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
1489 */ 1204 */
1490static int cik_asic_reset(struct amdgpu_device *adev) 1205static int cik_asic_reset(struct amdgpu_device *adev)
1491{ 1206{
1492 u32 reset_mask; 1207 cik_set_bios_scratch_engine_hung(adev, true);
1493
1494 reset_mask = amdgpu_cik_gpu_check_soft_reset(adev);
1495
1496 if (reset_mask)
1497 cik_set_bios_scratch_engine_hung(adev, true);
1498
1499 /* try soft reset */
1500 cik_gpu_soft_reset(adev, reset_mask);
1501
1502 reset_mask = amdgpu_cik_gpu_check_soft_reset(adev);
1503
1504 /* try pci config reset */
1505 if (reset_mask && amdgpu_hard_reset)
1506 cik_gpu_pci_config_reset(adev);
1507 1208
1508 reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); 1209 cik_gpu_pci_config_reset(adev);
1509 1210
1510 if (!reset_mask) 1211 cik_set_bios_scratch_engine_hung(adev, false);
1511 cik_set_bios_scratch_engine_hung(adev, false);
1512 1212
1513 return 0; 1213 return 0;
1514} 1214}
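With the soft-reset path removed from cik.c, a CIK ASIC reset now always goes straight to the PCI config reset, bracketed by the BIOS scratch "engine hung" flag, presumably so the vbios knows to re-post the card. The whole function now reads (condensed from the hunk above):

static int cik_asic_reset(struct amdgpu_device *adev)
{
	/* no more per-block soft reset attempts */
	cik_set_bios_scratch_engine_hung(adev, true);

	cik_gpu_pci_config_reset(adev);

	cik_set_bios_scratch_engine_hung(adev, false);

	return 0;
}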
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 5f712ceddf08..675f34916aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -295,30 +295,6 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
295} 295}
296 296
297/** 297/**
298 * cik_sdma_ring_emit_semaphore - emit a semaphore on the dma ring
299 *
300 * @ring: amdgpu_ring structure holding ring information
301 * @semaphore: amdgpu semaphore object
302 * @emit_wait: wait or signal semaphore
303 *
304 * Add a DMA semaphore packet to the ring wait on or signal
305 * other rings (CIK).
306 */
307static bool cik_sdma_ring_emit_semaphore(struct amdgpu_ring *ring,
308 struct amdgpu_semaphore *semaphore,
309 bool emit_wait)
310{
311 u64 addr = semaphore->gpu_addr;
312 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
313
314 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
315 amdgpu_ring_write(ring, addr & 0xfffffff8);
316 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
317
318 return true;
319}
320
321/**
322 * cik_sdma_gfx_stop - stop the gfx async dma engines 298 * cik_sdma_gfx_stop - stop the gfx async dma engines
323 * 299 *
324 * @adev: amdgpu_device pointer 300 * @adev: amdgpu_device pointer
@@ -417,6 +393,9 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
417 cik_srbm_select(adev, 0, 0, 0, 0); 393 cik_srbm_select(adev, 0, 0, 0, 0);
418 mutex_unlock(&adev->srbm_mutex); 394 mutex_unlock(&adev->srbm_mutex);
419 395
396 WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
397 adev->gfx.config.gb_addr_config & 0x70);
398
420 WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); 399 WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
421 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 400 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
422 401
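SDMA now programs its own tiling configuration during ring bring-up instead of leaving that to the GFX init code; only a masked subset (0x70) of the global gb_addr_config is handed to the engine. From the hunk above, annotated:

	/* per SDMA instance: mirror the relevant bits of the global
	 * address config into the engine's tiling register */
	WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
	       adev->gfx.config.gb_addr_config & 0x70);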
@@ -584,7 +563,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
584 tmp = 0xCAFEDEAD; 563 tmp = 0xCAFEDEAD;
585 adev->wb.wb[index] = cpu_to_le32(tmp); 564 adev->wb.wb[index] = cpu_to_le32(tmp);
586 565
587 r = amdgpu_ring_lock(ring, 5); 566 r = amdgpu_ring_alloc(ring, 5);
588 if (r) { 567 if (r) {
589 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 568 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
590 amdgpu_wb_free(adev, index); 569 amdgpu_wb_free(adev, index);
@@ -595,7 +574,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
595 amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 574 amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
596 amdgpu_ring_write(ring, 1); /* number of DWs to follow */ 575 amdgpu_ring_write(ring, 1); /* number of DWs to follow */
597 amdgpu_ring_write(ring, 0xDEADBEEF); 576 amdgpu_ring_write(ring, 0xDEADBEEF);
598 amdgpu_ring_unlock_commit(ring); 577 amdgpu_ring_commit(ring);
599 578
600 for (i = 0; i < adev->usec_timeout; i++) { 579 for (i = 0; i < adev->usec_timeout; i++) {
601 tmp = le32_to_cpu(adev->wb.wb[index]); 580 tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -645,7 +624,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
645 tmp = 0xCAFEDEAD; 624 tmp = 0xCAFEDEAD;
646 adev->wb.wb[index] = cpu_to_le32(tmp); 625 adev->wb.wb[index] = cpu_to_le32(tmp);
647 memset(&ib, 0, sizeof(ib)); 626 memset(&ib, 0, sizeof(ib));
648 r = amdgpu_ib_get(ring, NULL, 256, &ib); 627 r = amdgpu_ib_get(adev, NULL, 256, &ib);
649 if (r) { 628 if (r) {
650 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 629 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
651 goto err0; 630 goto err0;
@@ -657,9 +636,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
657 ib.ptr[3] = 1; 636 ib.ptr[3] = 1;
658 ib.ptr[4] = 0xDEADBEEF; 637 ib.ptr[4] = 0xDEADBEEF;
659 ib.length_dw = 5; 638 ib.length_dw = 5;
660 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, 639 r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
661 AMDGPU_FENCE_OWNER_UNDEFINED, 640 NULL, &f);
662 &f);
663 if (r) 641 if (r)
664 goto err1; 642 goto err1;
665 643
@@ -738,7 +716,7 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
738 * Update PTEs by writing them manually using sDMA (CIK). 716 * Update PTEs by writing them manually using sDMA (CIK).
739 */ 717 */
740static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, 718static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
741 uint64_t pe, 719 const dma_addr_t *pages_addr, uint64_t pe,
742 uint64_t addr, unsigned count, 720 uint64_t addr, unsigned count,
743 uint32_t incr, uint32_t flags) 721 uint32_t incr, uint32_t flags)
744{ 722{
@@ -757,14 +735,7 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
757 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 735 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
758 ib->ptr[ib->length_dw++] = ndw; 736 ib->ptr[ib->length_dw++] = ndw;
759 for (; ndw > 0; ndw -= 2, --count, pe += 8) { 737 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
760 if (flags & AMDGPU_PTE_SYSTEM) { 738 value = amdgpu_vm_map_gart(pages_addr, addr);
761 value = amdgpu_vm_map_gart(ib->ring->adev, addr);
762 value &= 0xFFFFFFFFFFFFF000ULL;
763 } else if (flags & AMDGPU_PTE_VALID) {
764 value = addr;
765 } else {
766 value = 0;
767 }
768 addr += incr; 739 addr += incr;
769 value |= flags; 740 value |= flags;
770 ib->ptr[ib->length_dw++] = value; 741 ib->ptr[ib->length_dw++] = value;
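cik_sdma_vm_write_pte() no longer open-codes the system/VRAM distinction; it now receives the DMA-address array and lets amdgpu_vm_map_gart() do the translation (which presumably just passes the address through when no array is given). The inner loop shrinks to roughly the following; the second dword write is assumed from the unchanged tail of the function and is not shown in the hunk:

	for (; ndw > 0; ndw -= 2, --count, pe += 8) {
		/* translate system pages via pages_addr, or pass the
		 * address through for VRAM */
		value = amdgpu_vm_map_gart(pages_addr, addr);
		addr += incr;
		value |= flags;
		ib->ptr[ib->length_dw++] = value;			/* low dword */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);	/* high dword (assumed) */
	}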
@@ -827,9 +798,9 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
827 * @ib: indirect buffer to fill with padding 798 * @ib: indirect buffer to fill with padding
828 * 799 *
829 */ 800 */
830static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib) 801static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
831{ 802{
832 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); 803 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
833 u32 pad_count; 804 u32 pad_count;
834 int i; 805 int i;
835 806
@@ -1097,6 +1068,8 @@ static void cik_sdma_print_status(void *handle)
1097 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); 1068 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
1098 dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", 1069 dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
1099 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); 1070 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
1071 dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n",
1072 i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
1100 mutex_lock(&adev->srbm_mutex); 1073 mutex_lock(&adev->srbm_mutex);
1101 for (j = 0; j < 16; j++) { 1074 for (j = 0; j < 16; j++) {
1102 cik_srbm_select(adev, 0, 0, 0, j); 1075 cik_srbm_select(adev, 0, 0, 0, j);
@@ -1297,12 +1270,12 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
1297 .parse_cs = NULL, 1270 .parse_cs = NULL,
1298 .emit_ib = cik_sdma_ring_emit_ib, 1271 .emit_ib = cik_sdma_ring_emit_ib,
1299 .emit_fence = cik_sdma_ring_emit_fence, 1272 .emit_fence = cik_sdma_ring_emit_fence,
1300 .emit_semaphore = cik_sdma_ring_emit_semaphore,
1301 .emit_vm_flush = cik_sdma_ring_emit_vm_flush, 1273 .emit_vm_flush = cik_sdma_ring_emit_vm_flush,
1302 .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush, 1274 .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
1303 .test_ring = cik_sdma_ring_test_ring, 1275 .test_ring = cik_sdma_ring_test_ring,
1304 .test_ib = cik_sdma_ring_test_ib, 1276 .test_ib = cik_sdma_ring_test_ib,
1305 .insert_nop = cik_sdma_ring_insert_nop, 1277 .insert_nop = cik_sdma_ring_insert_nop,
1278 .pad_ib = cik_sdma_ring_pad_ib,
1306}; 1279};
1307 1280
1308static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) 1281static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1399,14 +1372,18 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
1399 .copy_pte = cik_sdma_vm_copy_pte, 1372 .copy_pte = cik_sdma_vm_copy_pte,
1400 .write_pte = cik_sdma_vm_write_pte, 1373 .write_pte = cik_sdma_vm_write_pte,
1401 .set_pte_pde = cik_sdma_vm_set_pte_pde, 1374 .set_pte_pde = cik_sdma_vm_set_pte_pde,
1402 .pad_ib = cik_sdma_vm_pad_ib,
1403}; 1375};
1404 1376
1405static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) 1377static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
1406{ 1378{
1379 unsigned i;
1380
1407 if (adev->vm_manager.vm_pte_funcs == NULL) { 1381 if (adev->vm_manager.vm_pte_funcs == NULL) {
1408 adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; 1382 adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
1409 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; 1383 for (i = 0; i < adev->sdma.num_instances; i++)
1410 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; 1384 adev->vm_manager.vm_pte_rings[i] =
1385 &adev->sdma.instance[i].ring;
1386
1387 adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
1411 } 1388 }
1412} 1389}
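Rather than hard-wiring page-table work to SDMA instance 0, every SDMA ring is now registered as a PTE ring; combined with the round-robin in amdgpu_vm_init() this spreads page-table updates across instances. From the hunk above, annotated:

static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vm_manager.vm_pte_funcs == NULL) {
		adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
		/* expose every SDMA instance; each VM picks one of them
		 * round-robin at amdgpu_vm_init() time */
		for (i = 0; i < adev->sdma.num_instances; i++)
			adev->vm_manager.vm_pte_rings[i] =
				&adev->sdma.instance[i].ring;

		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
	}
}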
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 34830189311e..e3ff809a0cae 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2670,7 +2670,6 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
2670 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2670 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2671 2671
2672 drm_crtc_cleanup(crtc); 2672 drm_crtc_cleanup(crtc);
2673 destroy_workqueue(amdgpu_crtc->pflip_queue);
2674 kfree(amdgpu_crtc); 2673 kfree(amdgpu_crtc);
2675} 2674}
2676 2675
@@ -2890,7 +2889,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
2890 2889
2891 drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); 2890 drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
2892 amdgpu_crtc->crtc_id = index; 2891 amdgpu_crtc->crtc_id = index;
2893 amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
2894 adev->mode_info.crtcs[index] = amdgpu_crtc; 2892 adev->mode_info.crtcs[index] = amdgpu_crtc;
2895 2893
2896 amdgpu_crtc->max_cursor_width = 128; 2894 amdgpu_crtc->max_cursor_width = 128;
@@ -3366,7 +3364,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
3366 spin_unlock_irqrestore(&adev->ddev->event_lock, flags); 3364 spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
3367 3365
3368 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); 3366 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
3369 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); 3367 schedule_work(&works->unpin_work);
3370 3368
3371 return 0; 3369 return 0;
3372} 3370}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 36deea162779..6b6c9b6879ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2661,7 +2661,6 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
2661 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2661 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2662 2662
2663 drm_crtc_cleanup(crtc); 2663 drm_crtc_cleanup(crtc);
2664 destroy_workqueue(amdgpu_crtc->pflip_queue);
2665 kfree(amdgpu_crtc); 2664 kfree(amdgpu_crtc);
2666} 2665}
2667 2666
@@ -2881,7 +2880,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
2881 2880
2882 drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); 2881 drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
2883 amdgpu_crtc->crtc_id = index; 2882 amdgpu_crtc->crtc_id = index;
2884 amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
2885 adev->mode_info.crtcs[index] = amdgpu_crtc; 2883 adev->mode_info.crtcs[index] = amdgpu_crtc;
2886 2884
2887 amdgpu_crtc->max_cursor_width = 128; 2885 amdgpu_crtc->max_cursor_width = 128;
@@ -3361,7 +3359,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
3361 spin_unlock_irqrestore(&adev->ddev->event_lock, flags); 3359 spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
3362 3360
3363 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); 3361 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
3364 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); 3362 schedule_work(&works->unpin_work);
3365 3363
3366 return 0; 3364 return 0;
3367} 3365}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 25dd8b668ea5..56bea36a6b18 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2582,7 +2582,6 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
2582 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2582 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2583 2583
2584 drm_crtc_cleanup(crtc); 2584 drm_crtc_cleanup(crtc);
2585 destroy_workqueue(amdgpu_crtc->pflip_queue);
2586 kfree(amdgpu_crtc); 2585 kfree(amdgpu_crtc);
2587} 2586}
2588 2587
@@ -2809,7 +2808,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
2809 2808
2810 drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); 2809 drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
2811 amdgpu_crtc->crtc_id = index; 2810 amdgpu_crtc->crtc_id = index;
2812 amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
2813 adev->mode_info.crtcs[index] = amdgpu_crtc; 2811 adev->mode_info.crtcs[index] = amdgpu_crtc;
2814 2812
2815 amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH; 2813 amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH;
@@ -3375,7 +3373,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
3375 spin_unlock_irqrestore(&adev->ddev->event_lock, flags); 3373 spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
3376 3374
3377 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); 3375 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
3378 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); 3376 schedule_work(&works->unpin_work);
3379 3377
3380 return 0; 3378 return 0;
3381} 3379}
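All three DCE variants (8, 10 and 11) drop the dedicated per-CRTC "amdgpu-pageflip-queue" workqueue; the unpin work is small enough that the system workqueue is presumably sufficient, so the pageflip interrupt handler just schedules it directly. The handler tail, common to all three files above:

	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);

	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
	/* no private workqueue anymore; unpin the old framebuffer
	 * from the system workqueue */
	schedule_work(&works->unpin_work);

	return 0;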
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
index e35340afd3db..b336c918d6a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
@@ -272,6 +272,12 @@ static int fiji_smu_upload_firmware_image(struct amdgpu_device *adev)
272 if (!adev->pm.fw) 272 if (!adev->pm.fw)
273 return -EINVAL; 273 return -EINVAL;
274 274
275 /* Skip SMC ucode loading on SR-IOV capable boards.
276 * vbios does this for us in asic_init in that case.
277 */
278 if (adev->virtualization.supports_sr_iov)
279 return 0;
280
275 hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; 281 hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
276 amdgpu_ucode_print_smc_hdr(&hdr->header); 282 amdgpu_ucode_print_smc_hdr(&hdr->header);
277 283
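On SR-IOV capable boards the vbios already loads the SMC firmware during asic_init, so fiji_smu_upload_firmware_image() now bails out early instead of uploading it again. From the hunk above:

	if (!adev->pm.fw)
		return -EINVAL;

	/* the vbios has already taken care of the SMC ucode on
	 * SR-IOV capable boards, nothing to upload here */
	if (adev->virtualization.supports_sr_iov)
		return 0;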
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 6c76139de1c9..250bcbce7fdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -31,8 +31,6 @@
31#include "amdgpu_ucode.h" 31#include "amdgpu_ucode.h"
32#include "clearstate_ci.h" 32#include "clearstate_ci.h"
33 33
34#include "uvd/uvd_4_2_d.h"
35
36#include "dce/dce_8_0_d.h" 34#include "dce/dce_8_0_d.h"
37#include "dce/dce_8_0_sh_mask.h" 35#include "dce/dce_8_0_sh_mask.h"
38 36
@@ -1006,9 +1004,15 @@ out:
1006 */ 1004 */
1007static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) 1005static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1008{ 1006{
1009 const u32 num_tile_mode_states = 32; 1007 const u32 num_tile_mode_states =
1010 const u32 num_secondary_tile_mode_states = 16; 1008 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1011 u32 reg_offset, gb_tile_moden, split_equal_to_row_size; 1009 const u32 num_secondary_tile_mode_states =
1010 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1011 u32 reg_offset, split_equal_to_row_size;
1012 uint32_t *tile, *macrotile;
1013
1014 tile = adev->gfx.config.tile_mode_array;
1015 macrotile = adev->gfx.config.macrotile_mode_array;
1012 1016
1013 switch (adev->gfx.config.mem_row_size_in_kb) { 1017 switch (adev->gfx.config.mem_row_size_in_kb) {
1014 case 1: 1018 case 1:
@@ -1023,832 +1027,531 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
1023 break; 1027 break;
1024 } 1028 }
1025 1029
1030 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1031 tile[reg_offset] = 0;
1032 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1033 macrotile[reg_offset] = 0;
1034
1026 switch (adev->asic_type) { 1035 switch (adev->asic_type) {
1027 case CHIP_BONAIRE: 1036 case CHIP_BONAIRE:
1028 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1037 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1029 switch (reg_offset) { 1038 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1030 case 0: 1039 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1031 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1040 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1032 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1041 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1033 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 1042 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1034 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1043 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1035 break; 1044 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1036 case 1: 1045 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1037 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1046 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1038 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1047 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1039 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 1048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1040 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1049 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1041 break; 1050 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1042 case 2: 1051 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1043 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1052 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1044 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1053 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1045 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 1054 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1046 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1055 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1047 break; 1056 TILE_SPLIT(split_equal_to_row_size));
1048 case 3: 1057 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1049 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1058 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1050 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1059 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1051 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 1060 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1052 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1061 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1053 break; 1062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1054 case 4: 1063 TILE_SPLIT(split_equal_to_row_size));
1055 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1064 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1056 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1065 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1057 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1066 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1058 TILE_SPLIT(split_equal_to_row_size)); 1067 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1059 break; 1068 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1060 case 5: 1069 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1061 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1070 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1062 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1071 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1063 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1072 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1064 break; 1073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1065 case 6: 1074 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1066 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1075 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1067 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1076 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1068 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1069 TILE_SPLIT(split_equal_to_row_size)); 1078 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1070 break; 1079 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1071 case 7: 1080 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1072 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); 1081 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1073 break; 1082 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1074 1083 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1075 case 8: 1084 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1076 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 1085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1077 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 1086 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1078 break; 1087 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1079 case 9: 1088 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1080 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1081 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1090 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1082 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 1091 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1083 break; 1092 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1084 case 10: 1093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1085 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1094 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1086 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1095 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1087 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1096 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1097 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1089 break; 1098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1090 case 11: 1099 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1091 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1100 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1092 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1101 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1093 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1102 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1103 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1095 break; 1104 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1096 case 12: 1105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1097 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); 1106 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1098 break; 1107 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1099 case 13: 1108 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1100 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1101 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1110 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1111 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1103 break; 1112 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1104 case 14: 1113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1105 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1114 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1106 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1115 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1107 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1116 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1117 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1109 break; 1118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1110 case 15: 1119 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1111 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 1120 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1112 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1121 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1113 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1123 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1115 break; 1124 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1116 case 16: 1125 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1117 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1118 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1127 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1119 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1128 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1129 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1121 break; 1130 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1122 case 17: 1131 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1123 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); 1132 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1124 break; 1133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1125 case 18: 1134 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1126 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1135 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1127 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1136 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1128 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1138 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1130 break; 1139
1131 case 19: 1140 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1132 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1133 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1134 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1143 NUM_BANKS(ADDR_SURF_16_BANK));
1135 break; 1144 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1136 case 20: 1145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1137 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1138 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1147 NUM_BANKS(ADDR_SURF_16_BANK));
1139 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1148 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1141 break; 1150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1142 case 21: 1151 NUM_BANKS(ADDR_SURF_16_BANK));
1143 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 1152 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1144 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1145 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1155 NUM_BANKS(ADDR_SURF_16_BANK));
1147 break; 1156 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1148 case 22: 1157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1149 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 1158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1150 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1159 NUM_BANKS(ADDR_SURF_16_BANK));
1151 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1160 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1153 break; 1162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1154 case 23: 1163 NUM_BANKS(ADDR_SURF_8_BANK));
1155 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); 1164 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1156 break; 1165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1157 case 24: 1166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1158 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1167 NUM_BANKS(ADDR_SURF_4_BANK));
1159 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1168 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1160 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1162 break; 1171 NUM_BANKS(ADDR_SURF_16_BANK));
1163 case 25: 1172 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1164 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 1173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1165 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1166 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1175 NUM_BANKS(ADDR_SURF_16_BANK));
1167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1176 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1168 break; 1177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1169 case 26: 1178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1170 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 1179 NUM_BANKS(ADDR_SURF_16_BANK));
1171 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1180 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1172 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1174 break; 1183 NUM_BANKS(ADDR_SURF_16_BANK));
1175 case 27: 1184 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1176 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1177 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1178 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 1187 NUM_BANKS(ADDR_SURF_16_BANK));
1179 break; 1188 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1180 case 28: 1189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1181 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1182 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1191 NUM_BANKS(ADDR_SURF_8_BANK));
1183 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1192 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1185 break; 1194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1186 case 29: 1195 NUM_BANKS(ADDR_SURF_4_BANK));
1187 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1196
1188 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1197 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1189 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1198 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1199 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1191 break; 1200 if (reg_offset != 7)
1192 case 30: 1201 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1193 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1194 break;
1195 default:
1196 gb_tile_moden = 0;
1197 break;
1198 }
1199 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1200 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1201 }
1202 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1203 switch (reg_offset) {
1204 case 0:
1205 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1208 NUM_BANKS(ADDR_SURF_16_BANK));
1209 break;
1210 case 1:
1211 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1214 NUM_BANKS(ADDR_SURF_16_BANK));
1215 break;
1216 case 2:
1217 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1218 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1219 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1220 NUM_BANKS(ADDR_SURF_16_BANK));
1221 break;
1222 case 3:
1223 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1226 NUM_BANKS(ADDR_SURF_16_BANK));
1227 break;
1228 case 4:
1229 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1232 NUM_BANKS(ADDR_SURF_16_BANK));
1233 break;
1234 case 5:
1235 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1238 NUM_BANKS(ADDR_SURF_8_BANK));
1239 break;
1240 case 6:
1241 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1244 NUM_BANKS(ADDR_SURF_4_BANK));
1245 break;
1246 case 8:
1247 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1250 NUM_BANKS(ADDR_SURF_16_BANK));
1251 break;
1252 case 9:
1253 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1256 NUM_BANKS(ADDR_SURF_16_BANK));
1257 break;
1258 case 10:
1259 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1262 NUM_BANKS(ADDR_SURF_16_BANK));
1263 break;
1264 case 11:
1265 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1266 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1267 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1268 NUM_BANKS(ADDR_SURF_16_BANK));
1269 break;
1270 case 12:
1271 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1272 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1273 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1274 NUM_BANKS(ADDR_SURF_16_BANK));
1275 break;
1276 case 13:
1277 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1278 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1279 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1280 NUM_BANKS(ADDR_SURF_8_BANK));
1281 break;
1282 case 14:
1283 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1286 NUM_BANKS(ADDR_SURF_4_BANK));
1287 break;
1288 default:
1289 gb_tile_moden = 0;
1290 break;
1291 }
1292 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1293 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1294 }
1295 break; 1202 break;
1296 case CHIP_HAWAII: 1203 case CHIP_HAWAII:
1297 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1204 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1298 switch (reg_offset) { 1205 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1299 case 0: 1206 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1300 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1207 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1301 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1208 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 1209 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1210 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1304 break; 1211 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1305 case 1: 1212 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1306 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1213 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1307 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1214 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1308 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 1215 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1309 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1216 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1310 break; 1217 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1311 case 2: 1218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1312 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1313 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1220 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1314 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 1221 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1315 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1316 break; 1223 TILE_SPLIT(split_equal_to_row_size));
1317 case 3: 1224 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1318 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1225 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1320 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 1227 TILE_SPLIT(split_equal_to_row_size));
1321 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1228 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1322 break; 1229 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1323 case 4: 1230 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1324 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1231 TILE_SPLIT(split_equal_to_row_size));
1325 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1232 tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1326 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1233 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1327 TILE_SPLIT(split_equal_to_row_size)); 1234 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1328 break; 1235 TILE_SPLIT(split_equal_to_row_size));
1329 case 5: 1236 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1330 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1237 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1238 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1332 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1239 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1333 TILE_SPLIT(split_equal_to_row_size)); 1240 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1334 break; 1241 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1335 case 6: 1242 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1336 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1243 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1337 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1245 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1339 TILE_SPLIT(split_equal_to_row_size)); 1246 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1340 break; 1247 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1341 case 7: 1248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1342 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1249 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1343 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1250 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1344 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1251 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1345 TILE_SPLIT(split_equal_to_row_size)); 1252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1346 break; 1253 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1347 1254 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1348 case 8: 1255 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1349 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 1256 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1350 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 1257 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1351 break; 1258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1352 case 9: 1259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1353 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1260 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1354 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1261 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1355 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 1262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1356 break; 1263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1357 case 10: 1264 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1358 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1265 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1359 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1360 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1268 tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1362 break; 1269 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1363 case 11: 1270 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1364 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1272 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1366 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1273 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1274 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1368 break; 1275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1369 case 12: 1276 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1370 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | 1277 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1371 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1372 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1279 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1280 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1374 break; 1281 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1375 case 13: 1282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1376 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1283 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1377 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1284 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1378 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1379 break; 1286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1380 case 14: 1287 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1381 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1288 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1382 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1383 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1291 tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1385 break; 1292 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1386 case 15: 1293 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1387 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 1294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1388 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1295 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1389 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1296 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1297 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1391 break; 1298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1392 case 16: 1299 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1393 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1300 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1394 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1395 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1303 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1397 break; 1304 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1398 case 17: 1305 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1399 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1400 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1307 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1401 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1308 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1309 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1403 break; 1310 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1404 case 18: 1311 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1405 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1312 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1406 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1407 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1314 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1315 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1409 break; 1316 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1410 case 19: 1317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1411 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1318 tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1412 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1319 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1413 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); 1320 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1414 break; 1321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1415 case 20: 1322
1416 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1323 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1418 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1326 NUM_BANKS(ADDR_SURF_16_BANK));
1420 break; 1327 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1421 case 21: 1328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1422 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 1329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1330 NUM_BANKS(ADDR_SURF_16_BANK));
1424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1331 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1426 break; 1333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1427 case 22: 1334 NUM_BANKS(ADDR_SURF_16_BANK));
1428 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 1335 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1430 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1338 NUM_BANKS(ADDR_SURF_16_BANK));
1432 break; 1339 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1433 case 23: 1340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1434 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 1341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1435 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1342 NUM_BANKS(ADDR_SURF_8_BANK));
1436 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1343 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1438 break; 1345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1439 case 24: 1346 NUM_BANKS(ADDR_SURF_4_BANK));
1440 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1347 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1442 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1350 NUM_BANKS(ADDR_SURF_4_BANK));
1444 break; 1351 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1445 case 25: 1352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1446 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 1353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1354 NUM_BANKS(ADDR_SURF_16_BANK));
1448 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1355 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1450 break; 1357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1451 case 26: 1358 NUM_BANKS(ADDR_SURF_16_BANK));
1452 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 1359 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1453 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1454 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1362 NUM_BANKS(ADDR_SURF_16_BANK));
1456 break; 1363 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1457 case 27: 1364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1458 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1366 NUM_BANKS(ADDR_SURF_8_BANK));
1460 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 1367 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1461 break; 1368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1462 case 28: 1369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1463 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1370 NUM_BANKS(ADDR_SURF_16_BANK));
1464 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1371 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1465 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1467 break; 1374 NUM_BANKS(ADDR_SURF_8_BANK));
1468 case 29: 1375 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1469 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1470 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 1377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1471 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1378 NUM_BANKS(ADDR_SURF_4_BANK));
1472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1379
1473 break; 1380 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1474 case 30: 1381 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1475 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1382 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1476 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 1383 if (reg_offset != 7)
1477 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1384 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1479 break;
1480 default:
1481 gb_tile_moden = 0;
1482 break;
1483 }
1484 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1485 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1486 }
1487 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1488 switch (reg_offset) {
1489 case 0:
1490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1493 NUM_BANKS(ADDR_SURF_16_BANK));
1494 break;
1495 case 1:
1496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1499 NUM_BANKS(ADDR_SURF_16_BANK));
1500 break;
1501 case 2:
1502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1505 NUM_BANKS(ADDR_SURF_16_BANK));
1506 break;
1507 case 3:
1508 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1511 NUM_BANKS(ADDR_SURF_16_BANK));
1512 break;
1513 case 4:
1514 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1517 NUM_BANKS(ADDR_SURF_8_BANK));
1518 break;
1519 case 5:
1520 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1523 NUM_BANKS(ADDR_SURF_4_BANK));
1524 break;
1525 case 6:
1526 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1529 NUM_BANKS(ADDR_SURF_4_BANK));
1530 break;
1531 case 8:
1532 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1535 NUM_BANKS(ADDR_SURF_16_BANK));
1536 break;
1537 case 9:
1538 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1541 NUM_BANKS(ADDR_SURF_16_BANK));
1542 break;
1543 case 10:
1544 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1547 NUM_BANKS(ADDR_SURF_16_BANK));
1548 break;
1549 case 11:
1550 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1553 NUM_BANKS(ADDR_SURF_8_BANK));
1554 break;
1555 case 12:
1556 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1559 NUM_BANKS(ADDR_SURF_16_BANK));
1560 break;
1561 case 13:
1562 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1565 NUM_BANKS(ADDR_SURF_8_BANK));
1566 break;
1567 case 14:
1568 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1569 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1570 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1571 NUM_BANKS(ADDR_SURF_4_BANK));
1572 break;
1573 default:
1574 gb_tile_moden = 0;
1575 break;
1576 }
1577 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1578 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1579 }
1580 break; 1385 break;
1581 case CHIP_KABINI: 1386 case CHIP_KABINI:
1582 case CHIP_KAVERI: 1387 case CHIP_KAVERI:
1583 case CHIP_MULLINS: 1388 case CHIP_MULLINS:
1584 default: 1389 default:
1585 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1390 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1586 switch (reg_offset) { 1391 PIPE_CONFIG(ADDR_SURF_P2) |
1587 case 0: 1392 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1588 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1393 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1589 PIPE_CONFIG(ADDR_SURF_P2) | 1394 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1590 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 1395 PIPE_CONFIG(ADDR_SURF_P2) |
1591 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1592 break; 1397 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1593 case 1: 1398 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1594 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1399 PIPE_CONFIG(ADDR_SURF_P2) |
1595 PIPE_CONFIG(ADDR_SURF_P2) | 1400 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1596 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 1401 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1597 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1402 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1598 break; 1403 PIPE_CONFIG(ADDR_SURF_P2) |
1599 case 2: 1404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1600 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1601 PIPE_CONFIG(ADDR_SURF_P2) | 1406 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1602 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 1407 PIPE_CONFIG(ADDR_SURF_P2) |
1603 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1604 break; 1409 TILE_SPLIT(split_equal_to_row_size));
1605 case 3: 1410 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1606 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1411 PIPE_CONFIG(ADDR_SURF_P2) |
1607 PIPE_CONFIG(ADDR_SURF_P2) | 1412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1608 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 1413 tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1609 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1414 PIPE_CONFIG(ADDR_SURF_P2) |
1610 break; 1415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1611 case 4: 1416 TILE_SPLIT(split_equal_to_row_size));
1612 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1417 tile[7] = (TILE_SPLIT(split_equal_to_row_size));
1613 PIPE_CONFIG(ADDR_SURF_P2) | 1418 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1614 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1419 PIPE_CONFIG(ADDR_SURF_P2));
1615 TILE_SPLIT(split_equal_to_row_size)); 1420 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1616 break; 1421 PIPE_CONFIG(ADDR_SURF_P2) |
1617 case 5: 1422 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1618 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1423 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1619 PIPE_CONFIG(ADDR_SURF_P2) | 1424 PIPE_CONFIG(ADDR_SURF_P2) |
1620 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 1425 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1621 break; 1426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1622 case 6: 1427 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1623 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1428 PIPE_CONFIG(ADDR_SURF_P2) |
1624 PIPE_CONFIG(ADDR_SURF_P2) | 1429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1625 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | 1430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1626 TILE_SPLIT(split_equal_to_row_size)); 1431 tile[12] = (TILE_SPLIT(split_equal_to_row_size));
1627 break; 1432 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1628 case 7: 1433 PIPE_CONFIG(ADDR_SURF_P2) |
1629 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); 1434 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1630 break; 1435 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1631 1436 PIPE_CONFIG(ADDR_SURF_P2) |
1632 case 8: 1437 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1633 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 1438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1634 PIPE_CONFIG(ADDR_SURF_P2)); 1439 tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1635 break; 1440 PIPE_CONFIG(ADDR_SURF_P2) |
1636 case 9: 1441 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1637 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1638 PIPE_CONFIG(ADDR_SURF_P2) | 1443 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1639 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); 1444 PIPE_CONFIG(ADDR_SURF_P2) |
1640 break; 1445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1641 case 10: 1446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1642 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1447 tile[17] = (TILE_SPLIT(split_equal_to_row_size));
1643 PIPE_CONFIG(ADDR_SURF_P2) | 1448 tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1644 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1449 PIPE_CONFIG(ADDR_SURF_P2) |
1645 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1450 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1646 break; 1451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1647 case 11: 1452 tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1648 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1453 PIPE_CONFIG(ADDR_SURF_P2) |
1649 PIPE_CONFIG(ADDR_SURF_P2) | 1454 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
1650 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 1455 tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1651 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1456 PIPE_CONFIG(ADDR_SURF_P2) |
1652 break; 1457 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1653 case 12: 1458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1654 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); 1459 tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1655 break; 1460 PIPE_CONFIG(ADDR_SURF_P2) |
1656 case 13: 1461 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1657 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1658 PIPE_CONFIG(ADDR_SURF_P2) | 1463 tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1659 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); 1464 PIPE_CONFIG(ADDR_SURF_P2) |
1660 break; 1465 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1661 case 14: 1466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1662 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1467 tile[23] = (TILE_SPLIT(split_equal_to_row_size));
1663 PIPE_CONFIG(ADDR_SURF_P2) | 1468 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1664 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1469 PIPE_CONFIG(ADDR_SURF_P2) |
1665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1470 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1666 break; 1471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1667 case 15: 1472 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1668 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 1473 PIPE_CONFIG(ADDR_SURF_P2) |
1669 PIPE_CONFIG(ADDR_SURF_P2) | 1474 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1670 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1671 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1476 tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1672 break; 1477 PIPE_CONFIG(ADDR_SURF_P2) |
1673 case 16: 1478 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1674 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1675 PIPE_CONFIG(ADDR_SURF_P2) | 1480 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1676 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1481 PIPE_CONFIG(ADDR_SURF_P2) |
1677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1482 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1678 break; 1483 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1679 case 17: 1484 PIPE_CONFIG(ADDR_SURF_P2) |
1680 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); 1485 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1681 break; 1486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1682 case 18: 1487 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1683 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1488 PIPE_CONFIG(ADDR_SURF_P2) |
1684 PIPE_CONFIG(ADDR_SURF_P2) | 1489 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1685 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1686 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1491 tile[30] = (TILE_SPLIT(split_equal_to_row_size));
1687 break; 1492
1688 case 19: 1493 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1689 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1690 PIPE_CONFIG(ADDR_SURF_P2) | 1495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1691 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); 1496 NUM_BANKS(ADDR_SURF_8_BANK));
1692 break; 1497 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1693 case 20: 1498 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1694 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1499 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695 PIPE_CONFIG(ADDR_SURF_P2) | 1500 NUM_BANKS(ADDR_SURF_8_BANK));
1696 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1501 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1698 break; 1503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1699 case 21: 1504 NUM_BANKS(ADDR_SURF_8_BANK));
1700 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 1505 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1701 PIPE_CONFIG(ADDR_SURF_P2) | 1506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1702 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1508 NUM_BANKS(ADDR_SURF_8_BANK));
1704 break; 1509 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1705 case 22: 1510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1706 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 1511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1707 PIPE_CONFIG(ADDR_SURF_P2) | 1512 NUM_BANKS(ADDR_SURF_8_BANK));
1708 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1513 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1710 break; 1515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1711 case 23: 1516 NUM_BANKS(ADDR_SURF_8_BANK));
1712 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); 1517 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1713 break; 1518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1714 case 24: 1519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1715 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1520 NUM_BANKS(ADDR_SURF_8_BANK));
1716 PIPE_CONFIG(ADDR_SURF_P2) | 1521 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1717 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 1522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1719 break; 1524 NUM_BANKS(ADDR_SURF_16_BANK));
1720 case 25: 1525 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1721 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 1526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1722 PIPE_CONFIG(ADDR_SURF_P2) | 1527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1723 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1528 NUM_BANKS(ADDR_SURF_16_BANK));
1724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1529 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1725 break; 1530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1726 case 26: 1531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1727 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 1532 NUM_BANKS(ADDR_SURF_16_BANK));
1728 PIPE_CONFIG(ADDR_SURF_P2) | 1533 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1729 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 1534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1730 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 1535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1731 break; 1536 NUM_BANKS(ADDR_SURF_16_BANK));
1732 case 27: 1537 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1733 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 1538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1734 PIPE_CONFIG(ADDR_SURF_P2) | 1539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1735 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); 1540 NUM_BANKS(ADDR_SURF_16_BANK));
1736 break; 1541 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1737 case 28: 1542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1738 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1739 PIPE_CONFIG(ADDR_SURF_P2) | 1544 NUM_BANKS(ADDR_SURF_16_BANK));
1740 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1545 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 1546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1742 break; 1547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1743 case 29: 1548 NUM_BANKS(ADDR_SURF_8_BANK));
1744 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 1549
1745 PIPE_CONFIG(ADDR_SURF_P2) | 1550 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1746 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 1551 WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
1747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 1552 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1748 break; 1553 if (reg_offset != 7)
1749 case 30: 1554 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
1750 gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
1751 break;
1752 default:
1753 gb_tile_moden = 0;
1754 break;
1755 }
1756 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1757 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1758 }
1759 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1760 switch (reg_offset) {
1761 case 0:
1762 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1765 NUM_BANKS(ADDR_SURF_8_BANK));
1766 break;
1767 case 1:
1768 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1771 NUM_BANKS(ADDR_SURF_8_BANK));
1772 break;
1773 case 2:
1774 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1777 NUM_BANKS(ADDR_SURF_8_BANK));
1778 break;
1779 case 3:
1780 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1783 NUM_BANKS(ADDR_SURF_8_BANK));
1784 break;
1785 case 4:
1786 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789 NUM_BANKS(ADDR_SURF_8_BANK));
1790 break;
1791 case 5:
1792 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1793 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1794 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1795 NUM_BANKS(ADDR_SURF_8_BANK));
1796 break;
1797 case 6:
1798 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1800 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801 NUM_BANKS(ADDR_SURF_8_BANK));
1802 break;
1803 case 8:
1804 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1805 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1806 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1807 NUM_BANKS(ADDR_SURF_16_BANK));
1808 break;
1809 case 9:
1810 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1812 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813 NUM_BANKS(ADDR_SURF_16_BANK));
1814 break;
1815 case 10:
1816 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1817 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1818 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1819 NUM_BANKS(ADDR_SURF_16_BANK));
1820 break;
1821 case 11:
1822 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825 NUM_BANKS(ADDR_SURF_16_BANK));
1826 break;
1827 case 12:
1828 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1829 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1830 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1831 NUM_BANKS(ADDR_SURF_16_BANK));
1832 break;
1833 case 13:
1834 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1837 NUM_BANKS(ADDR_SURF_16_BANK));
1838 break;
1839 case 14:
1840 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1841 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1842 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1843 NUM_BANKS(ADDR_SURF_8_BANK));
1844 break;
1845 default:
1846 gb_tile_moden = 0;
1847 break;
1848 }
1849 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1850 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1851 }
1852 break; 1555 break;
1853 } 1556 }
1854} 1557}
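For reference, the rework above replaces the per-register switch statements with tile[] and macrotile[] arrays that are filled per ASIC and then written out in two short loops, with macrotile slot 7 skipped. A minimal standalone sketch of that write-out pattern (register offsets, array sizes and the WREG32 stub here are illustrative only, not the real CIK register map):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins; not the real CIK register offsets. */
#define mmGB_TILE_MODE0_STUB       0x2644
#define mmGB_MACROTILE_MODE0_STUB  0x2664

static void wreg32_stub(uint32_t reg, uint32_t val)
{
        printf("WREG32(0x%04x) = 0x%08x\n", (unsigned)reg, (unsigned)val);
}

int main(void)
{
        uint32_t tile[32] = { 0 };        /* filled per ASIC, as in the diff above */
        uint32_t macrotile[16] = { 0 };
        unsigned int num_tile_mode_states = 32;            /* illustrative counts */
        unsigned int num_secondary_tile_mode_states = 16;
        unsigned int reg_offset;

        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                wreg32_stub(mmGB_TILE_MODE0_STUB + reg_offset, tile[reg_offset]);

        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
                if (reg_offset != 7)   /* macrotile slot 7 is never programmed */
                        wreg32_stub(mmGB_MACROTILE_MODE0_STUB + reg_offset,
                                    macrotile[reg_offset]);

        return 0;
}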
@@ -1893,45 +1596,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
1893 */ 1596 */
1894static u32 gfx_v7_0_create_bitmask(u32 bit_width) 1597static u32 gfx_v7_0_create_bitmask(u32 bit_width)
1895{ 1598{
1896 u32 i, mask = 0; 1599 return (u32)((1ULL << bit_width) - 1);
1897
1898 for (i = 0; i < bit_width; i++) {
1899 mask <<= 1;
1900 mask |= 1;
1901 }
1902 return mask;
1903} 1600}
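The helper above now builds the mask arithmetically instead of bit by bit. A minimal standalone check that the old loop and the new shift form agree for every width the driver can ask for:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t bitmask_loop(uint32_t bit_width)      /* old implementation */
{
        uint32_t i, mask = 0;

        for (i = 0; i < bit_width; i++) {
                mask <<= 1;
                mask |= 1;
        }
        return mask;
}

static uint32_t bitmask_shift(uint32_t bit_width)     /* new implementation */
{
        return (uint32_t)((1ULL << bit_width) - 1);
}

int main(void)
{
        for (uint32_t w = 0; w <= 32; w++)
                assert(bitmask_loop(w) == bitmask_shift(w));
        printf("masks agree for widths 0..32\n");
        return 0;
}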
1904 1601
1905/** 1602/**
1906 * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs 1603 * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
1907 * 1604 *
1908 * @adev: amdgpu_device pointer 1605 * @adev: amdgpu_device pointer
1909 * @max_rb_num: max RBs (render backends) for the asic
1910 * @se_num: number of SEs (shader engines) for the asic
1911 * @sh_per_se: number of SH blocks per SE for the asic
1912 * 1606 *
1913 * Calculates the bitmask of disabled RBs (CIK). 1607 * Calculates the bitmask of enabled RBs (CIK).
1914 * Returns the disabled RB bitmask. 1608 * Returns the enabled RB bitmask.
1915 */ 1609 */
1916static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, 1610static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1917 u32 max_rb_num_per_se,
1918 u32 sh_per_se)
1919{ 1611{
1920 u32 data, mask; 1612 u32 data, mask;
1921 1613
1922 data = RREG32(mmCC_RB_BACKEND_DISABLE); 1614 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1923 if (data & 1)
1924 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1925 else
1926 data = 0;
1927
1928 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); 1615 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1929 1616
1617 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1930 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1618 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1931 1619
1932 mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se); 1620 mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
1621 adev->gfx.config.max_sh_per_se);
1933 1622
1934 return data & mask; 1623 return (~data) & mask;
1935} 1624}
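The new helper combines both disable registers, isolates the BACKEND_DISABLE field, and inverts it under a mask sized by backends per SH. A standalone sketch of that computation with made-up register values and an assumed field layout (bits 16..23):

#include <stdint.h>
#include <stdio.h>

#define BACKEND_DISABLE_MASK   0x00ff0000u   /* assumed field layout, illustration only */
#define BACKEND_DISABLE_SHIFT  16

static uint32_t create_bitmask(uint32_t bit_width)
{
        return (uint32_t)((1ULL << bit_width) - 1);
}

int main(void)
{
        uint32_t cc_disable   = 0x00020000;  /* pretend RB1 is fuse-disabled */
        uint32_t user_disable = 0x00000000;  /* nothing disabled by the user */
        uint32_t max_backends_per_se = 2, max_sh_per_se = 1;

        uint32_t data = (cc_disable | user_disable) & BACKEND_DISABLE_MASK;

        data >>= BACKEND_DISABLE_SHIFT;

        uint32_t mask = create_bitmask(max_backends_per_se / max_sh_per_se);

        /* prints 0x1: only RB0 remains active */
        printf("active RB bitmap: 0x%x\n", (unsigned)((~data) & mask));
        return 0;
}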
1936 1625
1937/** 1626/**
@@ -1940,73 +1629,36 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
1940 * @adev: amdgpu_device pointer 1629 * @adev: amdgpu_device pointer
1941 * @se_num: number of SEs (shader engines) for the asic 1630 * @se_num: number of SEs (shader engines) for the asic
1942 * @sh_per_se: number of SH blocks per SE for the asic 1631 * @sh_per_se: number of SH blocks per SE for the asic
1943 * @max_rb_num: max RBs (render backends) for the asic
1944 * 1632 *
1945 * Configures per-SE/SH RB registers (CIK). 1633 * Configures per-SE/SH RB registers (CIK).
1946 */ 1634 */
1947static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, 1635static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
1948 u32 se_num, u32 sh_per_se,
1949 u32 max_rb_num_per_se)
1950{ 1636{
1951 int i, j; 1637 int i, j;
1952 u32 data, mask; 1638 u32 data, tmp, num_rbs = 0;
1953 u32 disabled_rbs = 0; 1639 u32 active_rbs = 0;
1954 u32 enabled_rbs = 0;
1955 1640
1956 mutex_lock(&adev->grbm_idx_mutex); 1641 mutex_lock(&adev->grbm_idx_mutex);
1957 for (i = 0; i < se_num; i++) { 1642 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1958 for (j = 0; j < sh_per_se; j++) { 1643 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1959 gfx_v7_0_select_se_sh(adev, i, j); 1644 gfx_v7_0_select_se_sh(adev, i, j);
1960 data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se); 1645 data = gfx_v7_0_get_rb_active_bitmap(adev);
1961 if (adev->asic_type == CHIP_HAWAII) 1646 if (adev->asic_type == CHIP_HAWAII)
1962 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH); 1647 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1648 HAWAII_RB_BITMAP_WIDTH_PER_SH);
1963 else 1649 else
1964 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); 1650 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1651 CIK_RB_BITMAP_WIDTH_PER_SH);
1965 } 1652 }
1966 } 1653 }
1967 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 1654 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
1968 mutex_unlock(&adev->grbm_idx_mutex); 1655 mutex_unlock(&adev->grbm_idx_mutex);
1969 1656
1970 mask = 1; 1657 adev->gfx.config.backend_enable_mask = active_rbs;
1971 for (i = 0; i < max_rb_num_per_se * se_num; i++) { 1658 tmp = active_rbs;
1972 if (!(disabled_rbs & mask)) 1659 while (tmp >>= 1)
1973 enabled_rbs |= mask; 1660 num_rbs++;
1974 mask <<= 1; 1661 adev->gfx.config.num_rbs = num_rbs;
1975 }
1976
1977 adev->gfx.config.backend_enable_mask = enabled_rbs;
1978
1979 mutex_lock(&adev->grbm_idx_mutex);
1980 for (i = 0; i < se_num; i++) {
1981 gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
1982 data = 0;
1983 for (j = 0; j < sh_per_se; j++) {
1984 switch (enabled_rbs & 3) {
1985 case 0:
1986 if (j == 0)
1987 data |= (RASTER_CONFIG_RB_MAP_3 <<
1988 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
1989 else
1990 data |= (RASTER_CONFIG_RB_MAP_0 <<
1991 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
1992 break;
1993 case 1:
1994 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1995 break;
1996 case 2:
1997 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1998 break;
1999 case 3:
2000 default:
2001 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2002 break;
2003 }
2004 enabled_rbs >>= 2;
2005 }
2006 WREG32(mmPA_SC_RASTER_CONFIG, data);
2007 }
2008 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2009 mutex_unlock(&adev->grbm_idx_mutex);
2010} 1662}
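gfx_v7_0_setup_rb() now only collects each SE/SH bitmap into backend_enable_mask instead of also deriving PA_SC_RASTER_CONFIG. A standalone sketch of the packing step, assuming a CIK-style bitmap width of 2 bits per SH:

#include <stdint.h>
#include <stdio.h>

#define RB_BITMAP_WIDTH_PER_SH 2   /* assumption for illustration */

int main(void)
{
        uint32_t max_shader_engines = 2, max_sh_per_se = 1;
        uint32_t active_rbs = 0;

        for (uint32_t i = 0; i < max_shader_engines; i++) {
                for (uint32_t j = 0; j < max_sh_per_se; j++) {
                        /* pretend each SE/SH reports both of its RBs active */
                        uint32_t data = 0x3;

                        active_rbs |= data << ((i * max_sh_per_se + j) *
                                               RB_BITMAP_WIDTH_PER_SH);
                }
        }
        /* prints 0xf: two SEs, two active RBs each */
        printf("backend_enable_mask = 0x%x\n", (unsigned)active_rbs);
        return 0;
}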
2011 1663
2012/** 1664/**
@@ -2059,192 +1711,23 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
2059 */ 1711 */
2060static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) 1712static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
2061{ 1713{
2062 u32 gb_addr_config; 1714 u32 tmp, sh_mem_cfg;
2063 u32 mc_shared_chmap, mc_arb_ramcfg;
2064 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
2065 u32 sh_mem_cfg;
2066 u32 tmp;
2067 int i; 1715 int i;
2068 1716
2069 switch (adev->asic_type) {
2070 case CHIP_BONAIRE:
2071 adev->gfx.config.max_shader_engines = 2;
2072 adev->gfx.config.max_tile_pipes = 4;
2073 adev->gfx.config.max_cu_per_sh = 7;
2074 adev->gfx.config.max_sh_per_se = 1;
2075 adev->gfx.config.max_backends_per_se = 2;
2076 adev->gfx.config.max_texture_channel_caches = 4;
2077 adev->gfx.config.max_gprs = 256;
2078 adev->gfx.config.max_gs_threads = 32;
2079 adev->gfx.config.max_hw_contexts = 8;
2080
2081 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2082 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2083 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2084 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2085 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2086 break;
2087 case CHIP_HAWAII:
2088 adev->gfx.config.max_shader_engines = 4;
2089 adev->gfx.config.max_tile_pipes = 16;
2090 adev->gfx.config.max_cu_per_sh = 11;
2091 adev->gfx.config.max_sh_per_se = 1;
2092 adev->gfx.config.max_backends_per_se = 4;
2093 adev->gfx.config.max_texture_channel_caches = 16;
2094 adev->gfx.config.max_gprs = 256;
2095 adev->gfx.config.max_gs_threads = 32;
2096 adev->gfx.config.max_hw_contexts = 8;
2097
2098 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2099 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2100 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2101 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2102 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
2103 break;
2104 case CHIP_KAVERI:
2105 adev->gfx.config.max_shader_engines = 1;
2106 adev->gfx.config.max_tile_pipes = 4;
2107 if ((adev->pdev->device == 0x1304) ||
2108 (adev->pdev->device == 0x1305) ||
2109 (adev->pdev->device == 0x130C) ||
2110 (adev->pdev->device == 0x130F) ||
2111 (adev->pdev->device == 0x1310) ||
2112 (adev->pdev->device == 0x1311) ||
2113 (adev->pdev->device == 0x131C)) {
2114 adev->gfx.config.max_cu_per_sh = 8;
2115 adev->gfx.config.max_backends_per_se = 2;
2116 } else if ((adev->pdev->device == 0x1309) ||
2117 (adev->pdev->device == 0x130A) ||
2118 (adev->pdev->device == 0x130D) ||
2119 (adev->pdev->device == 0x1313) ||
2120 (adev->pdev->device == 0x131D)) {
2121 adev->gfx.config.max_cu_per_sh = 6;
2122 adev->gfx.config.max_backends_per_se = 2;
2123 } else if ((adev->pdev->device == 0x1306) ||
2124 (adev->pdev->device == 0x1307) ||
2125 (adev->pdev->device == 0x130B) ||
2126 (adev->pdev->device == 0x130E) ||
2127 (adev->pdev->device == 0x1315) ||
2128 (adev->pdev->device == 0x131B)) {
2129 adev->gfx.config.max_cu_per_sh = 4;
2130 adev->gfx.config.max_backends_per_se = 1;
2131 } else {
2132 adev->gfx.config.max_cu_per_sh = 3;
2133 adev->gfx.config.max_backends_per_se = 1;
2134 }
2135 adev->gfx.config.max_sh_per_se = 1;
2136 adev->gfx.config.max_texture_channel_caches = 4;
2137 adev->gfx.config.max_gprs = 256;
2138 adev->gfx.config.max_gs_threads = 16;
2139 adev->gfx.config.max_hw_contexts = 8;
2140
2141 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2142 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2143 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2144 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2145 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2146 break;
2147 case CHIP_KABINI:
2148 case CHIP_MULLINS:
2149 default:
2150 adev->gfx.config.max_shader_engines = 1;
2151 adev->gfx.config.max_tile_pipes = 2;
2152 adev->gfx.config.max_cu_per_sh = 2;
2153 adev->gfx.config.max_sh_per_se = 1;
2154 adev->gfx.config.max_backends_per_se = 1;
2155 adev->gfx.config.max_texture_channel_caches = 2;
2156 adev->gfx.config.max_gprs = 256;
2157 adev->gfx.config.max_gs_threads = 16;
2158 adev->gfx.config.max_hw_contexts = 8;
2159
2160 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2161 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2162 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2163 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2164 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2165 break;
2166 }
2167
2168 WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); 1717 WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
2169 1718
2170 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1719 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2171 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1720 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2172 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1721 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2173
2174 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2175 adev->gfx.config.mem_max_burst_length_bytes = 256;
2176 if (adev->flags & AMD_IS_APU) {
2177 /* Get memory bank mapping mode. */
2178 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2179 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2180 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2181
2182 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2183 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2184 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2185
2186 /* Validate settings in case only one DIMM installed. */
2187 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2188 dimm00_addr_map = 0;
2189 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2190 dimm01_addr_map = 0;
2191 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2192 dimm10_addr_map = 0;
2193 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2194 dimm11_addr_map = 0;
2195
2196 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2197 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2198 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2199 adev->gfx.config.mem_row_size_in_kb = 2;
2200 else
2201 adev->gfx.config.mem_row_size_in_kb = 1;
2202 } else {
2203 tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
2204 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2205 if (adev->gfx.config.mem_row_size_in_kb > 4)
2206 adev->gfx.config.mem_row_size_in_kb = 4;
2207 }
2208 /* XXX use MC settings? */
2209 adev->gfx.config.shader_engine_tile_size = 32;
2210 adev->gfx.config.num_gpus = 1;
2211 adev->gfx.config.multi_gpu_tile_size = 64;
2212
2213 /* fix up row size */
2214 gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
2215 switch (adev->gfx.config.mem_row_size_in_kb) {
2216 case 1:
2217 default:
2218 gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2219 break;
2220 case 2:
2221 gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2222 break;
2223 case 4:
2224 gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
2225 break;
2226 }
2227 adev->gfx.config.gb_addr_config = gb_addr_config;
2228
2229 WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
2230 WREG32(mmHDP_ADDR_CONFIG, gb_addr_config);
2231 WREG32(mmDMIF_ADDR_CALC, gb_addr_config);
2232 WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2233 WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2234 WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config);
2235 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2236 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2237 1722
2238 gfx_v7_0_tiling_mode_table_init(adev); 1723 gfx_v7_0_tiling_mode_table_init(adev);
2239 1724
2240 gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines, 1725 gfx_v7_0_setup_rb(adev);
2241 adev->gfx.config.max_sh_per_se,
2242 adev->gfx.config.max_backends_per_se);
2243 1726
2244 /* set HW defaults for 3D engine */ 1727 /* set HW defaults for 3D engine */
2245 WREG32(mmCP_MEQ_THRESHOLDS, 1728 WREG32(mmCP_MEQ_THRESHOLDS,
2246 (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | 1729 (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
2247 (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); 1730 (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
2248 1731
2249 mutex_lock(&adev->grbm_idx_mutex); 1732 mutex_lock(&adev->grbm_idx_mutex);
2250 /* 1733 /*
@@ -2255,7 +1738,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
2255 1738
2256 /* XXX SH_MEM regs */ 1739 /* XXX SH_MEM regs */
2257 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1740 /* where to put LDS, scratch, GPUVM in FSA64 space */
2258 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1741 sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2259 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1742 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2260 1743
2261 mutex_lock(&adev->srbm_mutex); 1744 mutex_lock(&adev->srbm_mutex);
@@ -2379,7 +1862,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2379 return r; 1862 return r;
2380 } 1863 }
2381 WREG32(scratch, 0xCAFEDEAD); 1864 WREG32(scratch, 0xCAFEDEAD);
2382 r = amdgpu_ring_lock(ring, 3); 1865 r = amdgpu_ring_alloc(ring, 3);
2383 if (r) { 1866 if (r) {
2384 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); 1867 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
2385 amdgpu_gfx_scratch_free(adev, scratch); 1868 amdgpu_gfx_scratch_free(adev, scratch);
@@ -2388,7 +1871,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2388 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 1871 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2389 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 1872 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2390 amdgpu_ring_write(ring, 0xDEADBEEF); 1873 amdgpu_ring_write(ring, 0xDEADBEEF);
2391 amdgpu_ring_unlock_commit(ring); 1874 amdgpu_ring_commit(ring);
2392 1875
2393 for (i = 0; i < adev->usec_timeout; i++) { 1876 for (i = 0; i < adev->usec_timeout; i++) {
2394 tmp = RREG32(scratch); 1877 tmp = RREG32(scratch);
@@ -2516,36 +1999,6 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2516 amdgpu_ring_write(ring, upper_32_bits(seq)); 1999 amdgpu_ring_write(ring, upper_32_bits(seq));
2517} 2000}
2518 2001
2519/**
2520 * gfx_v7_0_ring_emit_semaphore - emit a semaphore on the CP ring
2521 *
2522 * @ring: amdgpu ring buffer object
2523 * @semaphore: amdgpu semaphore object
2524 * @emit_wait: Is this a sempahore wait?
2525 *
2526 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
2527 * from running ahead of semaphore waits.
2528 */
2529static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring,
2530 struct amdgpu_semaphore *semaphore,
2531 bool emit_wait)
2532{
2533 uint64_t addr = semaphore->gpu_addr;
2534 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2535
2536 amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2537 amdgpu_ring_write(ring, addr & 0xffffffff);
2538 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2539
2540 if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
2541 /* Prevent the PFP from running ahead of the semaphore wait */
2542 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2543 amdgpu_ring_write(ring, 0x0);
2544 }
2545
2546 return true;
2547}
2548
2549/* 2002/*
2550 * IB stuff 2003 * IB stuff
2551 */ 2004 */
@@ -2661,7 +2114,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
2661 } 2114 }
2662 WREG32(scratch, 0xCAFEDEAD); 2115 WREG32(scratch, 0xCAFEDEAD);
2663 memset(&ib, 0, sizeof(ib)); 2116 memset(&ib, 0, sizeof(ib));
2664 r = amdgpu_ib_get(ring, NULL, 256, &ib); 2117 r = amdgpu_ib_get(adev, NULL, 256, &ib);
2665 if (r) { 2118 if (r) {
2666 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 2119 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
2667 goto err1; 2120 goto err1;
@@ -2671,9 +2124,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
2671 ib.ptr[2] = 0xDEADBEEF; 2124 ib.ptr[2] = 0xDEADBEEF;
2672 ib.length_dw = 3; 2125 ib.length_dw = 3;
2673 2126
2674 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, 2127 r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
2675 AMDGPU_FENCE_OWNER_UNDEFINED, 2128 NULL, &f);
2676 &f);
2677 if (r) 2129 if (r)
2678 goto err2; 2130 goto err2;
2679 2131
@@ -2842,7 +2294,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
2842 2294
2843 gfx_v7_0_cp_gfx_enable(adev, true); 2295 gfx_v7_0_cp_gfx_enable(adev, true);
2844 2296
2845 r = amdgpu_ring_lock(ring, gfx_v7_0_get_csb_size(adev) + 8); 2297 r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
2846 if (r) { 2298 if (r) {
2847 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2299 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2848 return r; 2300 return r;
@@ -2911,7 +2363,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
2911 amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ 2363 amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2912 amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ 2364 amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2913 2365
2914 amdgpu_ring_unlock_commit(ring); 2366 amdgpu_ring_commit(ring);
2915 2367
2916 return 0; 2368 return 0;
2917} 2369}
@@ -2989,21 +2441,14 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
2989 2441
2990static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 2442static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
2991{ 2443{
2992 u32 rptr; 2444 return ring->adev->wb.wb[ring->rptr_offs];
2993
2994 rptr = ring->adev->wb.wb[ring->rptr_offs];
2995
2996 return rptr;
2997} 2445}
2998 2446
2999static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 2447static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
3000{ 2448{
3001 struct amdgpu_device *adev = ring->adev; 2449 struct amdgpu_device *adev = ring->adev;
3002 u32 wptr;
3003 2450
3004 wptr = RREG32(mmCP_RB0_WPTR); 2451 return RREG32(mmCP_RB0_WPTR);
3005
3006 return wptr;
3007} 2452}
3008 2453
3009static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 2454static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
@@ -3016,21 +2461,13 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
3016 2461
3017static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 2462static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
3018{ 2463{
3019 u32 rptr; 2464 return ring->adev->wb.wb[ring->rptr_offs];
3020
3021 rptr = ring->adev->wb.wb[ring->rptr_offs];
3022
3023 return rptr;
3024} 2465}
3025 2466
3026static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 2467static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3027{ 2468{
3028 u32 wptr;
3029
3030 /* XXX check if swapping is necessary on BE */ 2469 /* XXX check if swapping is necessary on BE */
3031 wptr = ring->adev->wb.wb[ring->wptr_offs]; 2470 return ring->adev->wb.wb[ring->wptr_offs];
3032
3033 return wptr;
3034} 2471}
3035 2472
3036static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 2473static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@@ -3126,21 +2563,6 @@ static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3126} 2563}
3127 2564
3128/** 2565/**
3129 * gfx_v7_0_cp_compute_start - start the compute queues
3130 *
3131 * @adev: amdgpu_device pointer
3132 *
3133 * Enable the compute queues.
3134 * Returns 0 for success, error for failure.
3135 */
3136static int gfx_v7_0_cp_compute_start(struct amdgpu_device *adev)
3137{
3138 gfx_v7_0_cp_compute_enable(adev, true);
3139
3140 return 0;
3141}
3142
3143/**
3144 * gfx_v7_0_cp_compute_fini - stop the compute queues 2566 * gfx_v7_0_cp_compute_fini - stop the compute queues
3145 * 2567 *
3146 * @adev: amdgpu_device pointer 2568 * @adev: amdgpu_device pointer
@@ -3330,9 +2752,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
3330 u32 *buf; 2752 u32 *buf;
3331 struct bonaire_mqd *mqd; 2753 struct bonaire_mqd *mqd;
3332 2754
3333 r = gfx_v7_0_cp_compute_start(adev); 2755 gfx_v7_0_cp_compute_enable(adev, true);
3334 if (r)
3335 return r;
3336 2756
3337 /* fix up chicken bits */ 2757 /* fix up chicken bits */
3338 tmp = RREG32(mmCP_CPF_DEBUG); 2758 tmp = RREG32(mmCP_CPF_DEBUG);
@@ -4395,28 +3815,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
4395 } 3815 }
4396} 3816}
4397 3817
4398static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev, 3818static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
4399 u32 se, u32 sh)
4400{ 3819{
4401 u32 mask = 0, tmp, tmp1; 3820 u32 data, mask;
4402 int i;
4403
4404 gfx_v7_0_select_se_sh(adev, se, sh);
4405 tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
4406 tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
4407 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4408 3821
4409 tmp &= 0xffff0000; 3822 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
3823 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
4410 3824
4411 tmp |= tmp1; 3825 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
4412 tmp >>= 16; 3826 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
4413 3827
4414 for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { 3828 mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
4415 mask <<= 1; 3829 adev->gfx.config.max_sh_per_se);
4416 mask |= 1;
4417 }
4418 3830
4419 return (~tmp) & mask; 3831 return (~data) & mask;
4420} 3832}
4421 3833
4422static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev) 3834static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
@@ -4754,6 +4166,172 @@ static int gfx_v7_0_late_init(void *handle)
4754 return 0; 4166 return 0;
4755} 4167}
4756 4168
4169static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4170{
4171 u32 gb_addr_config;
4172 u32 mc_shared_chmap, mc_arb_ramcfg;
4173 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
4174 u32 tmp;
4175
4176 switch (adev->asic_type) {
4177 case CHIP_BONAIRE:
4178 adev->gfx.config.max_shader_engines = 2;
4179 adev->gfx.config.max_tile_pipes = 4;
4180 adev->gfx.config.max_cu_per_sh = 7;
4181 adev->gfx.config.max_sh_per_se = 1;
4182 adev->gfx.config.max_backends_per_se = 2;
4183 adev->gfx.config.max_texture_channel_caches = 4;
4184 adev->gfx.config.max_gprs = 256;
4185 adev->gfx.config.max_gs_threads = 32;
4186 adev->gfx.config.max_hw_contexts = 8;
4187
4188 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4189 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4190 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4191 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4192 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4193 break;
4194 case CHIP_HAWAII:
4195 adev->gfx.config.max_shader_engines = 4;
4196 adev->gfx.config.max_tile_pipes = 16;
4197 adev->gfx.config.max_cu_per_sh = 11;
4198 adev->gfx.config.max_sh_per_se = 1;
4199 adev->gfx.config.max_backends_per_se = 4;
4200 adev->gfx.config.max_texture_channel_caches = 16;
4201 adev->gfx.config.max_gprs = 256;
4202 adev->gfx.config.max_gs_threads = 32;
4203 adev->gfx.config.max_hw_contexts = 8;
4204
4205 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4206 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4207 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4208 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4209 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
4210 break;
4211 case CHIP_KAVERI:
4212 adev->gfx.config.max_shader_engines = 1;
4213 adev->gfx.config.max_tile_pipes = 4;
4214 if ((adev->pdev->device == 0x1304) ||
4215 (adev->pdev->device == 0x1305) ||
4216 (adev->pdev->device == 0x130C) ||
4217 (adev->pdev->device == 0x130F) ||
4218 (adev->pdev->device == 0x1310) ||
4219 (adev->pdev->device == 0x1311) ||
4220 (adev->pdev->device == 0x131C)) {
4221 adev->gfx.config.max_cu_per_sh = 8;
4222 adev->gfx.config.max_backends_per_se = 2;
4223 } else if ((adev->pdev->device == 0x1309) ||
4224 (adev->pdev->device == 0x130A) ||
4225 (adev->pdev->device == 0x130D) ||
4226 (adev->pdev->device == 0x1313) ||
4227 (adev->pdev->device == 0x131D)) {
4228 adev->gfx.config.max_cu_per_sh = 6;
4229 adev->gfx.config.max_backends_per_se = 2;
4230 } else if ((adev->pdev->device == 0x1306) ||
4231 (adev->pdev->device == 0x1307) ||
4232 (adev->pdev->device == 0x130B) ||
4233 (adev->pdev->device == 0x130E) ||
4234 (adev->pdev->device == 0x1315) ||
4235 (adev->pdev->device == 0x131B)) {
4236 adev->gfx.config.max_cu_per_sh = 4;
4237 adev->gfx.config.max_backends_per_se = 1;
4238 } else {
4239 adev->gfx.config.max_cu_per_sh = 3;
4240 adev->gfx.config.max_backends_per_se = 1;
4241 }
4242 adev->gfx.config.max_sh_per_se = 1;
4243 adev->gfx.config.max_texture_channel_caches = 4;
4244 adev->gfx.config.max_gprs = 256;
4245 adev->gfx.config.max_gs_threads = 16;
4246 adev->gfx.config.max_hw_contexts = 8;
4247
4248 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4249 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4250 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4251 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4252 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4253 break;
4254 case CHIP_KABINI:
4255 case CHIP_MULLINS:
4256 default:
4257 adev->gfx.config.max_shader_engines = 1;
4258 adev->gfx.config.max_tile_pipes = 2;
4259 adev->gfx.config.max_cu_per_sh = 2;
4260 adev->gfx.config.max_sh_per_se = 1;
4261 adev->gfx.config.max_backends_per_se = 1;
4262 adev->gfx.config.max_texture_channel_caches = 2;
4263 adev->gfx.config.max_gprs = 256;
4264 adev->gfx.config.max_gs_threads = 16;
4265 adev->gfx.config.max_hw_contexts = 8;
4266
4267 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
4268 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
4269 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
4270 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
4271 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
4272 break;
4273 }
4274
4275 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
4276 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
4277 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
4278
4279 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
4280 adev->gfx.config.mem_max_burst_length_bytes = 256;
4281 if (adev->flags & AMD_IS_APU) {
4282 /* Get memory bank mapping mode. */
4283 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
4284 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4285 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4286
4287 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
4288 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
4289 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
4290
4291 /* Validate settings in case only one DIMM installed. */
4292 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
4293 dimm00_addr_map = 0;
4294 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
4295 dimm01_addr_map = 0;
4296 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
4297 dimm10_addr_map = 0;
4298 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
4299 dimm11_addr_map = 0;
4300
4301 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
4302 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
4303 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
4304 adev->gfx.config.mem_row_size_in_kb = 2;
4305 else
4306 adev->gfx.config.mem_row_size_in_kb = 1;
4307 } else {
4308 tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
4309 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
4310 if (adev->gfx.config.mem_row_size_in_kb > 4)
4311 adev->gfx.config.mem_row_size_in_kb = 4;
4312 }
4313 /* XXX use MC settings? */
4314 adev->gfx.config.shader_engine_tile_size = 32;
4315 adev->gfx.config.num_gpus = 1;
4316 adev->gfx.config.multi_gpu_tile_size = 64;
4317
4318 /* fix up row size */
4319 gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
4320 switch (adev->gfx.config.mem_row_size_in_kb) {
4321 case 1:
4322 default:
4323 gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4324 break;
4325 case 2:
4326 gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4327 break;
4328 case 4:
4329 gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
4330 break;
4331 }
4332 adev->gfx.config.gb_addr_config = gb_addr_config;
4333}
4334
4757static int gfx_v7_0_sw_init(void *handle) 4335static int gfx_v7_0_sw_init(void *handle)
4758{ 4336{
4759 struct amdgpu_ring *ring; 4337 struct amdgpu_ring *ring;
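
The gfx_v7_0_gpu_early_init() function added above packs two pieces of memory-controller bookkeeping: on APUs the DRAM row size is inferred from the DIMM address-map fuses (a map value of 11 meaning an 8 GB DIMM and therefore a 2 KB row), while on dGPUs it is derived from the NOOFCOLS field of MC_ARB_RAMCFG and then re-encoded into the ROW_SIZE field of GB_ADDR_CONFIG. A minimal userspace sketch of the dGPU arithmetic, with the register reads replaced by plain parameters and the ROW_SIZE mask/shift treated as hypothetical stand-ins for the real header definitions:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the ROW_SIZE field; the real mask/shift
 * come from the gca register headers, not from this sketch. */
#define ROW_SIZE_SHIFT 28u
#define ROW_SIZE_MASK  (0x3u << ROW_SIZE_SHIFT)

/* Mirror of the dGPU branch: noofcols is the NOOFCOLS field of
 * MC_ARB_RAMCFG.  4 bytes/column * 2^(8 + noofcols) columns, converted
 * to KB and clamped to 4 KB. */
static unsigned int row_size_in_kb(unsigned int noofcols)
{
	unsigned int kb = (4 * (1u << (8 + noofcols))) / 1024;

	return kb > 4 ? 4 : kb;
}

/* Re-encode the row size into GB_ADDR_CONFIG: 1 KB -> 0, 2 KB -> 1, 4 KB -> 2. */
static uint32_t fixup_row_size(uint32_t gb_addr_config, unsigned int row_kb)
{
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (row_kb) {
	case 2:
		gb_addr_config |= 1u << ROW_SIZE_SHIFT;
		break;
	case 4:
		gb_addr_config |= 2u << ROW_SIZE_SHIFT;
		break;
	case 1:
	default:
		break;	/* encoding 0 */
	}
	return gb_addr_config;
}

int main(void)
{
	for (unsigned int noofcols = 0; noofcols < 4; noofcols++) {
		unsigned int kb = row_size_in_kb(noofcols);

		printf("noofcols=%u -> %u KB row, ROW_SIZE field=%u\n",
		       noofcols, kb,
		       (unsigned int)((fixup_row_size(0, kb) & ROW_SIZE_MASK) >>
				      ROW_SIZE_SHIFT));
	}
	return 0;
}

For NOOFCOLS values 0, 1 and 2 this yields 1 KB, 2 KB and 4 KB rows respectively; anything larger is clamped to 4 KB, matching the clamp in the hunk.
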
@@ -4857,6 +4435,10 @@ static int gfx_v7_0_sw_init(void *handle)
4857 if (r) 4435 if (r)
4858 return r; 4436 return r;
4859 4437
4438 adev->gfx.ce_ram_size = 0x8000;
4439
4440 gfx_v7_0_gpu_early_init(adev);
4441
4860 return r; 4442 return r;
4861} 4443}
4862 4444
@@ -4897,8 +4479,6 @@ static int gfx_v7_0_hw_init(void *handle)
4897 if (r) 4479 if (r)
4898 return r; 4480 return r;
4899 4481
4900 adev->gfx.ce_ram_size = 0x8000;
4901
4902 return r; 4482 return r;
4903} 4483}
4904 4484
@@ -5015,16 +4595,6 @@ static void gfx_v7_0_print_status(void *handle)
5015 RREG32(mmHDP_ADDR_CONFIG)); 4595 RREG32(mmHDP_ADDR_CONFIG));
5016 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", 4596 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
5017 RREG32(mmDMIF_ADDR_CALC)); 4597 RREG32(mmDMIF_ADDR_CALC));
5018 dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
5019 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
5020 dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
5021 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
5022 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
5023 RREG32(mmUVD_UDEC_ADDR_CONFIG));
5024 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
5025 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
5026 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
5027 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
5028 4598
5029 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", 4599 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
5030 RREG32(mmCP_MEQ_THRESHOLDS)); 4600 RREG32(mmCP_MEQ_THRESHOLDS));
@@ -5567,13 +5137,13 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5567 .parse_cs = NULL, 5137 .parse_cs = NULL,
5568 .emit_ib = gfx_v7_0_ring_emit_ib_gfx, 5138 .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
5569 .emit_fence = gfx_v7_0_ring_emit_fence_gfx, 5139 .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
5570 .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
5571 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, 5140 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5572 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, 5141 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5573 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, 5142 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5574 .test_ring = gfx_v7_0_ring_test_ring, 5143 .test_ring = gfx_v7_0_ring_test_ring,
5575 .test_ib = gfx_v7_0_ring_test_ib, 5144 .test_ib = gfx_v7_0_ring_test_ib,
5576 .insert_nop = amdgpu_ring_insert_nop, 5145 .insert_nop = amdgpu_ring_insert_nop,
5146 .pad_ib = amdgpu_ring_generic_pad_ib,
5577}; 5147};
5578 5148
5579static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { 5149static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5583,13 +5153,13 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5583 .parse_cs = NULL, 5153 .parse_cs = NULL,
5584 .emit_ib = gfx_v7_0_ring_emit_ib_compute, 5154 .emit_ib = gfx_v7_0_ring_emit_ib_compute,
5585 .emit_fence = gfx_v7_0_ring_emit_fence_compute, 5155 .emit_fence = gfx_v7_0_ring_emit_fence_compute,
5586 .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
5587 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, 5156 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5588 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, 5157 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5589 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, 5158 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5590 .test_ring = gfx_v7_0_ring_test_ring, 5159 .test_ring = gfx_v7_0_ring_test_ring,
5591 .test_ib = gfx_v7_0_ring_test_ib, 5160 .test_ib = gfx_v7_0_ring_test_ib,
5592 .insert_nop = amdgpu_ring_insert_nop, 5161 .insert_nop = amdgpu_ring_insert_nop,
5162 .pad_ib = amdgpu_ring_generic_pad_ib,
5593}; 5163};
5594 5164
5595static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) 5165static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5659,7 +5229,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5659 5229
5660 5230
5661int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, 5231int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
5662 struct amdgpu_cu_info *cu_info) 5232 struct amdgpu_cu_info *cu_info)
5663{ 5233{
5664 int i, j, k, counter, active_cu_number = 0; 5234 int i, j, k, counter, active_cu_number = 0;
5665 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 5235 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
@@ -5673,10 +5243,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
5673 mask = 1; 5243 mask = 1;
5674 ao_bitmap = 0; 5244 ao_bitmap = 0;
5675 counter = 0; 5245 counter = 0;
5676 bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j); 5246 gfx_v7_0_select_se_sh(adev, i, j);
5247 bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
5677 cu_info->bitmap[i][j] = bitmap; 5248 cu_info->bitmap[i][j] = bitmap;
5678 5249
5679 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 5250 for (k = 0; k < 16; k ++) {
5680 if (bitmap & mask) { 5251 if (bitmap & mask) {
5681 if (counter < 2) 5252 if (counter < 2)
5682 ao_bitmap |= mask; 5253 ao_bitmap |= mask;
@@ -5688,9 +5259,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
5688 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 5259 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5689 } 5260 }
5690 } 5261 }
5262 gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5263 mutex_unlock(&adev->grbm_idx_mutex);
5691 5264
5692 cu_info->number = active_cu_number; 5265 cu_info->number = active_cu_number;
5693 cu_info->ao_cu_mask = ao_cu_mask; 5266 cu_info->ao_cu_mask = ao_cu_mask;
5694 mutex_unlock(&adev->grbm_idx_mutex); 5267
5695 return 0; 5268 return 0;
5696} 5269}
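
The two gfx_v7_0_get_cu_info() hunks above change how the per-SE/SH compute-unit bitmap is obtained (the caller now selects the SE/SH and re-broadcasts afterwards) but keep the accumulation scheme: count the active CUs in each 16-bit bitmap, mark the first two as always-on, and pack the per-SH AO bits at offset i * 16 + j * 8. The sketch below models only that bookkeeping in plain C; the middle of the loop is not visible in the hunk, so its reconstruction here is an approximation rather than the driver's exact code.

#include <stdint.h>
#include <stdio.h>

/* Model of the accumulation in gfx_v7_0_get_cu_info(): for each shader
 * engine (se) / shader array (sh), count the active CUs in its bitmap
 * and mark the first two as "always on".  The AO bits of SE i / SH j
 * land at bit offset i * 16 + j * 8, as in the hunk above. */
struct cu_info_model {
	uint32_t number;     /* total active CUs */
	uint32_t ao_cu_mask; /* packed always-on CU mask */
};

static void accumulate_cu(struct cu_info_model *info,
			  unsigned int se, unsigned int sh, uint32_t bitmap)
{
	uint32_t mask = 1, ao_bitmap = 0;
	unsigned int k, counter = 0;

	for (k = 0; k < 16; k++) {
		if (bitmap & mask) {
			if (counter < 2)
				ao_bitmap |= mask;
			counter++;	/* assumed: counter tracks active CUs */
		}
		mask <<= 1;
	}
	info->number += counter;
	info->ao_cu_mask |= ao_bitmap << (se * 16 + sh * 8);
}

int main(void)
{
	struct cu_info_model info = { 0, 0 };

	accumulate_cu(&info, 0, 0, 0x7f);  /* e.g. 7 CUs active in SE0/SH0 */
	printf("CUs: %u, AO mask: 0x%08x\n",
	       (unsigned int)info.number, (unsigned int)info.ao_cu_mask);
	return 0;
}
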
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8f8ec37ecd88..10c865087d0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -43,9 +43,6 @@
43#include "gca/gfx_8_0_sh_mask.h" 43#include "gca/gfx_8_0_sh_mask.h"
44#include "gca/gfx_8_0_enum.h" 44#include "gca/gfx_8_0_enum.h"
45 45
46#include "uvd/uvd_5_0_d.h"
47#include "uvd/uvd_5_0_sh_mask.h"
48
49#include "dce/dce_10_0_d.h" 46#include "dce/dce_10_0_d.h"
50#include "dce/dce_10_0_sh_mask.h" 47#include "dce/dce_10_0_sh_mask.h"
51 48
@@ -652,7 +649,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
652 return r; 649 return r;
653 } 650 }
654 WREG32(scratch, 0xCAFEDEAD); 651 WREG32(scratch, 0xCAFEDEAD);
655 r = amdgpu_ring_lock(ring, 3); 652 r = amdgpu_ring_alloc(ring, 3);
656 if (r) { 653 if (r) {
657 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 654 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
658 ring->idx, r); 655 ring->idx, r);
@@ -662,7 +659,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
662 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 659 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
663 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 660 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
664 amdgpu_ring_write(ring, 0xDEADBEEF); 661 amdgpu_ring_write(ring, 0xDEADBEEF);
665 amdgpu_ring_unlock_commit(ring); 662 amdgpu_ring_commit(ring);
666 663
667 for (i = 0; i < adev->usec_timeout; i++) { 664 for (i = 0; i < adev->usec_timeout; i++) {
668 tmp = RREG32(scratch); 665 tmp = RREG32(scratch);
@@ -699,7 +696,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
699 } 696 }
700 WREG32(scratch, 0xCAFEDEAD); 697 WREG32(scratch, 0xCAFEDEAD);
701 memset(&ib, 0, sizeof(ib)); 698 memset(&ib, 0, sizeof(ib));
702 r = amdgpu_ib_get(ring, NULL, 256, &ib); 699 r = amdgpu_ib_get(adev, NULL, 256, &ib);
703 if (r) { 700 if (r) {
704 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 701 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
705 goto err1; 702 goto err1;
@@ -709,9 +706,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
709 ib.ptr[2] = 0xDEADBEEF; 706 ib.ptr[2] = 0xDEADBEEF;
710 ib.length_dw = 3; 707 ib.length_dw = 3;
711 708
712 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, 709 r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
713 AMDGPU_FENCE_OWNER_UNDEFINED, 710 NULL, &f);
714 &f);
715 if (r) 711 if (r)
716 goto err2; 712 goto err2;
717 713
@@ -1171,7 +1167,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1171 1167
1172 /* allocate an indirect buffer to put the commands in */ 1168 /* allocate an indirect buffer to put the commands in */
1173 memset(&ib, 0, sizeof(ib)); 1169 memset(&ib, 0, sizeof(ib));
1174 r = amdgpu_ib_get(ring, NULL, total_size, &ib); 1170 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1175 if (r) { 1171 if (r) {
1176 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1172 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1177 return r; 1173 return r;
@@ -1266,9 +1262,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1266 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1262 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1267 1263
1268 /* shedule the ib on the ring */ 1264 /* shedule the ib on the ring */
1269 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, 1265 r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
1270 AMDGPU_FENCE_OWNER_UNDEFINED, 1266 NULL, &f);
1271 &f);
1272 if (r) { 1267 if (r) {
1273 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1268 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1274 goto fail; 1269 goto fail;
@@ -2574,11 +2569,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2574 } 2569 }
2575} 2570}
2576 2571
2577static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2578{
2579 return (u32)((1ULL << bit_width) - 1);
2580}
2581
2582void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) 2572void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2583{ 2573{
2584 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2574 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
@@ -2599,89 +2589,50 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2599 WREG32(mmGRBM_GFX_INDEX, data); 2589 WREG32(mmGRBM_GFX_INDEX, data);
2600} 2590}
2601 2591
2602static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev, 2592static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2603 u32 max_rb_num_per_se, 2593{
2604 u32 sh_per_se) 2594 return (u32)((1ULL << bit_width) - 1);
2595}
2596
2597static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2605{ 2598{
2606 u32 data, mask; 2599 u32 data, mask;
2607 2600
2608 data = RREG32(mmCC_RB_BACKEND_DISABLE); 2601 data = RREG32(mmCC_RB_BACKEND_DISABLE);
2609 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2610
2611 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); 2602 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2612 2603
2604 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2613 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2605 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2614 2606
2615 mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se); 2607 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
2608 adev->gfx.config.max_sh_per_se);
2616 2609
2617 return data & mask; 2610 return (~data) & mask;
2618} 2611}
2619 2612
2620static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, 2613static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2621 u32 se_num, u32 sh_per_se,
2622 u32 max_rb_num_per_se)
2623{ 2614{
2624 int i, j; 2615 int i, j;
2625 u32 data, mask; 2616 u32 data, tmp, num_rbs = 0;
2626 u32 disabled_rbs = 0; 2617 u32 active_rbs = 0;
2627 u32 enabled_rbs = 0;
2628 2618
2629 mutex_lock(&adev->grbm_idx_mutex); 2619 mutex_lock(&adev->grbm_idx_mutex);
2630 for (i = 0; i < se_num; i++) { 2620 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2631 for (j = 0; j < sh_per_se; j++) { 2621 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2632 gfx_v8_0_select_se_sh(adev, i, j); 2622 gfx_v8_0_select_se_sh(adev, i, j);
2633 data = gfx_v8_0_get_rb_disabled(adev, 2623 data = gfx_v8_0_get_rb_active_bitmap(adev);
2634 max_rb_num_per_se, sh_per_se); 2624 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2635 disabled_rbs |= data << ((i * sh_per_se + j) * 2625 RB_BITMAP_WIDTH_PER_SH);
2636 RB_BITMAP_WIDTH_PER_SH);
2637 } 2626 }
2638 } 2627 }
2639 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 2628 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2640 mutex_unlock(&adev->grbm_idx_mutex); 2629 mutex_unlock(&adev->grbm_idx_mutex);
2641 2630
2642 mask = 1; 2631 adev->gfx.config.backend_enable_mask = active_rbs;
2643 for (i = 0; i < max_rb_num_per_se * se_num; i++) { 2632 tmp = active_rbs;
2644 if (!(disabled_rbs & mask)) 2633 while (tmp >>= 1)
2645 enabled_rbs |= mask; 2634 num_rbs++;
2646 mask <<= 1; 2635 adev->gfx.config.num_rbs = num_rbs;
2647 }
2648
2649 adev->gfx.config.backend_enable_mask = enabled_rbs;
2650
2651 mutex_lock(&adev->grbm_idx_mutex);
2652 for (i = 0; i < se_num; i++) {
2653 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
2654 data = RREG32(mmPA_SC_RASTER_CONFIG);
2655 for (j = 0; j < sh_per_se; j++) {
2656 switch (enabled_rbs & 3) {
2657 case 0:
2658 if (j == 0)
2659 data |= (RASTER_CONFIG_RB_MAP_3 <<
2660 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2661 else
2662 data |= (RASTER_CONFIG_RB_MAP_0 <<
2663 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
2664 break;
2665 case 1:
2666 data |= (RASTER_CONFIG_RB_MAP_0 <<
2667 (i * sh_per_se + j) * 2);
2668 break;
2669 case 2:
2670 data |= (RASTER_CONFIG_RB_MAP_3 <<
2671 (i * sh_per_se + j) * 2);
2672 break;
2673 case 3:
2674 default:
2675 data |= (RASTER_CONFIG_RB_MAP_2 <<
2676 (i * sh_per_se + j) * 2);
2677 break;
2678 }
2679 enabled_rbs >>= 2;
2680 }
2681 WREG32(mmPA_SC_RASTER_CONFIG, data);
2682 }
2683 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2684 mutex_unlock(&adev->grbm_idx_mutex);
2685} 2636}
2686 2637
2687/** 2638/**
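
gfx_v8_0_setup_rb() above drops the old disabled-RB/raster-config programming in favour of a straightforward census: per SE/SH, the active render backends are the inverse of the combined CC/GC_USER disable bits masked to the per-SH backend count, and the per-SH results are packed into one enable mask. A standalone model of that bit arithmetic, with the register values passed in as plain integers and RB_BITMAP_WIDTH_PER_SH assumed to be 2:

#include <stdint.h>
#include <stdio.h>

#define RB_BITMAP_WIDTH_PER_SH 2 /* assumption: stands in for the driver constant */

/* create_bitmask(n) -> n low bits set, as in the hunk above */
static uint32_t create_bitmask(uint32_t bit_width)
{
	return (uint32_t)((1ULL << bit_width) - 1);
}

/* Active-RB bitmap for one SE/SH: disable_bits stands in for
 * CC_RB_BACKEND_DISABLE | GC_USER_RB_BACKEND_DISABLE, already shifted
 * down to bit 0; active = ~disabled masked to the RB count. */
static uint32_t rb_active_bitmap(uint32_t disable_bits,
				 uint32_t max_backends_per_se,
				 uint32_t max_sh_per_se)
{
	uint32_t mask = create_bitmask(max_backends_per_se / max_sh_per_se);

	return (~disable_bits) & mask;
}

int main(void)
{
	/* Example: 2 SEs, 1 SH each, 2 RBs per SE; one RB of SE1 fused off. */
	uint32_t disable[2][1] = { { 0x0 }, { 0x2 } };
	uint32_t active_rbs = 0;

	for (int i = 0; i < 2; i++)
		for (int j = 0; j < 1; j++)
			active_rbs |= rb_active_bitmap(disable[i][j], 2, 1) <<
				      ((i * 1 + j) * RB_BITMAP_WIDTH_PER_SH);

	printf("backend_enable_mask = 0x%x\n", (unsigned int)active_rbs); /* 0x7 here */
	return 0;
}
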
@@ -2741,19 +2692,10 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2741 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 2692 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2742 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 2693 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2743 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 2694 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2744 WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
2745 adev->gfx.config.gb_addr_config & 0x70);
2746 WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
2747 adev->gfx.config.gb_addr_config & 0x70);
2748 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2749 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2750 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2751 2695
2752 gfx_v8_0_tiling_mode_table_init(adev); 2696 gfx_v8_0_tiling_mode_table_init(adev);
2753 2697
2754 gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines, 2698 gfx_v8_0_setup_rb(adev);
2755 adev->gfx.config.max_sh_per_se,
2756 adev->gfx.config.max_backends_per_se);
2757 2699
2758 /* XXX SH_MEM regs */ 2700 /* XXX SH_MEM regs */
2759 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2701 /* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -3062,7 +3004,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3062 3004
3063 gfx_v8_0_cp_gfx_enable(adev, true); 3005 gfx_v8_0_cp_gfx_enable(adev, true);
3064 3006
3065 r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4); 3007 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3066 if (r) { 3008 if (r) {
3067 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3009 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3068 return r; 3010 return r;
@@ -3126,7 +3068,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3126 amdgpu_ring_write(ring, 0x8000); 3068 amdgpu_ring_write(ring, 0x8000);
3127 amdgpu_ring_write(ring, 0x8000); 3069 amdgpu_ring_write(ring, 0x8000);
3128 3070
3129 amdgpu_ring_unlock_commit(ring); 3071 amdgpu_ring_commit(ring);
3130 3072
3131 return 0; 3073 return 0;
3132} 3074}
@@ -3226,13 +3168,6 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3226 udelay(50); 3168 udelay(50);
3227} 3169}
3228 3170
3229static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
3230{
3231 gfx_v8_0_cp_compute_enable(adev, true);
3232
3233 return 0;
3234}
3235
3236static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3171static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3237{ 3172{
3238 const struct gfx_firmware_header_v1_0 *mec_hdr; 3173 const struct gfx_firmware_header_v1_0 *mec_hdr;
@@ -3802,9 +3737,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3802 WREG32(mmCP_PQ_STATUS, tmp); 3737 WREG32(mmCP_PQ_STATUS, tmp);
3803 } 3738 }
3804 3739
3805 r = gfx_v8_0_cp_compute_start(adev); 3740 gfx_v8_0_cp_compute_enable(adev, true);
3806 if (r)
3807 return r;
3808 3741
3809 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3742 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3810 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3743 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
@@ -4016,16 +3949,6 @@ static void gfx_v8_0_print_status(void *handle)
4016 RREG32(mmHDP_ADDR_CONFIG)); 3949 RREG32(mmHDP_ADDR_CONFIG));
4017 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", 3950 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
4018 RREG32(mmDMIF_ADDR_CALC)); 3951 RREG32(mmDMIF_ADDR_CALC));
4019 dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
4020 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
4021 dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
4022 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
4023 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
4024 RREG32(mmUVD_UDEC_ADDR_CONFIG));
4025 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
4026 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
4027 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
4028 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
4029 3952
4030 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", 3953 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
4031 RREG32(mmCP_MEQ_THRESHOLDS)); 3954 RREG32(mmCP_MEQ_THRESHOLDS));
@@ -4762,49 +4685,11 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4762 4685
4763} 4686}
4764 4687
4765/**
4766 * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
4767 *
4768 * @ring: amdgpu ring buffer object
4769 * @semaphore: amdgpu semaphore object
4770 * @emit_wait: Is this a sempahore wait?
4771 *
4772 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4773 * from running ahead of semaphore waits.
4774 */
4775static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
4776 struct amdgpu_semaphore *semaphore,
4777 bool emit_wait)
4778{
4779 uint64_t addr = semaphore->gpu_addr;
4780 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4781
4782 if (ring->adev->asic_type == CHIP_TOPAZ ||
4783 ring->adev->asic_type == CHIP_TONGA ||
4784 ring->adev->asic_type == CHIP_FIJI)
4785 /* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
4786 return false;
4787 else {
4788 amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
4789 amdgpu_ring_write(ring, lower_32_bits(addr));
4790 amdgpu_ring_write(ring, upper_32_bits(addr));
4791 amdgpu_ring_write(ring, sel);
4792 }
4793
4794 if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
4795 /* Prevent the PFP from running ahead of the semaphore wait */
4796 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4797 amdgpu_ring_write(ring, 0x0);
4798 }
4799
4800 return true;
4801}
4802
4803static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4688static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4804 unsigned vm_id, uint64_t pd_addr) 4689 unsigned vm_id, uint64_t pd_addr)
4805{ 4690{
4806 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); 4691 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4807 uint32_t seq = ring->fence_drv.sync_seq[ring->idx]; 4692 uint32_t seq = ring->fence_drv.sync_seq;
4808 uint64_t addr = ring->fence_drv.gpu_addr; 4693 uint64_t addr = ring->fence_drv.gpu_addr;
4809 4694
4810 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 4695 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -5145,13 +5030,13 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5145 .parse_cs = NULL, 5030 .parse_cs = NULL,
5146 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 5031 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5147 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 5032 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5148 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5149 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 5033 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5150 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 5034 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5151 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 5035 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5152 .test_ring = gfx_v8_0_ring_test_ring, 5036 .test_ring = gfx_v8_0_ring_test_ring,
5153 .test_ib = gfx_v8_0_ring_test_ib, 5037 .test_ib = gfx_v8_0_ring_test_ib,
5154 .insert_nop = amdgpu_ring_insert_nop, 5038 .insert_nop = amdgpu_ring_insert_nop,
5039 .pad_ib = amdgpu_ring_generic_pad_ib,
5155}; 5040};
5156 5041
5157static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 5042static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -5161,13 +5046,13 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5161 .parse_cs = NULL, 5046 .parse_cs = NULL,
5162 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 5047 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5163 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 5048 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5164 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
5165 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 5049 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5166 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 5050 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5167 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 5051 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5168 .test_ring = gfx_v8_0_ring_test_ring, 5052 .test_ring = gfx_v8_0_ring_test_ring,
5169 .test_ib = gfx_v8_0_ring_test_ib, 5053 .test_ib = gfx_v8_0_ring_test_ib,
5170 .insert_nop = amdgpu_ring_insert_nop, 5054 .insert_nop = amdgpu_ring_insert_nop,
5055 .pad_ib = amdgpu_ring_generic_pad_ib,
5171}; 5056};
5172 5057
5173static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 5058static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5236,32 +5121,24 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5236 } 5121 }
5237} 5122}
5238 5123
5239static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev, 5124static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5240 u32 se, u32 sh)
5241{ 5125{
5242 u32 mask = 0, tmp, tmp1; 5126 u32 data, mask;
5243 int i;
5244
5245 gfx_v8_0_select_se_sh(adev, se, sh);
5246 tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5247 tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5248 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5249 5127
5250 tmp &= 0xffff0000; 5128 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5129 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5251 5130
5252 tmp |= tmp1; 5131 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5253 tmp >>= 16; 5132 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5254 5133
5255 for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { 5134 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
5256 mask <<= 1; 5135 adev->gfx.config.max_sh_per_se);
5257 mask |= 1;
5258 }
5259 5136
5260 return (~tmp) & mask; 5137 return (~data) & mask;
5261} 5138}
5262 5139
5263int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, 5140int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5264 struct amdgpu_cu_info *cu_info) 5141 struct amdgpu_cu_info *cu_info)
5265{ 5142{
5266 int i, j, k, counter, active_cu_number = 0; 5143 int i, j, k, counter, active_cu_number = 0;
5267 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 5144 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
@@ -5275,10 +5152,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5275 mask = 1; 5152 mask = 1;
5276 ao_bitmap = 0; 5153 ao_bitmap = 0;
5277 counter = 0; 5154 counter = 0;
5278 bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j); 5155 gfx_v8_0_select_se_sh(adev, i, j);
5156 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5279 cu_info->bitmap[i][j] = bitmap; 5157 cu_info->bitmap[i][j] = bitmap;
5280 5158
5281 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 5159 for (k = 0; k < 16; k ++) {
5282 if (bitmap & mask) { 5160 if (bitmap & mask) {
5283 if (counter < 2) 5161 if (counter < 2)
5284 ao_bitmap |= mask; 5162 ao_bitmap |= mask;
@@ -5290,9 +5168,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5290 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 5168 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5291 } 5169 }
5292 } 5170 }
5171 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5172 mutex_unlock(&adev->grbm_idx_mutex);
5293 5173
5294 cu_info->number = active_cu_number; 5174 cu_info->number = active_cu_number;
5295 cu_info->ao_cu_mask = ao_cu_mask; 5175 cu_info->ao_cu_mask = ao_cu_mask;
5296 mutex_unlock(&adev->grbm_idx_mutex); 5176
5297 return 0; 5177 return 0;
5298} 5178}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 8aa2991ab379..68ee66b38e5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -694,7 +694,8 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
694 * amdgpu graphics/compute will use VMIDs 1-7 694 * amdgpu graphics/compute will use VMIDs 1-7
695 * amdkfd will use VMIDs 8-15 695 * amdkfd will use VMIDs 8-15
696 */ 696 */
697 adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS; 697 adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
698 amdgpu_vm_manager_init(adev);
698 699
699 /* base offset of vram pages */ 700 /* base offset of vram pages */
700 if (adev->flags & AMD_IS_APU) { 701 if (adev->flags & AMD_IS_APU) {
@@ -926,10 +927,6 @@ static int gmc_v7_0_sw_init(void *handle)
926 int dma_bits; 927 int dma_bits;
927 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 928 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
928 929
929 r = amdgpu_gem_init(adev);
930 if (r)
931 return r;
932
933 r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); 930 r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
934 if (r) 931 if (r)
935 return r; 932 return r;
@@ -1010,7 +1007,7 @@ static int gmc_v7_0_sw_fini(void *handle)
1010 adev->vm_manager.enabled = false; 1007 adev->vm_manager.enabled = false;
1011 } 1008 }
1012 gmc_v7_0_gart_fini(adev); 1009 gmc_v7_0_gart_fini(adev);
1013 amdgpu_gem_fini(adev); 1010 amdgpu_gem_force_release(adev);
1014 amdgpu_bo_fini(adev); 1011 amdgpu_bo_fini(adev);
1015 1012
1016 return 0; 1013 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 3efd45546241..757803ae7c4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -252,6 +252,12 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev)
252 if (!adev->mc.fw) 252 if (!adev->mc.fw)
253 return -EINVAL; 253 return -EINVAL;
254 254
255 /* Skip MC ucode loading on SR-IOV capable boards.
256 * vbios does this for us in asic_init in that case.
257 */
258 if (adev->virtualization.supports_sr_iov)
259 return 0;
260
255 hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; 261 hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
256 amdgpu_ucode_print_mc_hdr(&hdr->header); 262 amdgpu_ucode_print_mc_hdr(&hdr->header);
257 263
@@ -774,7 +780,8 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
774 * amdgpu graphics/compute will use VMIDs 1-7 780 * amdgpu graphics/compute will use VMIDs 1-7
775 * amdkfd will use VMIDs 8-15 781 * amdkfd will use VMIDs 8-15
776 */ 782 */
777 adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS; 783 adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
784 amdgpu_vm_manager_init(adev);
778 785
779 /* base offset of vram pages */ 786 /* base offset of vram pages */
780 if (adev->flags & AMD_IS_APU) { 787 if (adev->flags & AMD_IS_APU) {
@@ -880,10 +887,6 @@ static int gmc_v8_0_sw_init(void *handle)
880 int dma_bits; 887 int dma_bits;
881 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 888 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
882 889
883 r = amdgpu_gem_init(adev);
884 if (r)
885 return r;
886
887 r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); 890 r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
888 if (r) 891 if (r)
889 return r; 892 return r;
@@ -964,7 +967,7 @@ static int gmc_v8_0_sw_fini(void *handle)
964 adev->vm_manager.enabled = false; 967 adev->vm_manager.enabled = false;
965 } 968 }
966 gmc_v8_0_gart_fini(adev); 969 gmc_v8_0_gart_fini(adev);
967 amdgpu_gem_fini(adev); 970 amdgpu_gem_force_release(adev);
968 amdgpu_bo_fini(adev); 971 amdgpu_bo_fini(adev);
969 972
970 return 0; 973 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
index 090486c18249..52ee08193295 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
@@ -279,6 +279,12 @@ static int iceland_smu_upload_firmware_image(struct amdgpu_device *adev)
279 if (!adev->pm.fw) 279 if (!adev->pm.fw)
280 return -EINVAL; 280 return -EINVAL;
281 281
282 /* Skip SMC ucode loading on SR-IOV capable boards.
283 * vbios does this for us in asic_init in that case.
284 */
285 if (adev->virtualization.supports_sr_iov)
286 return 0;
287
282 hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; 288 hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
283 amdgpu_ucode_print_smc_hdr(&hdr->header); 289 amdgpu_ucode_print_smc_hdr(&hdr->header);
284 290
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 2cf50180cc51..29ec986dd6fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -335,31 +335,6 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
335} 335}
336 336
337/** 337/**
338 * sdma_v2_4_ring_emit_semaphore - emit a semaphore on the dma ring
339 *
340 * @ring: amdgpu_ring structure holding ring information
341 * @semaphore: amdgpu semaphore object
342 * @emit_wait: wait or signal semaphore
343 *
344 * Add a DMA semaphore packet to the ring wait on or signal
345 * other rings (VI).
346 */
347static bool sdma_v2_4_ring_emit_semaphore(struct amdgpu_ring *ring,
348 struct amdgpu_semaphore *semaphore,
349 bool emit_wait)
350{
351 u64 addr = semaphore->gpu_addr;
352 u32 sig = emit_wait ? 0 : 1;
353
354 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
355 SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
356 amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
357 amdgpu_ring_write(ring, upper_32_bits(addr));
358
359 return true;
360}
361
362/**
363 * sdma_v2_4_gfx_stop - stop the gfx async dma engines 338 * sdma_v2_4_gfx_stop - stop the gfx async dma engines
364 * 339 *
365 * @adev: amdgpu_device pointer 340 * @adev: amdgpu_device pointer
@@ -459,6 +434,9 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
459 vi_srbm_select(adev, 0, 0, 0, 0); 434 vi_srbm_select(adev, 0, 0, 0, 0);
460 mutex_unlock(&adev->srbm_mutex); 435 mutex_unlock(&adev->srbm_mutex);
461 436
437 WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
438 adev->gfx.config.gb_addr_config & 0x70);
439
462 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 440 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
463 441
464 /* Set ring buffer size in dwords */ 442 /* Set ring buffer size in dwords */
@@ -636,7 +614,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
636 tmp = 0xCAFEDEAD; 614 tmp = 0xCAFEDEAD;
637 adev->wb.wb[index] = cpu_to_le32(tmp); 615 adev->wb.wb[index] = cpu_to_le32(tmp);
638 616
639 r = amdgpu_ring_lock(ring, 5); 617 r = amdgpu_ring_alloc(ring, 5);
640 if (r) { 618 if (r) {
641 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 619 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
642 amdgpu_wb_free(adev, index); 620 amdgpu_wb_free(adev, index);
@@ -649,7 +627,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
649 amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 627 amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
650 amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); 628 amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
651 amdgpu_ring_write(ring, 0xDEADBEEF); 629 amdgpu_ring_write(ring, 0xDEADBEEF);
652 amdgpu_ring_unlock_commit(ring); 630 amdgpu_ring_commit(ring);
653 631
654 for (i = 0; i < adev->usec_timeout; i++) { 632 for (i = 0; i < adev->usec_timeout; i++) {
655 tmp = le32_to_cpu(adev->wb.wb[index]); 633 tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -699,7 +677,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
699 tmp = 0xCAFEDEAD; 677 tmp = 0xCAFEDEAD;
700 adev->wb.wb[index] = cpu_to_le32(tmp); 678 adev->wb.wb[index] = cpu_to_le32(tmp);
701 memset(&ib, 0, sizeof(ib)); 679 memset(&ib, 0, sizeof(ib));
702 r = amdgpu_ib_get(ring, NULL, 256, &ib); 680 r = amdgpu_ib_get(adev, NULL, 256, &ib);
703 if (r) { 681 if (r) {
704 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 682 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
705 goto err0; 683 goto err0;
@@ -716,9 +694,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
716 ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); 694 ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
717 ib.length_dw = 8; 695 ib.length_dw = 8;
718 696
719 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, 697 r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
720 AMDGPU_FENCE_OWNER_UNDEFINED, 698 NULL, &f);
721 &f);
722 if (r) 699 if (r)
723 goto err1; 700 goto err1;
724 701
@@ -797,7 +774,7 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
797 * Update PTEs by writing them manually using sDMA (CIK). 774 * Update PTEs by writing them manually using sDMA (CIK).
798 */ 775 */
799static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, 776static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
800 uint64_t pe, 777 const dma_addr_t *pages_addr, uint64_t pe,
801 uint64_t addr, unsigned count, 778 uint64_t addr, unsigned count,
802 uint32_t incr, uint32_t flags) 779 uint32_t incr, uint32_t flags)
803{ 780{
@@ -816,14 +793,7 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
816 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 793 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
817 ib->ptr[ib->length_dw++] = ndw; 794 ib->ptr[ib->length_dw++] = ndw;
818 for (; ndw > 0; ndw -= 2, --count, pe += 8) { 795 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
819 if (flags & AMDGPU_PTE_SYSTEM) { 796 value = amdgpu_vm_map_gart(pages_addr, addr);
820 value = amdgpu_vm_map_gart(ib->ring->adev, addr);
821 value &= 0xFFFFFFFFFFFFF000ULL;
822 } else if (flags & AMDGPU_PTE_VALID) {
823 value = addr;
824 } else {
825 value = 0;
826 }
827 addr += incr; 797 addr += incr;
828 value |= flags; 798 value |= flags;
829 ib->ptr[ib->length_dw++] = value; 799 ib->ptr[ib->length_dw++] = value;
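
sdma_v2_4_vm_write_pte() above (and its sdma_v3_0 twin further down) no longer branches on AMDGPU_PTE_SYSTEM/VALID; every entry is now produced by amdgpu_vm_map_gart() against the caller-supplied pages_addr table and OR'ed with the flags. A rough standalone model of the new inner loop follows; the GART lookup is a hypothetical stand-in, since the helper's implementation is not part of this diff.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)

/* Hypothetical stand-in for amdgpu_vm_map_gart(): translate a GART
 * offset into the DMA address recorded in pages_addr[].  The real
 * helper also handles the pages_addr == NULL case. */
static uint64_t map_gart(const uint64_t *pages_addr, uint64_t addr)
{
	if (!pages_addr)
		return addr;	/* assumed passthrough */
	return pages_addr[addr >> PAGE_SHIFT] & ~0xfffULL;
}

/* Model of the rewritten inner loop: one 64-bit PTE per page, each
 * entry being the looked-up address OR'ed with the mapping flags. */
static size_t write_ptes(uint64_t *out, const uint64_t *pages_addr,
			 uint64_t addr, unsigned int count,
			 uint32_t incr, uint32_t flags)
{
	size_t n = 0;

	while (count--) {
		out[n++] = map_gart(pages_addr, addr) | flags;
		addr += incr;
	}
	return n;
}

int main(void)
{
	uint64_t pages[2] = { 0x100000, 0x200000 };
	uint64_t ptes[2];
	size_t n = write_ptes(ptes, pages, 0, 2, PAGE_SIZE, 0x1 /* e.g. VALID */);

	for (size_t i = 0; i < n; i++)
		printf("pte[%zu] = 0x%llx\n", i, (unsigned long long)ptes[i]);
	return 0;
}
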
@@ -881,14 +851,14 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
881} 851}
882 852
883/** 853/**
884 * sdma_v2_4_vm_pad_ib - pad the IB to the required number of dw 854 * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw
885 * 855 *
886 * @ib: indirect buffer to fill with padding 856 * @ib: indirect buffer to fill with padding
887 * 857 *
888 */ 858 */
889static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib) 859static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
890{ 860{
891 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); 861 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
892 u32 pad_count; 862 u32 pad_count;
893 int i; 863 int i;
894 864
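
The hunk above only renames the SDMA pad helper from a vm_pte callback to a ring-level .pad_ib callback and passes the ring in explicitly instead of reading ib->ring. The padding itself, topping the IB up to the SDMA alignment with NOPs, is untouched by this diff; assuming an 8-dword alignment and a zero NOP packet (both assumptions, not taken from the hunk), the arithmetic reduces to:

#include <stdint.h>
#include <stdio.h>

/* Assumed SDMA requirement: IBs padded to a multiple of 8 dwords.  The
 * exact alignment and NOP encoding live in the driver, not this sketch. */
#define SDMA_IB_ALIGN_DW 8u
#define SDMA_NOP         0x00000000u /* placeholder NOP packet */

static unsigned int pad_count(unsigned int length_dw)
{
	return (SDMA_IB_ALIGN_DW - (length_dw & (SDMA_IB_ALIGN_DW - 1))) &
	       (SDMA_IB_ALIGN_DW - 1);
}

static void pad_ib(uint32_t *ib, unsigned int *length_dw)
{
	unsigned int pad = pad_count(*length_dw);

	while (pad--)
		ib[(*length_dw)++] = SDMA_NOP;
}

int main(void)
{
	uint32_t ib[16] = { 0 };
	unsigned int len = 5;

	pad_ib(ib, &len);
	printf("padded length: %u dwords\n", len); /* prints 8 */
	return 0;
}
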
@@ -1111,6 +1081,8 @@ static void sdma_v2_4_print_status(void *handle)
1111 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); 1081 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
1112 dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", 1082 dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
1113 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); 1083 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
1084 dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n",
1085 i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
1114 mutex_lock(&adev->srbm_mutex); 1086 mutex_lock(&adev->srbm_mutex);
1115 for (j = 0; j < 16; j++) { 1087 for (j = 0; j < 16; j++) {
1116 vi_srbm_select(adev, 0, 0, 0, j); 1088 vi_srbm_select(adev, 0, 0, 0, j);
@@ -1302,12 +1274,12 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
1302 .parse_cs = NULL, 1274 .parse_cs = NULL,
1303 .emit_ib = sdma_v2_4_ring_emit_ib, 1275 .emit_ib = sdma_v2_4_ring_emit_ib,
1304 .emit_fence = sdma_v2_4_ring_emit_fence, 1276 .emit_fence = sdma_v2_4_ring_emit_fence,
1305 .emit_semaphore = sdma_v2_4_ring_emit_semaphore,
1306 .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush, 1277 .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
1307 .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush, 1278 .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
1308 .test_ring = sdma_v2_4_ring_test_ring, 1279 .test_ring = sdma_v2_4_ring_test_ring,
1309 .test_ib = sdma_v2_4_ring_test_ib, 1280 .test_ib = sdma_v2_4_ring_test_ib,
1310 .insert_nop = sdma_v2_4_ring_insert_nop, 1281 .insert_nop = sdma_v2_4_ring_insert_nop,
1282 .pad_ib = sdma_v2_4_ring_pad_ib,
1311}; 1283};
1312 1284
1313static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) 1285static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1405,14 +1377,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
1405 .copy_pte = sdma_v2_4_vm_copy_pte, 1377 .copy_pte = sdma_v2_4_vm_copy_pte,
1406 .write_pte = sdma_v2_4_vm_write_pte, 1378 .write_pte = sdma_v2_4_vm_write_pte,
1407 .set_pte_pde = sdma_v2_4_vm_set_pte_pde, 1379 .set_pte_pde = sdma_v2_4_vm_set_pte_pde,
1408 .pad_ib = sdma_v2_4_vm_pad_ib,
1409}; 1380};
1410 1381
1411static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) 1382static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
1412{ 1383{
1384 unsigned i;
1385
1413 if (adev->vm_manager.vm_pte_funcs == NULL) { 1386 if (adev->vm_manager.vm_pte_funcs == NULL) {
1414 adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; 1387 adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
1415 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; 1388 for (i = 0; i < adev->sdma.num_instances; i++)
1416 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; 1389 adev->vm_manager.vm_pte_rings[i] =
1390 &adev->sdma.instance[i].ring;
1391
1392 adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
1417 } 1393 }
1418} 1394}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index ad54c46751b0..6f064d7076e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -444,32 +444,6 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
444 amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); 444 amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
445} 445}
446 446
447
448/**
449 * sdma_v3_0_ring_emit_semaphore - emit a semaphore on the dma ring
450 *
451 * @ring: amdgpu_ring structure holding ring information
452 * @semaphore: amdgpu semaphore object
453 * @emit_wait: wait or signal semaphore
454 *
455 * Add a DMA semaphore packet to the ring wait on or signal
456 * other rings (VI).
457 */
458static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring,
459 struct amdgpu_semaphore *semaphore,
460 bool emit_wait)
461{
462 u64 addr = semaphore->gpu_addr;
463 u32 sig = emit_wait ? 0 : 1;
464
465 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
466 SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
467 amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
468 amdgpu_ring_write(ring, upper_32_bits(addr));
469
470 return true;
471}
472
473/** 447/**
474 * sdma_v3_0_gfx_stop - stop the gfx async dma engines 448 * sdma_v3_0_gfx_stop - stop the gfx async dma engines
475 * 449 *
@@ -596,6 +570,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
596 vi_srbm_select(adev, 0, 0, 0, 0); 570 vi_srbm_select(adev, 0, 0, 0, 0);
597 mutex_unlock(&adev->srbm_mutex); 571 mutex_unlock(&adev->srbm_mutex);
598 572
573 WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
574 adev->gfx.config.gb_addr_config & 0x70);
575
599 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); 576 WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
600 577
601 /* Set ring buffer size in dwords */ 578 /* Set ring buffer size in dwords */
@@ -788,7 +765,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
788 tmp = 0xCAFEDEAD; 765 tmp = 0xCAFEDEAD;
789 adev->wb.wb[index] = cpu_to_le32(tmp); 766 adev->wb.wb[index] = cpu_to_le32(tmp);
790 767
791 r = amdgpu_ring_lock(ring, 5); 768 r = amdgpu_ring_alloc(ring, 5);
792 if (r) { 769 if (r) {
793 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 770 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
794 amdgpu_wb_free(adev, index); 771 amdgpu_wb_free(adev, index);
@@ -801,7 +778,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
801 amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 778 amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
802 amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); 779 amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
803 amdgpu_ring_write(ring, 0xDEADBEEF); 780 amdgpu_ring_write(ring, 0xDEADBEEF);
804 amdgpu_ring_unlock_commit(ring); 781 amdgpu_ring_commit(ring);
805 782
806 for (i = 0; i < adev->usec_timeout; i++) { 783 for (i = 0; i < adev->usec_timeout; i++) {
807 tmp = le32_to_cpu(adev->wb.wb[index]); 784 tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -851,7 +828,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
851 tmp = 0xCAFEDEAD; 828 tmp = 0xCAFEDEAD;
852 adev->wb.wb[index] = cpu_to_le32(tmp); 829 adev->wb.wb[index] = cpu_to_le32(tmp);
853 memset(&ib, 0, sizeof(ib)); 830 memset(&ib, 0, sizeof(ib));
854 r = amdgpu_ib_get(ring, NULL, 256, &ib); 831 r = amdgpu_ib_get(adev, NULL, 256, &ib);
855 if (r) { 832 if (r) {
856 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 833 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
857 goto err0; 834 goto err0;
@@ -868,9 +845,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
868 ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); 845 ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
869 ib.length_dw = 8; 846 ib.length_dw = 8;
870 847
871 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, 848 r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
872 AMDGPU_FENCE_OWNER_UNDEFINED, 849 NULL, &f);
873 &f);
874 if (r) 850 if (r)
875 goto err1; 851 goto err1;
876 852
@@ -948,7 +924,7 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
948 * Update PTEs by writing them manually using sDMA (CIK). 924 * Update PTEs by writing them manually using sDMA (CIK).
949 */ 925 */
950static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, 926static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
951 uint64_t pe, 927 const dma_addr_t *pages_addr, uint64_t pe,
952 uint64_t addr, unsigned count, 928 uint64_t addr, unsigned count,
953 uint32_t incr, uint32_t flags) 929 uint32_t incr, uint32_t flags)
954{ 930{
@@ -967,14 +943,7 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
967 ib->ptr[ib->length_dw++] = upper_32_bits(pe); 943 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
968 ib->ptr[ib->length_dw++] = ndw; 944 ib->ptr[ib->length_dw++] = ndw;
969 for (; ndw > 0; ndw -= 2, --count, pe += 8) { 945 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
970 if (flags & AMDGPU_PTE_SYSTEM) { 946 value = amdgpu_vm_map_gart(pages_addr, addr);
971 value = amdgpu_vm_map_gart(ib->ring->adev, addr);
972 value &= 0xFFFFFFFFFFFFF000ULL;
973 } else if (flags & AMDGPU_PTE_VALID) {
974 value = addr;
975 } else {
976 value = 0;
977 }
978 addr += incr; 947 addr += incr;
979 value |= flags; 948 value |= flags;
980 ib->ptr[ib->length_dw++] = value; 949 ib->ptr[ib->length_dw++] = value;
@@ -1032,14 +1001,14 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
1032} 1001}
1033 1002
1034/** 1003/**
1035 * sdma_v3_0_vm_pad_ib - pad the IB to the required number of dw 1004 * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw
1036 * 1005 *
1037 * @ib: indirect buffer to fill with padding 1006 * @ib: indirect buffer to fill with padding
1038 * 1007 *
1039 */ 1008 */
1040static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib) 1009static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1041{ 1010{
1042 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); 1011 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
1043 u32 pad_count; 1012 u32 pad_count;
1044 int i; 1013 int i;
1045 1014
@@ -1275,6 +1244,8 @@ static void sdma_v3_0_print_status(void *handle)
1275 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); 1244 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
1276 dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n", 1245 dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n",
1277 i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i])); 1246 i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]));
1247 dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n",
1248 i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
1278 mutex_lock(&adev->srbm_mutex); 1249 mutex_lock(&adev->srbm_mutex);
1279 for (j = 0; j < 16; j++) { 1250 for (j = 0; j < 16; j++) {
1280 vi_srbm_select(adev, 0, 0, 0, j); 1251 vi_srbm_select(adev, 0, 0, 0, j);
@@ -1570,12 +1541,12 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
1570 .parse_cs = NULL, 1541 .parse_cs = NULL,
1571 .emit_ib = sdma_v3_0_ring_emit_ib, 1542 .emit_ib = sdma_v3_0_ring_emit_ib,
1572 .emit_fence = sdma_v3_0_ring_emit_fence, 1543 .emit_fence = sdma_v3_0_ring_emit_fence,
1573 .emit_semaphore = sdma_v3_0_ring_emit_semaphore,
1574 .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush, 1544 .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
1575 .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush, 1545 .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
1576 .test_ring = sdma_v3_0_ring_test_ring, 1546 .test_ring = sdma_v3_0_ring_test_ring,
1577 .test_ib = sdma_v3_0_ring_test_ib, 1547 .test_ib = sdma_v3_0_ring_test_ib,
1578 .insert_nop = sdma_v3_0_ring_insert_nop, 1548 .insert_nop = sdma_v3_0_ring_insert_nop,
1549 .pad_ib = sdma_v3_0_ring_pad_ib,
1579}; 1550};
1580 1551
1581static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) 1552static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1673,14 +1644,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
1673 .copy_pte = sdma_v3_0_vm_copy_pte, 1644 .copy_pte = sdma_v3_0_vm_copy_pte,
1674 .write_pte = sdma_v3_0_vm_write_pte, 1645 .write_pte = sdma_v3_0_vm_write_pte,
1675 .set_pte_pde = sdma_v3_0_vm_set_pte_pde, 1646 .set_pte_pde = sdma_v3_0_vm_set_pte_pde,
1676 .pad_ib = sdma_v3_0_vm_pad_ib,
1677}; 1647};
1678 1648
1679static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) 1649static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
1680{ 1650{
1651 unsigned i;
1652
1681 if (adev->vm_manager.vm_pte_funcs == NULL) { 1653 if (adev->vm_manager.vm_pte_funcs == NULL) {
1682 adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; 1654 adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
1683 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; 1655 for (i = 0; i < adev->sdma.num_instances; i++)
1684 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; 1656 adev->vm_manager.vm_pte_rings[i] =
1657 &adev->sdma.instance[i].ring;
1658
1659 adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
1685 } 1660 }
1686} 1661}
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
index 361c49a82323..083893dd68c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
@@ -272,6 +272,12 @@ static int tonga_smu_upload_firmware_image(struct amdgpu_device *adev)
272 if (!adev->pm.fw) 272 if (!adev->pm.fw)
273 return -EINVAL; 273 return -EINVAL;
274 274
275 /* Skip SMC ucode loading on SR-IOV capable boards.
276 * vbios does this for us in asic_init in that case.
277 */
278 if (adev->virtualization.supports_sr_iov)
279 return 0;
280
275 hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; 281 hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
276 amdgpu_ucode_print_smc_hdr(&hdr->header); 282 amdgpu_ucode_print_smc_hdr(&hdr->header);
277 283
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 5e9f73af83a8..70ed73fa5156 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -164,7 +164,7 @@ static int uvd_v4_2_hw_init(void *handle)
164 goto done; 164 goto done;
165 } 165 }
166 166
167 r = amdgpu_ring_lock(ring, 10); 167 r = amdgpu_ring_alloc(ring, 10);
168 if (r) { 168 if (r) {
169 DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); 169 DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
170 goto done; 170 goto done;
@@ -189,7 +189,7 @@ static int uvd_v4_2_hw_init(void *handle)
189 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); 189 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
190 amdgpu_ring_write(ring, 3); 190 amdgpu_ring_write(ring, 3);
191 191
192 amdgpu_ring_unlock_commit(ring); 192 amdgpu_ring_commit(ring);
193 193
194done: 194done:
195 /* lower clocks again */ 195 /* lower clocks again */
@@ -439,33 +439,6 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
439} 439}
440 440
441/** 441/**
442 * uvd_v4_2_ring_emit_semaphore - emit semaphore command
443 *
444 * @ring: amdgpu_ring pointer
445 * @semaphore: semaphore to emit commands for
446 * @emit_wait: true if we should emit a wait command
447 *
448 * Emit a semaphore command (either wait or signal) to the UVD ring.
449 */
450static bool uvd_v4_2_ring_emit_semaphore(struct amdgpu_ring *ring,
451 struct amdgpu_semaphore *semaphore,
452 bool emit_wait)
453{
454 uint64_t addr = semaphore->gpu_addr;
455
456 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
457 amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
458
459 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
460 amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
461
462 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
463 amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
464
465 return true;
466}
467
468/**
469 * uvd_v4_2_ring_test_ring - register write test 442 * uvd_v4_2_ring_test_ring - register write test
470 * 443 *
471 * @ring: amdgpu_ring pointer 444 * @ring: amdgpu_ring pointer
@@ -480,7 +453,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
480 int r; 453 int r;
481 454
482 WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); 455 WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
483 r = amdgpu_ring_lock(ring, 3); 456 r = amdgpu_ring_alloc(ring, 3);
484 if (r) { 457 if (r) {
485 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 458 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
486 ring->idx, r); 459 ring->idx, r);
@@ -488,7 +461,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
488 } 461 }
489 amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); 462 amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
490 amdgpu_ring_write(ring, 0xDEADBEEF); 463 amdgpu_ring_write(ring, 0xDEADBEEF);
491 amdgpu_ring_unlock_commit(ring); 464 amdgpu_ring_commit(ring);
492 for (i = 0; i < adev->usec_timeout; i++) { 465 for (i = 0; i < adev->usec_timeout; i++) {
493 tmp = RREG32(mmUVD_CONTEXT_ID); 466 tmp = RREG32(mmUVD_CONTEXT_ID);
494 if (tmp == 0xDEADBEEF) 467 if (tmp == 0xDEADBEEF)
@@ -549,7 +522,7 @@ static int uvd_v4_2_ring_test_ib(struct amdgpu_ring *ring)
549 goto error; 522 goto error;
550 } 523 }
551 524
552 r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); 525 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
553 if (r) { 526 if (r) {
554 DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); 527 DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
555 goto error; 528 goto error;
@@ -603,6 +576,10 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
603 addr = (adev->uvd.gpu_addr >> 32) & 0xFF; 576 addr = (adev->uvd.gpu_addr >> 32) & 0xFF;
604 WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); 577 WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
605 578
579 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
580 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
581 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
582
606 uvd_v4_2_init_cg(adev); 583 uvd_v4_2_init_cg(adev);
607} 584}
608 585
@@ -804,6 +781,13 @@ static void uvd_v4_2_print_status(void *handle)
804 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); 781 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
805 dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", 782 dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n",
806 RREG32(mmUVD_CONTEXT_ID)); 783 RREG32(mmUVD_CONTEXT_ID));
784 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
785 RREG32(mmUVD_UDEC_ADDR_CONFIG));
786 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
787 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
788 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
789 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
790
807} 791}
808 792
809static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev, 793static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev,
@@ -882,10 +866,10 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
882 .parse_cs = amdgpu_uvd_ring_parse_cs, 866 .parse_cs = amdgpu_uvd_ring_parse_cs,
883 .emit_ib = uvd_v4_2_ring_emit_ib, 867 .emit_ib = uvd_v4_2_ring_emit_ib,
884 .emit_fence = uvd_v4_2_ring_emit_fence, 868 .emit_fence = uvd_v4_2_ring_emit_fence,
885 .emit_semaphore = uvd_v4_2_ring_emit_semaphore,
886 .test_ring = uvd_v4_2_ring_test_ring, 869 .test_ring = uvd_v4_2_ring_test_ring,
887 .test_ib = uvd_v4_2_ring_test_ib, 870 .test_ib = uvd_v4_2_ring_test_ib,
888 .insert_nop = amdgpu_ring_insert_nop, 871 .insert_nop = amdgpu_ring_insert_nop,
872 .pad_ib = amdgpu_ring_generic_pad_ib,
889}; 873};
890 874
891static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) 875static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
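The UVD hunks above (and the matching uvd_v5_0/uvd_v6_0 hunks below) are mechanical renames on the submission path: amdgpu_ring_lock() becomes amdgpu_ring_alloc() and amdgpu_ring_unlock_commit() becomes amdgpu_ring_commit(); note that the DRM_ERROR strings still talk about "locking" the ring, as the messages were left untouched. A minimal sketch of the resulting flow, mirroring the uvd_v4_2_ring_test_ring() hunk; the wrapper name is hypothetical and the error handling is abbreviated relative to the real function:

/*
 * Sketch of the post-rename ring test flow (wrapper name hypothetical,
 * error handling trimmed). Register names and the 3-dword sequence are
 * taken from the uvd_v4_2_ring_test_ring() hunk above.
 */
static int uvd_ring_write_test_sketch(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	int r;

	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);		/* was amdgpu_ring_lock() */
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);		/* was amdgpu_ring_unlock_commit() */

	for (i = 0; i < adev->usec_timeout; i++) {
		if (RREG32(mmUVD_CONTEXT_ID) == 0xDEADBEEF)
			return 0;
		udelay(1);
	}
	return -EINVAL;				/* timed out */
}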
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 38864f562981..578ffb62fdb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -160,7 +160,7 @@ static int uvd_v5_0_hw_init(void *handle)
160 goto done; 160 goto done;
161 } 161 }
162 162
163 r = amdgpu_ring_lock(ring, 10); 163 r = amdgpu_ring_alloc(ring, 10);
164 if (r) { 164 if (r) {
165 DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); 165 DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
166 goto done; 166 goto done;
@@ -185,7 +185,7 @@ static int uvd_v5_0_hw_init(void *handle)
185 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); 185 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
186 amdgpu_ring_write(ring, 3); 186 amdgpu_ring_write(ring, 3);
187 187
188 amdgpu_ring_unlock_commit(ring); 188 amdgpu_ring_commit(ring);
189 189
190done: 190done:
191 /* lower clocks again */ 191 /* lower clocks again */
@@ -279,6 +279,10 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
279 size = AMDGPU_UVD_HEAP_SIZE; 279 size = AMDGPU_UVD_HEAP_SIZE;
280 WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); 280 WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3);
281 WREG32(mmUVD_VCPU_CACHE_SIZE2, size); 281 WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
282
283 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
284 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
285 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
282} 286}
283 287
284/** 288/**
@@ -483,33 +487,6 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
483} 487}
484 488
485/** 489/**
486 * uvd_v5_0_ring_emit_semaphore - emit semaphore command
487 *
488 * @ring: amdgpu_ring pointer
489 * @semaphore: semaphore to emit commands for
490 * @emit_wait: true if we should emit a wait command
491 *
492 * Emit a semaphore command (either wait or signal) to the UVD ring.
493 */
494static bool uvd_v5_0_ring_emit_semaphore(struct amdgpu_ring *ring,
495 struct amdgpu_semaphore *semaphore,
496 bool emit_wait)
497{
498 uint64_t addr = semaphore->gpu_addr;
499
500 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
501 amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
502
503 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
504 amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
505
506 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
507 amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
508
509 return true;
510}
511
512/**
513 * uvd_v5_0_ring_test_ring - register write test 490 * uvd_v5_0_ring_test_ring - register write test
514 * 491 *
515 * @ring: amdgpu_ring pointer 492 * @ring: amdgpu_ring pointer
@@ -524,7 +501,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
524 int r; 501 int r;
525 502
526 WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); 503 WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
527 r = amdgpu_ring_lock(ring, 3); 504 r = amdgpu_ring_alloc(ring, 3);
528 if (r) { 505 if (r) {
529 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 506 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
530 ring->idx, r); 507 ring->idx, r);
@@ -532,7 +509,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
532 } 509 }
533 amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); 510 amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
534 amdgpu_ring_write(ring, 0xDEADBEEF); 511 amdgpu_ring_write(ring, 0xDEADBEEF);
535 amdgpu_ring_unlock_commit(ring); 512 amdgpu_ring_commit(ring);
536 for (i = 0; i < adev->usec_timeout; i++) { 513 for (i = 0; i < adev->usec_timeout; i++) {
537 tmp = RREG32(mmUVD_CONTEXT_ID); 514 tmp = RREG32(mmUVD_CONTEXT_ID);
538 if (tmp == 0xDEADBEEF) 515 if (tmp == 0xDEADBEEF)
@@ -595,7 +572,7 @@ static int uvd_v5_0_ring_test_ib(struct amdgpu_ring *ring)
595 goto error; 572 goto error;
596 } 573 }
597 574
598 r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); 575 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
599 if (r) { 576 if (r) {
600 DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); 577 DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
601 goto error; 578 goto error;
@@ -751,6 +728,12 @@ static void uvd_v5_0_print_status(void *handle)
751 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); 728 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
752 dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", 729 dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n",
753 RREG32(mmUVD_CONTEXT_ID)); 730 RREG32(mmUVD_CONTEXT_ID));
731 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
732 RREG32(mmUVD_UDEC_ADDR_CONFIG));
733 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
734 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
735 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
736 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
754} 737}
755 738
756static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev, 739static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -821,10 +804,10 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
821 .parse_cs = amdgpu_uvd_ring_parse_cs, 804 .parse_cs = amdgpu_uvd_ring_parse_cs,
822 .emit_ib = uvd_v5_0_ring_emit_ib, 805 .emit_ib = uvd_v5_0_ring_emit_ib,
823 .emit_fence = uvd_v5_0_ring_emit_fence, 806 .emit_fence = uvd_v5_0_ring_emit_fence,
824 .emit_semaphore = uvd_v5_0_ring_emit_semaphore,
825 .test_ring = uvd_v5_0_ring_test_ring, 807 .test_ring = uvd_v5_0_ring_test_ring,
826 .test_ib = uvd_v5_0_ring_test_ib, 808 .test_ib = uvd_v5_0_ring_test_ib,
827 .insert_nop = amdgpu_ring_insert_nop, 809 .insert_nop = amdgpu_ring_insert_nop,
810 .pad_ib = amdgpu_ring_generic_pad_ib,
828}; 811};
829 812
830static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) 813static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
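The mc_resume() hunks in uvd_v4_2.c above and uvd_v5_0.c here (uvd_v6_0.c below gets the same three writes) now program the UVD decoder address-config registers from the GFX gb_addr_config value. Condensed into a hypothetical shared helper purely for illustration; the patch itself open-codes the writes per ASIC:

/* Illustrative only; not a helper introduced by the patch. */
static void uvd_seed_udec_addr_config(struct amdgpu_device *adev)
{
	u32 cfg = adev->gfx.config.gb_addr_config;

	/* mirror the GFX tiling/address configuration into the UVD decoder */
	WREG32(mmUVD_UDEC_ADDR_CONFIG, cfg);
	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, cfg);
	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, cfg);
}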
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 3d5913926436..d4da1f04378c 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -157,7 +157,7 @@ static int uvd_v6_0_hw_init(void *handle)
157 goto done; 157 goto done;
158 } 158 }
159 159
160 r = amdgpu_ring_lock(ring, 10); 160 r = amdgpu_ring_alloc(ring, 10);
161 if (r) { 161 if (r) {
162 DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); 162 DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
163 goto done; 163 goto done;
@@ -182,7 +182,7 @@ static int uvd_v6_0_hw_init(void *handle)
182 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); 182 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
183 amdgpu_ring_write(ring, 3); 183 amdgpu_ring_write(ring, 3);
184 184
185 amdgpu_ring_unlock_commit(ring); 185 amdgpu_ring_commit(ring);
186 186
187done: 187done:
188 if (!r) 188 if (!r)
@@ -277,6 +277,10 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev)
277 size = AMDGPU_UVD_HEAP_SIZE; 277 size = AMDGPU_UVD_HEAP_SIZE;
278 WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); 278 WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3);
279 WREG32(mmUVD_VCPU_CACHE_SIZE2, size); 279 WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
280
281 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
282 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
283 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
280} 284}
281 285
282static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, 286static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev,
@@ -722,33 +726,6 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
722} 726}
723 727
724/** 728/**
725 * uvd_v6_0_ring_emit_semaphore - emit semaphore command
726 *
727 * @ring: amdgpu_ring pointer
728 * @semaphore: semaphore to emit commands for
729 * @emit_wait: true if we should emit a wait command
730 *
731 * Emit a semaphore command (either wait or signal) to the UVD ring.
732 */
733static bool uvd_v6_0_ring_emit_semaphore(struct amdgpu_ring *ring,
734 struct amdgpu_semaphore *semaphore,
735 bool emit_wait)
736{
737 uint64_t addr = semaphore->gpu_addr;
738
739 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
740 amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
741
742 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
743 amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
744
745 amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
746 amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
747
748 return true;
749}
750
751/**
752 * uvd_v6_0_ring_test_ring - register write test 729 * uvd_v6_0_ring_test_ring - register write test
753 * 730 *
754 * @ring: amdgpu_ring pointer 731 * @ring: amdgpu_ring pointer
@@ -763,7 +740,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
763 int r; 740 int r;
764 741
765 WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); 742 WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
766 r = amdgpu_ring_lock(ring, 3); 743 r = amdgpu_ring_alloc(ring, 3);
767 if (r) { 744 if (r) {
768 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 745 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
769 ring->idx, r); 746 ring->idx, r);
@@ -771,7 +748,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
771 } 748 }
772 amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); 749 amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
773 amdgpu_ring_write(ring, 0xDEADBEEF); 750 amdgpu_ring_write(ring, 0xDEADBEEF);
774 amdgpu_ring_unlock_commit(ring); 751 amdgpu_ring_commit(ring);
775 for (i = 0; i < adev->usec_timeout; i++) { 752 for (i = 0; i < adev->usec_timeout; i++) {
776 tmp = RREG32(mmUVD_CONTEXT_ID); 753 tmp = RREG32(mmUVD_CONTEXT_ID);
777 if (tmp == 0xDEADBEEF) 754 if (tmp == 0xDEADBEEF)
@@ -827,7 +804,7 @@ static int uvd_v6_0_ring_test_ib(struct amdgpu_ring *ring)
827 goto error; 804 goto error;
828 } 805 }
829 806
830 r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); 807 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
831 if (r) { 808 if (r) {
832 DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); 809 DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
833 goto error; 810 goto error;
@@ -974,6 +951,12 @@ static void uvd_v6_0_print_status(void *handle)
974 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); 951 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
975 dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", 952 dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n",
976 RREG32(mmUVD_CONTEXT_ID)); 953 RREG32(mmUVD_CONTEXT_ID));
954 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
955 RREG32(mmUVD_UDEC_ADDR_CONFIG));
956 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
957 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
958 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
959 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
977} 960}
978 961
979static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev, 962static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1062,10 +1045,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
1062 .parse_cs = amdgpu_uvd_ring_parse_cs, 1045 .parse_cs = amdgpu_uvd_ring_parse_cs,
1063 .emit_ib = uvd_v6_0_ring_emit_ib, 1046 .emit_ib = uvd_v6_0_ring_emit_ib,
1064 .emit_fence = uvd_v6_0_ring_emit_fence, 1047 .emit_fence = uvd_v6_0_ring_emit_fence,
1065 .emit_semaphore = uvd_v6_0_ring_emit_semaphore,
1066 .test_ring = uvd_v6_0_ring_test_ring, 1048 .test_ring = uvd_v6_0_ring_test_ring,
1067 .test_ib = uvd_v6_0_ring_test_ib, 1049 .test_ib = uvd_v6_0_ring_test_ib,
1068 .insert_nop = amdgpu_ring_insert_nop, 1050 .insert_nop = amdgpu_ring_insert_nop,
1051 .pad_ib = amdgpu_ring_generic_pad_ib,
1069}; 1052};
1070 1053
1071static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) 1054static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
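All three UVD IB tests now pass an extra boolean argument (true) to amdgpu_uvd_get_destroy_msg(). A sketch of the updated call site; the wrapper name is hypothetical, and the fence_wait()/fence_put() tail is an assumption about the unchanged code around the hunk rather than something shown in the diff:

/* Sketch only; the create-message half of the IB test is omitted. */
static int uvd_ib_test_tail_sketch(struct amdgpu_ring *ring)
{
	struct fence *fence = NULL;
	int r;

	/* old call: amdgpu_uvd_get_destroy_msg(ring, 1, &fence); */
	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
		return r;
	}

	r = fence_wait(fence, false);	/* assumed unchanged tail of the test */
	fence_put(fence);
	return r;
}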
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 52ac7a8f1e58..9c804f436974 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -639,10 +639,10 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
639 .parse_cs = amdgpu_vce_ring_parse_cs, 639 .parse_cs = amdgpu_vce_ring_parse_cs,
640 .emit_ib = amdgpu_vce_ring_emit_ib, 640 .emit_ib = amdgpu_vce_ring_emit_ib,
641 .emit_fence = amdgpu_vce_ring_emit_fence, 641 .emit_fence = amdgpu_vce_ring_emit_fence,
642 .emit_semaphore = amdgpu_vce_ring_emit_semaphore,
643 .test_ring = amdgpu_vce_ring_test_ring, 642 .test_ring = amdgpu_vce_ring_test_ring,
644 .test_ib = amdgpu_vce_ring_test_ib, 643 .test_ib = amdgpu_vce_ring_test_ib,
645 .insert_nop = amdgpu_ring_insert_nop, 644 .insert_nop = amdgpu_ring_insert_nop,
645 .pad_ib = amdgpu_ring_generic_pad_ib,
646}; 646};
647 647
648static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev) 648static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index e99af81e4aec..8f8d479061f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -759,10 +759,10 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
759 .parse_cs = amdgpu_vce_ring_parse_cs, 759 .parse_cs = amdgpu_vce_ring_parse_cs,
760 .emit_ib = amdgpu_vce_ring_emit_ib, 760 .emit_ib = amdgpu_vce_ring_emit_ib,
761 .emit_fence = amdgpu_vce_ring_emit_fence, 761 .emit_fence = amdgpu_vce_ring_emit_fence,
762 .emit_semaphore = amdgpu_vce_ring_emit_semaphore,
763 .test_ring = amdgpu_vce_ring_test_ring, 762 .test_ring = amdgpu_vce_ring_test_ring,
764 .test_ib = amdgpu_vce_ring_test_ib, 763 .test_ib = amdgpu_vce_ring_test_ib,
765 .insert_nop = amdgpu_ring_insert_nop, 764 .insert_nop = amdgpu_ring_insert_nop,
765 .pad_ib = amdgpu_ring_generic_pad_ib,
766}; 766};
767 767
768static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev) 768static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
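Every amdgpu_ring_funcs table touched here loses its .emit_semaphore hook (the semaphore emit paths are deleted throughout the series) and gains a .pad_ib hook, supplied by the generic helper. For reference, a representative initializer as it reads after the change; only the fields visible in the hunks are listed, and the real tables also carry additional hooks (ring pointer accessors and the like) that fall outside the diff context:

/* Assembled from the VCE hunks above; illustrative, not an extra table. */
static const struct amdgpu_ring_funcs example_vce_ring_funcs = {
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	/* .emit_semaphore is gone */
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,	/* new hook */
};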
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 89f5a1ff6f43..125003517544 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -74,6 +74,9 @@
74#include "uvd_v6_0.h" 74#include "uvd_v6_0.h"
75#include "vce_v3_0.h" 75#include "vce_v3_0.h"
76#include "amdgpu_powerplay.h" 76#include "amdgpu_powerplay.h"
77#if defined(CONFIG_DRM_AMD_ACP)
78#include "amdgpu_acp.h"
79#endif
77 80
78/* 81/*
79 * Indirect registers accessor 82 * Indirect registers accessor
@@ -571,374 +574,12 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
571 return -EINVAL; 574 return -EINVAL;
572} 575}
573 576
574static void vi_print_gpu_status_regs(struct amdgpu_device *adev)
575{
576 dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
577 RREG32(mmGRBM_STATUS));
578 dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
579 RREG32(mmGRBM_STATUS2));
580 dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
581 RREG32(mmGRBM_STATUS_SE0));
582 dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
583 RREG32(mmGRBM_STATUS_SE1));
584 dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
585 RREG32(mmGRBM_STATUS_SE2));
586 dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
587 RREG32(mmGRBM_STATUS_SE3));
588 dev_info(adev->dev, " SRBM_STATUS=0x%08X\n",
589 RREG32(mmSRBM_STATUS));
590 dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
591 RREG32(mmSRBM_STATUS2));
592 dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
593 RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
594 if (adev->sdma.num_instances > 1) {
595 dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
596 RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
597 }
598 dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
599 dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
600 RREG32(mmCP_STALLED_STAT1));
601 dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
602 RREG32(mmCP_STALLED_STAT2));
603 dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
604 RREG32(mmCP_STALLED_STAT3));
605 dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
606 RREG32(mmCP_CPF_BUSY_STAT));
607 dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
608 RREG32(mmCP_CPF_STALLED_STAT1));
609 dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
610 dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
611 dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
612 RREG32(mmCP_CPC_STALLED_STAT1));
613 dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
614}
615
616/**
617 * vi_gpu_check_soft_reset - check which blocks are busy
618 *
619 * @adev: amdgpu_device pointer
620 *
621 * Check which blocks are busy and return the relevant reset
622 * mask to be used by vi_gpu_soft_reset().
623 * Returns a mask of the blocks to be reset.
624 */
625u32 vi_gpu_check_soft_reset(struct amdgpu_device *adev)
626{
627 u32 reset_mask = 0;
628 u32 tmp;
629
630 /* GRBM_STATUS */
631 tmp = RREG32(mmGRBM_STATUS);
632 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
633 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
634 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
635 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
636 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
637 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
638 reset_mask |= AMDGPU_RESET_GFX;
639
640 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK))
641 reset_mask |= AMDGPU_RESET_CP;
642
643 /* GRBM_STATUS2 */
644 tmp = RREG32(mmGRBM_STATUS2);
645 if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
646 reset_mask |= AMDGPU_RESET_RLC;
647
648 if (tmp & (GRBM_STATUS2__CPF_BUSY_MASK |
649 GRBM_STATUS2__CPC_BUSY_MASK |
650 GRBM_STATUS2__CPG_BUSY_MASK))
651 reset_mask |= AMDGPU_RESET_CP;
652
653 /* SRBM_STATUS2 */
654 tmp = RREG32(mmSRBM_STATUS2);
655 if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK)
656 reset_mask |= AMDGPU_RESET_DMA;
657
658 if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)
659 reset_mask |= AMDGPU_RESET_DMA1;
660
661 /* SRBM_STATUS */
662 tmp = RREG32(mmSRBM_STATUS);
663
664 if (tmp & SRBM_STATUS__IH_BUSY_MASK)
665 reset_mask |= AMDGPU_RESET_IH;
666
667 if (tmp & SRBM_STATUS__SEM_BUSY_MASK)
668 reset_mask |= AMDGPU_RESET_SEM;
669
670 if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
671 reset_mask |= AMDGPU_RESET_GRBM;
672
673 if (adev->asic_type != CHIP_TOPAZ) {
674 if (tmp & (SRBM_STATUS__UVD_RQ_PENDING_MASK |
675 SRBM_STATUS__UVD_BUSY_MASK))
676 reset_mask |= AMDGPU_RESET_UVD;
677 }
678
679 if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
680 reset_mask |= AMDGPU_RESET_VMC;
681
682 if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
683 SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK))
684 reset_mask |= AMDGPU_RESET_MC;
685
686 /* SDMA0_STATUS_REG */
687 tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
688 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
689 reset_mask |= AMDGPU_RESET_DMA;
690
691 /* SDMA1_STATUS_REG */
692 if (adev->sdma.num_instances > 1) {
693 tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
694 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
695 reset_mask |= AMDGPU_RESET_DMA1;
696 }
697#if 0
698 /* VCE_STATUS */
699 if (adev->asic_type != CHIP_TOPAZ) {
700 tmp = RREG32(mmVCE_STATUS);
701 if (tmp & VCE_STATUS__VCPU_REPORT_RB0_BUSY_MASK)
702 reset_mask |= AMDGPU_RESET_VCE;
703 if (tmp & VCE_STATUS__VCPU_REPORT_RB1_BUSY_MASK)
704 reset_mask |= AMDGPU_RESET_VCE1;
705
706 }
707
708 if (adev->asic_type != CHIP_TOPAZ) {
709 if (amdgpu_display_is_display_hung(adev))
710 reset_mask |= AMDGPU_RESET_DISPLAY;
711 }
712#endif
713
714 /* Skip MC reset as it's mostly likely not hung, just busy */
715 if (reset_mask & AMDGPU_RESET_MC) {
716 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
717 reset_mask &= ~AMDGPU_RESET_MC;
718 }
719
720 return reset_mask;
721}
722
723/**
724 * vi_gpu_soft_reset - soft reset GPU
725 *
726 * @adev: amdgpu_device pointer
727 * @reset_mask: mask of which blocks to reset
728 *
729 * Soft reset the blocks specified in @reset_mask.
730 */
731static void vi_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask)
732{
733 struct amdgpu_mode_mc_save save;
734 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
735 u32 tmp;
736
737 if (reset_mask == 0)
738 return;
739
740 dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask);
741
742 vi_print_gpu_status_regs(adev);
743 dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
744 RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR));
745 dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
746 RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS));
747
748 /* disable CG/PG */
749
750 /* stop the rlc */
751 //XXX
752 //gfx_v8_0_rlc_stop(adev);
753
754 /* Disable GFX parsing/prefetching */
755 tmp = RREG32(mmCP_ME_CNTL);
756 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
757 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
758 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
759 WREG32(mmCP_ME_CNTL, tmp);
760
761 /* Disable MEC parsing/prefetching */
762 tmp = RREG32(mmCP_MEC_CNTL);
763 tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1);
764 tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1);
765 WREG32(mmCP_MEC_CNTL, tmp);
766
767 if (reset_mask & AMDGPU_RESET_DMA) {
768 /* sdma0 */
769 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
770 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
771 WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
772 }
773 if (reset_mask & AMDGPU_RESET_DMA1) {
774 /* sdma1 */
775 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
776 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
777 WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
778 }
779
780 gmc_v8_0_mc_stop(adev, &save);
781 if (amdgpu_asic_wait_for_mc_idle(adev)) {
782 dev_warn(adev->dev, "Wait for MC idle timedout !\n");
783 }
784
785 if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) {
786 grbm_soft_reset =
787 REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
788 grbm_soft_reset =
789 REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
790 }
791
792 if (reset_mask & AMDGPU_RESET_CP) {
793 grbm_soft_reset =
794 REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
795 srbm_soft_reset =
796 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
797 }
798
799 if (reset_mask & AMDGPU_RESET_DMA)
800 srbm_soft_reset =
801 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA, 1);
802
803 if (reset_mask & AMDGPU_RESET_DMA1)
804 srbm_soft_reset =
805 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1, 1);
806
807 if (reset_mask & AMDGPU_RESET_DISPLAY)
808 srbm_soft_reset =
809 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_DC, 1);
810
811 if (reset_mask & AMDGPU_RESET_RLC)
812 grbm_soft_reset =
813 REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
814
815 if (reset_mask & AMDGPU_RESET_SEM)
816 srbm_soft_reset =
817 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
818
819 if (reset_mask & AMDGPU_RESET_IH)
820 srbm_soft_reset =
821 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_IH, 1);
822
823 if (reset_mask & AMDGPU_RESET_GRBM)
824 srbm_soft_reset =
825 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
826
827 if (reset_mask & AMDGPU_RESET_VMC)
828 srbm_soft_reset =
829 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VMC, 1);
830
831 if (reset_mask & AMDGPU_RESET_UVD)
832 srbm_soft_reset =
833 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
834
835 if (reset_mask & AMDGPU_RESET_VCE)
836 srbm_soft_reset =
837 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
838
839 if (reset_mask & AMDGPU_RESET_VCE)
840 srbm_soft_reset =
841 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
842
843 if (!(adev->flags & AMD_IS_APU)) {
844 if (reset_mask & AMDGPU_RESET_MC)
845 srbm_soft_reset =
846 REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
847 }
848
849 if (grbm_soft_reset) {
850 tmp = RREG32(mmGRBM_SOFT_RESET);
851 tmp |= grbm_soft_reset;
852 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
853 WREG32(mmGRBM_SOFT_RESET, tmp);
854 tmp = RREG32(mmGRBM_SOFT_RESET);
855
856 udelay(50);
857
858 tmp &= ~grbm_soft_reset;
859 WREG32(mmGRBM_SOFT_RESET, tmp);
860 tmp = RREG32(mmGRBM_SOFT_RESET);
861 }
862
863 if (srbm_soft_reset) {
864 tmp = RREG32(mmSRBM_SOFT_RESET);
865 tmp |= srbm_soft_reset;
866 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
867 WREG32(mmSRBM_SOFT_RESET, tmp);
868 tmp = RREG32(mmSRBM_SOFT_RESET);
869
870 udelay(50);
871
872 tmp &= ~srbm_soft_reset;
873 WREG32(mmSRBM_SOFT_RESET, tmp);
874 tmp = RREG32(mmSRBM_SOFT_RESET);
875 }
876
877 /* Wait a little for things to settle down */
878 udelay(50);
879
880 gmc_v8_0_mc_resume(adev, &save);
881 udelay(50);
882
883 vi_print_gpu_status_regs(adev);
884}
885
886static void vi_gpu_pci_config_reset(struct amdgpu_device *adev) 577static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
887{ 578{
888 struct amdgpu_mode_mc_save save; 579 u32 i;
889 u32 tmp, i;
890 580
891 dev_info(adev->dev, "GPU pci config reset\n"); 581 dev_info(adev->dev, "GPU pci config reset\n");
892 582
893 /* disable dpm? */
894
895 /* disable cg/pg */
896
897 /* Disable GFX parsing/prefetching */
898 tmp = RREG32(mmCP_ME_CNTL);
899 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
900 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
901 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
902 WREG32(mmCP_ME_CNTL, tmp);
903
904 /* Disable MEC parsing/prefetching */
905 tmp = RREG32(mmCP_MEC_CNTL);
906 tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1);
907 tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1);
908 WREG32(mmCP_MEC_CNTL, tmp);
909
910 /* Disable GFX parsing/prefetching */
911 WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK |
912 CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
913
914 /* Disable MEC parsing/prefetching */
915 WREG32(mmCP_MEC_CNTL,
916 CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
917
918 /* sdma0 */
919 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
920 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
921 WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
922
923 /* sdma1 */
924 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
925 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
926 WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
927
928 /* XXX other engines? */
929
930 /* halt the rlc, disable cp internal ints */
931 //XXX
932 //gfx_v8_0_rlc_stop(adev);
933
934 udelay(50);
935
936 /* disable mem access */
937 gmc_v8_0_mc_stop(adev, &save);
938 if (amdgpu_asic_wait_for_mc_idle(adev)) {
939 dev_warn(adev->dev, "Wait for MC idle timed out !\n");
940 }
941
942 /* disable BM */ 583 /* disable BM */
943 pci_clear_master(adev->pdev); 584 pci_clear_master(adev->pdev);
944 /* reset */ 585 /* reset */
@@ -978,26 +619,11 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun
978 */ 619 */
979static int vi_asic_reset(struct amdgpu_device *adev) 620static int vi_asic_reset(struct amdgpu_device *adev)
980{ 621{
981 u32 reset_mask; 622 vi_set_bios_scratch_engine_hung(adev, true);
982
983 reset_mask = vi_gpu_check_soft_reset(adev);
984
985 if (reset_mask)
986 vi_set_bios_scratch_engine_hung(adev, true);
987
988 /* try soft reset */
989 vi_gpu_soft_reset(adev, reset_mask);
990
991 reset_mask = vi_gpu_check_soft_reset(adev);
992 623
993 /* try pci config reset */ 624 vi_gpu_pci_config_reset(adev);
994 if (reset_mask && amdgpu_hard_reset)
995 vi_gpu_pci_config_reset(adev);
996 625
997 reset_mask = vi_gpu_check_soft_reset(adev); 626 vi_set_bios_scratch_engine_hung(adev, false);
998
999 if (!reset_mask)
1000 vi_set_bios_scratch_engine_hung(adev, false);
1001 627
1002 return 0; 628 return 0;
1003} 629}
@@ -1347,6 +973,15 @@ static const struct amdgpu_ip_block_version cz_ip_blocks[] =
1347 .rev = 0, 973 .rev = 0,
1348 .funcs = &vce_v3_0_ip_funcs, 974 .funcs = &vce_v3_0_ip_funcs,
1349 }, 975 },
976#if defined(CONFIG_DRM_AMD_ACP)
977 {
978 .type = AMD_IP_BLOCK_TYPE_ACP,
979 .major = 2,
980 .minor = 2,
981 .rev = 0,
982 .funcs = &acp_ip_funcs,
983 },
984#endif
1350}; 985};
1351 986
1352int vi_set_ip_blocks(struct amdgpu_device *adev) 987int vi_set_ip_blocks(struct amdgpu_device *adev)
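Two things happen in vi.c: a CONFIG_DRM_AMD_ACP-guarded ACP v2.2 IP block is appended to cz_ip_blocks[], and the soft-reset machinery (vi_print_gpu_status_regs(), vi_gpu_check_soft_reset(), vi_gpu_soft_reset(), plus the engine-halt preamble in the PCI config reset path) is deleted, leaving vi_asic_reset() as a plain PCI config reset. The resulting flow, restated from the right-hand column of the hunk with a hypothetical sketch name:

static int vi_asic_reset_flow(struct amdgpu_device *adev)
{
	/* flag the engines as hung in the VBIOS scratch registers */
	vi_set_bios_scratch_engine_hung(adev, true);

	/* bus mastering off, then reset via PCI config space */
	vi_gpu_pci_config_reset(adev);

	/* clear the hung flag again once the reset has been issued */
	vi_set_bios_scratch_engine_hung(adev, false);

	return 0;
}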
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 1195d06f55bc..15ff8b2c26e7 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -73,6 +73,7 @@ enum amd_ip_block_type {
73 AMD_IP_BLOCK_TYPE_SDMA, 73 AMD_IP_BLOCK_TYPE_SDMA,
74 AMD_IP_BLOCK_TYPE_UVD, 74 AMD_IP_BLOCK_TYPE_UVD,
75 AMD_IP_BLOCK_TYPE_VCE, 75 AMD_IP_BLOCK_TYPE_VCE,
76 AMD_IP_BLOCK_TYPE_ACP,
76}; 77};
77 78
78enum amd_clockgating_state { 79enum amd_clockgating_state {
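amd_shared.h only grows the enum value consumed by the new, CONFIG_DRM_AMD_ACP-guarded cz_ip_blocks[] entry in the vi.c hunk above. The tail of the enum after this change, restated for reference:

enum amd_ip_block_type {
	/* ...existing entries... */
	AMD_IP_BLOCK_TYPE_UVD,
	AMD_IP_BLOCK_TYPE_VCE,
	AMD_IP_BLOCK_TYPE_ACP,	/* new: audio co-processor block on APUs */
};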
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
index dc52ea0df4b4..d3ccf5a86de0 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
@@ -1379,6 +1379,7 @@
1379#define mmDC_GPIO_PAD_STRENGTH_1 0x1978 1379#define mmDC_GPIO_PAD_STRENGTH_1 0x1978
1380#define mmDC_GPIO_PAD_STRENGTH_2 0x1979 1380#define mmDC_GPIO_PAD_STRENGTH_2 0x1979
1381#define mmPHY_AUX_CNTL 0x197f 1381#define mmPHY_AUX_CNTL 0x197f
1382#define mmDC_GPIO_I2CPAD_MASK 0x1974
1382#define mmDC_GPIO_I2CPAD_A 0x1975 1383#define mmDC_GPIO_I2CPAD_A 0x1975
1383#define mmDC_GPIO_I2CPAD_EN 0x1976 1384#define mmDC_GPIO_I2CPAD_EN 0x1976
1384#define mmDC_GPIO_I2CPAD_Y 0x1977 1385#define mmDC_GPIO_I2CPAD_Y 0x1977
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h
new file mode 100644
index 000000000000..6bea30ef3df5
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h
@@ -0,0 +1,1117 @@
1/*
2 * DCE_8_0 Register documentation
3 *
4 * Copyright (C) 2016 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24#ifndef DCE_8_0_ENUM_H
25#define DCE_8_0_ENUM_H
26
27typedef enum SurfaceEndian {
28 ENDIAN_NONE = 0x0,
29 ENDIAN_8IN16 = 0x1,
30 ENDIAN_8IN32 = 0x2,
31 ENDIAN_8IN64 = 0x3,
32} SurfaceEndian;
33typedef enum ArrayMode {
34 ARRAY_LINEAR_GENERAL = 0x0,
35 ARRAY_LINEAR_ALIGNED = 0x1,
36 ARRAY_1D_TILED_THIN1 = 0x2,
37 ARRAY_1D_TILED_THICK = 0x3,
38 ARRAY_2D_TILED_THIN1 = 0x4,
39 ARRAY_PRT_TILED_THIN1 = 0x5,
40 ARRAY_PRT_2D_TILED_THIN1 = 0x6,
41 ARRAY_2D_TILED_THICK = 0x7,
42 ARRAY_2D_TILED_XTHICK = 0x8,
43 ARRAY_PRT_TILED_THICK = 0x9,
44 ARRAY_PRT_2D_TILED_THICK = 0xa,
45 ARRAY_PRT_3D_TILED_THIN1 = 0xb,
46 ARRAY_3D_TILED_THIN1 = 0xc,
47 ARRAY_3D_TILED_THICK = 0xd,
48 ARRAY_3D_TILED_XTHICK = 0xe,
49 ARRAY_PRT_3D_TILED_THICK = 0xf,
50} ArrayMode;
51typedef enum PipeTiling {
52 CONFIG_1_PIPE = 0x0,
53 CONFIG_2_PIPE = 0x1,
54 CONFIG_4_PIPE = 0x2,
55 CONFIG_8_PIPE = 0x3,
56} PipeTiling;
57typedef enum BankTiling {
58 CONFIG_4_BANK = 0x0,
59 CONFIG_8_BANK = 0x1,
60} BankTiling;
61typedef enum GroupInterleave {
62 CONFIG_256B_GROUP = 0x0,
63 CONFIG_512B_GROUP = 0x1,
64} GroupInterleave;
65typedef enum RowTiling {
66 CONFIG_1KB_ROW = 0x0,
67 CONFIG_2KB_ROW = 0x1,
68 CONFIG_4KB_ROW = 0x2,
69 CONFIG_8KB_ROW = 0x3,
70 CONFIG_1KB_ROW_OPT = 0x4,
71 CONFIG_2KB_ROW_OPT = 0x5,
72 CONFIG_4KB_ROW_OPT = 0x6,
73 CONFIG_8KB_ROW_OPT = 0x7,
74} RowTiling;
75typedef enum BankSwapBytes {
76 CONFIG_128B_SWAPS = 0x0,
77 CONFIG_256B_SWAPS = 0x1,
78 CONFIG_512B_SWAPS = 0x2,
79 CONFIG_1KB_SWAPS = 0x3,
80} BankSwapBytes;
81typedef enum SampleSplitBytes {
82 CONFIG_1KB_SPLIT = 0x0,
83 CONFIG_2KB_SPLIT = 0x1,
84 CONFIG_4KB_SPLIT = 0x2,
85 CONFIG_8KB_SPLIT = 0x3,
86} SampleSplitBytes;
87typedef enum NumPipes {
88 ADDR_CONFIG_1_PIPE = 0x0,
89 ADDR_CONFIG_2_PIPE = 0x1,
90 ADDR_CONFIG_4_PIPE = 0x2,
91 ADDR_CONFIG_8_PIPE = 0x3,
92} NumPipes;
93typedef enum PipeInterleaveSize {
94 ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x0,
95 ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x1,
96} PipeInterleaveSize;
97typedef enum BankInterleaveSize {
98 ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x0,
99 ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x1,
100 ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x2,
101 ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x3,
102} BankInterleaveSize;
103typedef enum NumShaderEngines {
104 ADDR_CONFIG_1_SHADER_ENGINE = 0x0,
105 ADDR_CONFIG_2_SHADER_ENGINE = 0x1,
106} NumShaderEngines;
107typedef enum ShaderEngineTileSize {
108 ADDR_CONFIG_SE_TILE_16 = 0x0,
109 ADDR_CONFIG_SE_TILE_32 = 0x1,
110} ShaderEngineTileSize;
111typedef enum NumGPUs {
112 ADDR_CONFIG_1_GPU = 0x0,
113 ADDR_CONFIG_2_GPU = 0x1,
114 ADDR_CONFIG_4_GPU = 0x2,
115} NumGPUs;
116typedef enum MultiGPUTileSize {
117 ADDR_CONFIG_GPU_TILE_16 = 0x0,
118 ADDR_CONFIG_GPU_TILE_32 = 0x1,
119 ADDR_CONFIG_GPU_TILE_64 = 0x2,
120 ADDR_CONFIG_GPU_TILE_128 = 0x3,
121} MultiGPUTileSize;
122typedef enum RowSize {
123 ADDR_CONFIG_1KB_ROW = 0x0,
124 ADDR_CONFIG_2KB_ROW = 0x1,
125 ADDR_CONFIG_4KB_ROW = 0x2,
126} RowSize;
127typedef enum NumLowerPipes {
128 ADDR_CONFIG_1_LOWER_PIPES = 0x0,
129 ADDR_CONFIG_2_LOWER_PIPES = 0x1,
130} NumLowerPipes;
131typedef enum DebugBlockId {
132 DBG_CLIENT_BLKID_RESERVED = 0x0,
133 DBG_CLIENT_BLKID_dbg = 0x1,
134 DBG_CLIENT_BLKID_uvdu_0 = 0x2,
135 DBG_CLIENT_BLKID_uvdu_1 = 0x3,
136 DBG_CLIENT_BLKID_uvdu_2 = 0x4,
137 DBG_CLIENT_BLKID_uvdu_3 = 0x5,
138 DBG_CLIENT_BLKID_uvdu_4 = 0x6,
139 DBG_CLIENT_BLKID_uvdu_5 = 0x7,
140 DBG_CLIENT_BLKID_uvdu_6 = 0x8,
141 DBG_CLIENT_BLKID_uvdm_0 = 0x9,
142 DBG_CLIENT_BLKID_uvdm_1 = 0xa,
143 DBG_CLIENT_BLKID_uvdm_2 = 0xb,
144 DBG_CLIENT_BLKID_uvdm_3 = 0xc,
145 DBG_CLIENT_BLKID_vcea_0 = 0xd,
146 DBG_CLIENT_BLKID_vcea_1 = 0xe,
147 DBG_CLIENT_BLKID_vcea_2 = 0xf,
148 DBG_CLIENT_BLKID_vcea_3 = 0x10,
149 DBG_CLIENT_BLKID_vcea_4 = 0x11,
150 DBG_CLIENT_BLKID_vcea_5 = 0x12,
151 DBG_CLIENT_BLKID_vcea_6 = 0x13,
152 DBG_CLIENT_BLKID_vceb_0 = 0x14,
153 DBG_CLIENT_BLKID_vceb_1 = 0x15,
154 DBG_CLIENT_BLKID_vceb_2 = 0x16,
155 DBG_CLIENT_BLKID_dco = 0x17,
156 DBG_CLIENT_BLKID_xdma = 0x18,
157 DBG_CLIENT_BLKID_smu_0 = 0x19,
158 DBG_CLIENT_BLKID_smu_1 = 0x1a,
159 DBG_CLIENT_BLKID_smu_2 = 0x1b,
160 DBG_CLIENT_BLKID_gck = 0x1c,
161 DBG_CLIENT_BLKID_tmonw0 = 0x1d,
162 DBG_CLIENT_BLKID_tmonw1 = 0x1e,
163 DBG_CLIENT_BLKID_grbm = 0x1f,
164 DBG_CLIENT_BLKID_rlc = 0x20,
165 DBG_CLIENT_BLKID_ds0 = 0x21,
166 DBG_CLIENT_BLKID_cpg_0 = 0x22,
167 DBG_CLIENT_BLKID_cpg_1 = 0x23,
168 DBG_CLIENT_BLKID_cpc_0 = 0x24,
169 DBG_CLIENT_BLKID_cpc_1 = 0x25,
170 DBG_CLIENT_BLKID_cpf = 0x26,
171 DBG_CLIENT_BLKID_scf0 = 0x27,
172 DBG_CLIENT_BLKID_scf1 = 0x28,
173 DBG_CLIENT_BLKID_scf2 = 0x29,
174 DBG_CLIENT_BLKID_scf3 = 0x2a,
175 DBG_CLIENT_BLKID_pc0 = 0x2b,
176 DBG_CLIENT_BLKID_pc1 = 0x2c,
177 DBG_CLIENT_BLKID_pc2 = 0x2d,
178 DBG_CLIENT_BLKID_pc3 = 0x2e,
179 DBG_CLIENT_BLKID_vgt0 = 0x2f,
180 DBG_CLIENT_BLKID_vgt1 = 0x30,
181 DBG_CLIENT_BLKID_vgt2 = 0x31,
182 DBG_CLIENT_BLKID_vgt3 = 0x32,
183 DBG_CLIENT_BLKID_sx00 = 0x33,
184 DBG_CLIENT_BLKID_sx10 = 0x34,
185 DBG_CLIENT_BLKID_sx20 = 0x35,
186 DBG_CLIENT_BLKID_sx30 = 0x36,
187 DBG_CLIENT_BLKID_cb001 = 0x37,
188 DBG_CLIENT_BLKID_cb200 = 0x38,
189 DBG_CLIENT_BLKID_cb201 = 0x39,
190 DBG_CLIENT_BLKID_cbr0 = 0x3a,
191 DBG_CLIENT_BLKID_cb000 = 0x3b,
192 DBG_CLIENT_BLKID_cb101 = 0x3c,
193 DBG_CLIENT_BLKID_cb300 = 0x3d,
194 DBG_CLIENT_BLKID_cb301 = 0x3e,
195 DBG_CLIENT_BLKID_cbr1 = 0x3f,
196 DBG_CLIENT_BLKID_cb100 = 0x40,
197 DBG_CLIENT_BLKID_ia0 = 0x41,
198 DBG_CLIENT_BLKID_ia1 = 0x42,
199 DBG_CLIENT_BLKID_bci0 = 0x43,
200 DBG_CLIENT_BLKID_bci1 = 0x44,
201 DBG_CLIENT_BLKID_bci2 = 0x45,
202 DBG_CLIENT_BLKID_bci3 = 0x46,
203 DBG_CLIENT_BLKID_pa0 = 0x47,
204 DBG_CLIENT_BLKID_pa1 = 0x48,
205 DBG_CLIENT_BLKID_spim0 = 0x49,
206 DBG_CLIENT_BLKID_spim1 = 0x4a,
207 DBG_CLIENT_BLKID_spim2 = 0x4b,
208 DBG_CLIENT_BLKID_spim3 = 0x4c,
209 DBG_CLIENT_BLKID_sdma = 0x4d,
210 DBG_CLIENT_BLKID_ih = 0x4e,
211 DBG_CLIENT_BLKID_sem = 0x4f,
212 DBG_CLIENT_BLKID_srbm = 0x50,
213 DBG_CLIENT_BLKID_hdp = 0x51,
214 DBG_CLIENT_BLKID_acp_0 = 0x52,
215 DBG_CLIENT_BLKID_acp_1 = 0x53,
216 DBG_CLIENT_BLKID_sam = 0x54,
217 DBG_CLIENT_BLKID_mcc0 = 0x55,
218 DBG_CLIENT_BLKID_mcc1 = 0x56,
219 DBG_CLIENT_BLKID_mcc2 = 0x57,
220 DBG_CLIENT_BLKID_mcc3 = 0x58,
221 DBG_CLIENT_BLKID_mcd0 = 0x59,
222 DBG_CLIENT_BLKID_mcd1 = 0x5a,
223 DBG_CLIENT_BLKID_mcd2 = 0x5b,
224 DBG_CLIENT_BLKID_mcd3 = 0x5c,
225 DBG_CLIENT_BLKID_mcb = 0x5d,
226 DBG_CLIENT_BLKID_vmc = 0x5e,
227 DBG_CLIENT_BLKID_gmcon = 0x5f,
228 DBG_CLIENT_BLKID_gdc_0 = 0x60,
229 DBG_CLIENT_BLKID_gdc_1 = 0x61,
230 DBG_CLIENT_BLKID_gdc_2 = 0x62,
231 DBG_CLIENT_BLKID_gdc_3 = 0x63,
232 DBG_CLIENT_BLKID_gdc_4 = 0x64,
233 DBG_CLIENT_BLKID_gdc_5 = 0x65,
234 DBG_CLIENT_BLKID_gdc_6 = 0x66,
235 DBG_CLIENT_BLKID_gdc_7 = 0x67,
236 DBG_CLIENT_BLKID_gdc_8 = 0x68,
237 DBG_CLIENT_BLKID_gdc_9 = 0x69,
238 DBG_CLIENT_BLKID_gdc_10 = 0x6a,
239 DBG_CLIENT_BLKID_gdc_11 = 0x6b,
240 DBG_CLIENT_BLKID_gdc_12 = 0x6c,
241 DBG_CLIENT_BLKID_gdc_13 = 0x6d,
242 DBG_CLIENT_BLKID_gdc_14 = 0x6e,
243 DBG_CLIENT_BLKID_gdc_15 = 0x6f,
244 DBG_CLIENT_BLKID_gdc_16 = 0x70,
245 DBG_CLIENT_BLKID_gdc_17 = 0x71,
246 DBG_CLIENT_BLKID_gdc_18 = 0x72,
247 DBG_CLIENT_BLKID_gdc_19 = 0x73,
248 DBG_CLIENT_BLKID_gdc_20 = 0x74,
249 DBG_CLIENT_BLKID_gdc_21 = 0x75,
250 DBG_CLIENT_BLKID_gdc_22 = 0x76,
251 DBG_CLIENT_BLKID_wd = 0x77,
252 DBG_CLIENT_BLKID_sdma_0 = 0x78,
253 DBG_CLIENT_BLKID_sdma_1 = 0x79,
254} DebugBlockId;
255typedef enum DebugBlockId_OLD {
256 DBG_BLOCK_ID_RESERVED = 0x0,
257 DBG_BLOCK_ID_DBG = 0x1,
258 DBG_BLOCK_ID_VMC = 0x2,
259 DBG_BLOCK_ID_PDMA = 0x3,
260 DBG_BLOCK_ID_CG = 0x4,
261 DBG_BLOCK_ID_SRBM = 0x5,
262 DBG_BLOCK_ID_GRBM = 0x6,
263 DBG_BLOCK_ID_RLC = 0x7,
264 DBG_BLOCK_ID_CSC = 0x8,
265 DBG_BLOCK_ID_SEM = 0x9,
266 DBG_BLOCK_ID_IH = 0xa,
267 DBG_BLOCK_ID_SC = 0xb,
268 DBG_BLOCK_ID_SQ = 0xc,
269 DBG_BLOCK_ID_AVP = 0xd,
270 DBG_BLOCK_ID_GMCON = 0xe,
271 DBG_BLOCK_ID_SMU = 0xf,
272 DBG_BLOCK_ID_DMA0 = 0x10,
273 DBG_BLOCK_ID_DMA1 = 0x11,
274 DBG_BLOCK_ID_SPIM = 0x12,
275 DBG_BLOCK_ID_GDS = 0x13,
276 DBG_BLOCK_ID_SPIS = 0x14,
277 DBG_BLOCK_ID_UNUSED0 = 0x15,
278 DBG_BLOCK_ID_PA0 = 0x16,
279 DBG_BLOCK_ID_PA1 = 0x17,
280 DBG_BLOCK_ID_CP0 = 0x18,
281 DBG_BLOCK_ID_CP1 = 0x19,
282 DBG_BLOCK_ID_CP2 = 0x1a,
283 DBG_BLOCK_ID_UNUSED1 = 0x1b,
284 DBG_BLOCK_ID_UVDU = 0x1c,
285 DBG_BLOCK_ID_UVDM = 0x1d,
286 DBG_BLOCK_ID_VCE = 0x1e,
287 DBG_BLOCK_ID_UNUSED2 = 0x1f,
288 DBG_BLOCK_ID_VGT0 = 0x20,
289 DBG_BLOCK_ID_VGT1 = 0x21,
290 DBG_BLOCK_ID_IA = 0x22,
291 DBG_BLOCK_ID_UNUSED3 = 0x23,
292 DBG_BLOCK_ID_SCT0 = 0x24,
293 DBG_BLOCK_ID_SCT1 = 0x25,
294 DBG_BLOCK_ID_SPM0 = 0x26,
295 DBG_BLOCK_ID_SPM1 = 0x27,
296 DBG_BLOCK_ID_TCAA = 0x28,
297 DBG_BLOCK_ID_TCAB = 0x29,
298 DBG_BLOCK_ID_TCCA = 0x2a,
299 DBG_BLOCK_ID_TCCB = 0x2b,
300 DBG_BLOCK_ID_MCC0 = 0x2c,
301 DBG_BLOCK_ID_MCC1 = 0x2d,
302 DBG_BLOCK_ID_MCC2 = 0x2e,
303 DBG_BLOCK_ID_MCC3 = 0x2f,
304 DBG_BLOCK_ID_SX0 = 0x30,
305 DBG_BLOCK_ID_SX1 = 0x31,
306 DBG_BLOCK_ID_SX2 = 0x32,
307 DBG_BLOCK_ID_SX3 = 0x33,
308 DBG_BLOCK_ID_UNUSED4 = 0x34,
309 DBG_BLOCK_ID_UNUSED5 = 0x35,
310 DBG_BLOCK_ID_UNUSED6 = 0x36,
311 DBG_BLOCK_ID_UNUSED7 = 0x37,
312 DBG_BLOCK_ID_PC0 = 0x38,
313 DBG_BLOCK_ID_PC1 = 0x39,
314 DBG_BLOCK_ID_UNUSED8 = 0x3a,
315 DBG_BLOCK_ID_UNUSED9 = 0x3b,
316 DBG_BLOCK_ID_UNUSED10 = 0x3c,
317 DBG_BLOCK_ID_UNUSED11 = 0x3d,
318 DBG_BLOCK_ID_MCB = 0x3e,
319 DBG_BLOCK_ID_UNUSED12 = 0x3f,
320 DBG_BLOCK_ID_SCB0 = 0x40,
321 DBG_BLOCK_ID_SCB1 = 0x41,
322 DBG_BLOCK_ID_UNUSED13 = 0x42,
323 DBG_BLOCK_ID_UNUSED14 = 0x43,
324 DBG_BLOCK_ID_SCF0 = 0x44,
325 DBG_BLOCK_ID_SCF1 = 0x45,
326 DBG_BLOCK_ID_UNUSED15 = 0x46,
327 DBG_BLOCK_ID_UNUSED16 = 0x47,
328 DBG_BLOCK_ID_BCI0 = 0x48,
329 DBG_BLOCK_ID_BCI1 = 0x49,
330 DBG_BLOCK_ID_BCI2 = 0x4a,
331 DBG_BLOCK_ID_BCI3 = 0x4b,
332 DBG_BLOCK_ID_UNUSED17 = 0x4c,
333 DBG_BLOCK_ID_UNUSED18 = 0x4d,
334 DBG_BLOCK_ID_UNUSED19 = 0x4e,
335 DBG_BLOCK_ID_UNUSED20 = 0x4f,
336 DBG_BLOCK_ID_CB00 = 0x50,
337 DBG_BLOCK_ID_CB01 = 0x51,
338 DBG_BLOCK_ID_CB02 = 0x52,
339 DBG_BLOCK_ID_CB03 = 0x53,
340 DBG_BLOCK_ID_CB04 = 0x54,
341 DBG_BLOCK_ID_UNUSED21 = 0x55,
342 DBG_BLOCK_ID_UNUSED22 = 0x56,
343 DBG_BLOCK_ID_UNUSED23 = 0x57,
344 DBG_BLOCK_ID_CB10 = 0x58,
345 DBG_BLOCK_ID_CB11 = 0x59,
346 DBG_BLOCK_ID_CB12 = 0x5a,
347 DBG_BLOCK_ID_CB13 = 0x5b,
348 DBG_BLOCK_ID_CB14 = 0x5c,
349 DBG_BLOCK_ID_UNUSED24 = 0x5d,
350 DBG_BLOCK_ID_UNUSED25 = 0x5e,
351 DBG_BLOCK_ID_UNUSED26 = 0x5f,
352 DBG_BLOCK_ID_TCP0 = 0x60,
353 DBG_BLOCK_ID_TCP1 = 0x61,
354 DBG_BLOCK_ID_TCP2 = 0x62,
355 DBG_BLOCK_ID_TCP3 = 0x63,
356 DBG_BLOCK_ID_TCP4 = 0x64,
357 DBG_BLOCK_ID_TCP5 = 0x65,
358 DBG_BLOCK_ID_TCP6 = 0x66,
359 DBG_BLOCK_ID_TCP7 = 0x67,
360 DBG_BLOCK_ID_TCP8 = 0x68,
361 DBG_BLOCK_ID_TCP9 = 0x69,
362 DBG_BLOCK_ID_TCP10 = 0x6a,
363 DBG_BLOCK_ID_TCP11 = 0x6b,
364 DBG_BLOCK_ID_TCP12 = 0x6c,
365 DBG_BLOCK_ID_TCP13 = 0x6d,
366 DBG_BLOCK_ID_TCP14 = 0x6e,
367 DBG_BLOCK_ID_TCP15 = 0x6f,
368 DBG_BLOCK_ID_TCP16 = 0x70,
369 DBG_BLOCK_ID_TCP17 = 0x71,
370 DBG_BLOCK_ID_TCP18 = 0x72,
371 DBG_BLOCK_ID_TCP19 = 0x73,
372 DBG_BLOCK_ID_TCP20 = 0x74,
373 DBG_BLOCK_ID_TCP21 = 0x75,
374 DBG_BLOCK_ID_TCP22 = 0x76,
375 DBG_BLOCK_ID_TCP23 = 0x77,
376 DBG_BLOCK_ID_TCP_RESERVED0 = 0x78,
377 DBG_BLOCK_ID_TCP_RESERVED1 = 0x79,
378 DBG_BLOCK_ID_TCP_RESERVED2 = 0x7a,
379 DBG_BLOCK_ID_TCP_RESERVED3 = 0x7b,
380 DBG_BLOCK_ID_TCP_RESERVED4 = 0x7c,
381 DBG_BLOCK_ID_TCP_RESERVED5 = 0x7d,
382 DBG_BLOCK_ID_TCP_RESERVED6 = 0x7e,
383 DBG_BLOCK_ID_TCP_RESERVED7 = 0x7f,
384 DBG_BLOCK_ID_DB00 = 0x80,
385 DBG_BLOCK_ID_DB01 = 0x81,
386 DBG_BLOCK_ID_DB02 = 0x82,
387 DBG_BLOCK_ID_DB03 = 0x83,
388 DBG_BLOCK_ID_DB04 = 0x84,
389 DBG_BLOCK_ID_UNUSED27 = 0x85,
390 DBG_BLOCK_ID_UNUSED28 = 0x86,
391 DBG_BLOCK_ID_UNUSED29 = 0x87,
392 DBG_BLOCK_ID_DB10 = 0x88,
393 DBG_BLOCK_ID_DB11 = 0x89,
394 DBG_BLOCK_ID_DB12 = 0x8a,
395 DBG_BLOCK_ID_DB13 = 0x8b,
396 DBG_BLOCK_ID_DB14 = 0x8c,
397 DBG_BLOCK_ID_UNUSED30 = 0x8d,
398 DBG_BLOCK_ID_UNUSED31 = 0x8e,
399 DBG_BLOCK_ID_UNUSED32 = 0x8f,
400 DBG_BLOCK_ID_TCC0 = 0x90,
401 DBG_BLOCK_ID_TCC1 = 0x91,
402 DBG_BLOCK_ID_TCC2 = 0x92,
403 DBG_BLOCK_ID_TCC3 = 0x93,
404 DBG_BLOCK_ID_TCC4 = 0x94,
405 DBG_BLOCK_ID_TCC5 = 0x95,
406 DBG_BLOCK_ID_TCC6 = 0x96,
407 DBG_BLOCK_ID_TCC7 = 0x97,
408 DBG_BLOCK_ID_SPS00 = 0x98,
409 DBG_BLOCK_ID_SPS01 = 0x99,
410 DBG_BLOCK_ID_SPS02 = 0x9a,
411 DBG_BLOCK_ID_SPS10 = 0x9b,
412 DBG_BLOCK_ID_SPS11 = 0x9c,
413 DBG_BLOCK_ID_SPS12 = 0x9d,
414 DBG_BLOCK_ID_UNUSED33 = 0x9e,
415 DBG_BLOCK_ID_UNUSED34 = 0x9f,
416 DBG_BLOCK_ID_TA00 = 0xa0,
417 DBG_BLOCK_ID_TA01 = 0xa1,
418 DBG_BLOCK_ID_TA02 = 0xa2,
419 DBG_BLOCK_ID_TA03 = 0xa3,
420 DBG_BLOCK_ID_TA04 = 0xa4,
421 DBG_BLOCK_ID_TA05 = 0xa5,
422 DBG_BLOCK_ID_TA06 = 0xa6,
423 DBG_BLOCK_ID_TA07 = 0xa7,
424 DBG_BLOCK_ID_TA08 = 0xa8,
425 DBG_BLOCK_ID_TA09 = 0xa9,
426 DBG_BLOCK_ID_TA0A = 0xaa,
427 DBG_BLOCK_ID_TA0B = 0xab,
428 DBG_BLOCK_ID_UNUSED35 = 0xac,
429 DBG_BLOCK_ID_UNUSED36 = 0xad,
430 DBG_BLOCK_ID_UNUSED37 = 0xae,
431 DBG_BLOCK_ID_UNUSED38 = 0xaf,
432 DBG_BLOCK_ID_TA10 = 0xb0,
433 DBG_BLOCK_ID_TA11 = 0xb1,
434 DBG_BLOCK_ID_TA12 = 0xb2,
435 DBG_BLOCK_ID_TA13 = 0xb3,
436 DBG_BLOCK_ID_TA14 = 0xb4,
437 DBG_BLOCK_ID_TA15 = 0xb5,
438 DBG_BLOCK_ID_TA16 = 0xb6,
439 DBG_BLOCK_ID_TA17 = 0xb7,
440 DBG_BLOCK_ID_TA18 = 0xb8,
441 DBG_BLOCK_ID_TA19 = 0xb9,
442 DBG_BLOCK_ID_TA1A = 0xba,
443 DBG_BLOCK_ID_TA1B = 0xbb,
444 DBG_BLOCK_ID_UNUSED39 = 0xbc,
445 DBG_BLOCK_ID_UNUSED40 = 0xbd,
446 DBG_BLOCK_ID_UNUSED41 = 0xbe,
447 DBG_BLOCK_ID_UNUSED42 = 0xbf,
448 DBG_BLOCK_ID_TD00 = 0xc0,
449 DBG_BLOCK_ID_TD01 = 0xc1,
450 DBG_BLOCK_ID_TD02 = 0xc2,
451 DBG_BLOCK_ID_TD03 = 0xc3,
452 DBG_BLOCK_ID_TD04 = 0xc4,
453 DBG_BLOCK_ID_TD05 = 0xc5,
454 DBG_BLOCK_ID_TD06 = 0xc6,
455 DBG_BLOCK_ID_TD07 = 0xc7,
456 DBG_BLOCK_ID_TD08 = 0xc8,
457 DBG_BLOCK_ID_TD09 = 0xc9,
458 DBG_BLOCK_ID_TD0A = 0xca,
459 DBG_BLOCK_ID_TD0B = 0xcb,
460 DBG_BLOCK_ID_UNUSED43 = 0xcc,
461 DBG_BLOCK_ID_UNUSED44 = 0xcd,
462 DBG_BLOCK_ID_UNUSED45 = 0xce,
463 DBG_BLOCK_ID_UNUSED46 = 0xcf,
464 DBG_BLOCK_ID_TD10 = 0xd0,
465 DBG_BLOCK_ID_TD11 = 0xd1,
466 DBG_BLOCK_ID_TD12 = 0xd2,
467 DBG_BLOCK_ID_TD13 = 0xd3,
468 DBG_BLOCK_ID_TD14 = 0xd4,
469 DBG_BLOCK_ID_TD15 = 0xd5,
470 DBG_BLOCK_ID_TD16 = 0xd6,
471 DBG_BLOCK_ID_TD17 = 0xd7,
472 DBG_BLOCK_ID_TD18 = 0xd8,
473 DBG_BLOCK_ID_TD19 = 0xd9,
474 DBG_BLOCK_ID_TD1A = 0xda,
475 DBG_BLOCK_ID_TD1B = 0xdb,
476 DBG_BLOCK_ID_UNUSED47 = 0xdc,
477 DBG_BLOCK_ID_UNUSED48 = 0xdd,
478 DBG_BLOCK_ID_UNUSED49 = 0xde,
479 DBG_BLOCK_ID_UNUSED50 = 0xdf,
480 DBG_BLOCK_ID_MCD0 = 0xe0,
481 DBG_BLOCK_ID_MCD1 = 0xe1,
482 DBG_BLOCK_ID_MCD2 = 0xe2,
483 DBG_BLOCK_ID_MCD3 = 0xe3,
484 DBG_BLOCK_ID_MCD4 = 0xe4,
485 DBG_BLOCK_ID_MCD5 = 0xe5,
486 DBG_BLOCK_ID_UNUSED51 = 0xe6,
487 DBG_BLOCK_ID_UNUSED52 = 0xe7,
488} DebugBlockId_OLD;
489typedef enum DebugBlockId_BY2 {
490 DBG_BLOCK_ID_RESERVED_BY2 = 0x0,
491 DBG_BLOCK_ID_VMC_BY2 = 0x1,
492 DBG_BLOCK_ID_CG_BY2 = 0x2,
493 DBG_BLOCK_ID_GRBM_BY2 = 0x3,
494 DBG_BLOCK_ID_CSC_BY2 = 0x4,
495 DBG_BLOCK_ID_IH_BY2 = 0x5,
496 DBG_BLOCK_ID_SQ_BY2 = 0x6,
497 DBG_BLOCK_ID_GMCON_BY2 = 0x7,
498 DBG_BLOCK_ID_DMA0_BY2 = 0x8,
499 DBG_BLOCK_ID_SPIM_BY2 = 0x9,
500 DBG_BLOCK_ID_SPIS_BY2 = 0xa,
501 DBG_BLOCK_ID_PA0_BY2 = 0xb,
502 DBG_BLOCK_ID_CP0_BY2 = 0xc,
503 DBG_BLOCK_ID_CP2_BY2 = 0xd,
504 DBG_BLOCK_ID_UVDU_BY2 = 0xe,
505 DBG_BLOCK_ID_VCE_BY2 = 0xf,
506 DBG_BLOCK_ID_VGT0_BY2 = 0x10,
507 DBG_BLOCK_ID_IA_BY2 = 0x11,
508 DBG_BLOCK_ID_SCT0_BY2 = 0x12,
509 DBG_BLOCK_ID_SPM0_BY2 = 0x13,
510 DBG_BLOCK_ID_TCAA_BY2 = 0x14,
511 DBG_BLOCK_ID_TCCA_BY2 = 0x15,
512 DBG_BLOCK_ID_MCC0_BY2 = 0x16,
513 DBG_BLOCK_ID_MCC2_BY2 = 0x17,
514 DBG_BLOCK_ID_SX0_BY2 = 0x18,
515 DBG_BLOCK_ID_SX2_BY2 = 0x19,
516 DBG_BLOCK_ID_UNUSED4_BY2 = 0x1a,
517 DBG_BLOCK_ID_UNUSED6_BY2 = 0x1b,
518 DBG_BLOCK_ID_PC0_BY2 = 0x1c,
519 DBG_BLOCK_ID_UNUSED8_BY2 = 0x1d,
520 DBG_BLOCK_ID_UNUSED10_BY2 = 0x1e,
521 DBG_BLOCK_ID_MCB_BY2 = 0x1f,
522 DBG_BLOCK_ID_SCB0_BY2 = 0x20,
523 DBG_BLOCK_ID_UNUSED13_BY2 = 0x21,
524 DBG_BLOCK_ID_SCF0_BY2 = 0x22,
525 DBG_BLOCK_ID_UNUSED15_BY2 = 0x23,
526 DBG_BLOCK_ID_BCI0_BY2 = 0x24,
527 DBG_BLOCK_ID_BCI2_BY2 = 0x25,
528 DBG_BLOCK_ID_UNUSED17_BY2 = 0x26,
529 DBG_BLOCK_ID_UNUSED19_BY2 = 0x27,
530 DBG_BLOCK_ID_CB00_BY2 = 0x28,
531 DBG_BLOCK_ID_CB02_BY2 = 0x29,
532 DBG_BLOCK_ID_CB04_BY2 = 0x2a,
533 DBG_BLOCK_ID_UNUSED22_BY2 = 0x2b,
534 DBG_BLOCK_ID_CB10_BY2 = 0x2c,
535 DBG_BLOCK_ID_CB12_BY2 = 0x2d,
536 DBG_BLOCK_ID_CB14_BY2 = 0x2e,
537 DBG_BLOCK_ID_UNUSED25_BY2 = 0x2f,
538 DBG_BLOCK_ID_TCP0_BY2 = 0x30,
539 DBG_BLOCK_ID_TCP2_BY2 = 0x31,
540 DBG_BLOCK_ID_TCP4_BY2 = 0x32,
541 DBG_BLOCK_ID_TCP6_BY2 = 0x33,
542 DBG_BLOCK_ID_TCP8_BY2 = 0x34,
543 DBG_BLOCK_ID_TCP10_BY2 = 0x35,
544 DBG_BLOCK_ID_TCP12_BY2 = 0x36,
545 DBG_BLOCK_ID_TCP14_BY2 = 0x37,
546 DBG_BLOCK_ID_TCP16_BY2 = 0x38,
547 DBG_BLOCK_ID_TCP18_BY2 = 0x39,
548 DBG_BLOCK_ID_TCP20_BY2 = 0x3a,
549 DBG_BLOCK_ID_TCP22_BY2 = 0x3b,
550 DBG_BLOCK_ID_TCP_RESERVED0_BY2 = 0x3c,
551 DBG_BLOCK_ID_TCP_RESERVED2_BY2 = 0x3d,
552 DBG_BLOCK_ID_TCP_RESERVED4_BY2 = 0x3e,
553 DBG_BLOCK_ID_TCP_RESERVED6_BY2 = 0x3f,
554 DBG_BLOCK_ID_DB00_BY2 = 0x40,
555 DBG_BLOCK_ID_DB02_BY2 = 0x41,
556 DBG_BLOCK_ID_DB04_BY2 = 0x42,
557 DBG_BLOCK_ID_UNUSED28_BY2 = 0x43,
558 DBG_BLOCK_ID_DB10_BY2 = 0x44,
559 DBG_BLOCK_ID_DB12_BY2 = 0x45,
560 DBG_BLOCK_ID_DB14_BY2 = 0x46,
561 DBG_BLOCK_ID_UNUSED31_BY2 = 0x47,
562 DBG_BLOCK_ID_TCC0_BY2 = 0x48,
563 DBG_BLOCK_ID_TCC2_BY2 = 0x49,
564 DBG_BLOCK_ID_TCC4_BY2 = 0x4a,
565 DBG_BLOCK_ID_TCC6_BY2 = 0x4b,
566 DBG_BLOCK_ID_SPS00_BY2 = 0x4c,
567 DBG_BLOCK_ID_SPS02_BY2 = 0x4d,
568 DBG_BLOCK_ID_SPS11_BY2 = 0x4e,
569 DBG_BLOCK_ID_UNUSED33_BY2 = 0x4f,
570 DBG_BLOCK_ID_TA00_BY2 = 0x50,
571 DBG_BLOCK_ID_TA02_BY2 = 0x51,
572 DBG_BLOCK_ID_TA04_BY2 = 0x52,
573 DBG_BLOCK_ID_TA06_BY2 = 0x53,
574 DBG_BLOCK_ID_TA08_BY2 = 0x54,
575 DBG_BLOCK_ID_TA0A_BY2 = 0x55,
576 DBG_BLOCK_ID_UNUSED35_BY2 = 0x56,
577 DBG_BLOCK_ID_UNUSED37_BY2 = 0x57,
578 DBG_BLOCK_ID_TA10_BY2 = 0x58,
579 DBG_BLOCK_ID_TA12_BY2 = 0x59,
580 DBG_BLOCK_ID_TA14_BY2 = 0x5a,
581 DBG_BLOCK_ID_TA16_BY2 = 0x5b,
582 DBG_BLOCK_ID_TA18_BY2 = 0x5c,
583 DBG_BLOCK_ID_TA1A_BY2 = 0x5d,
584 DBG_BLOCK_ID_UNUSED39_BY2 = 0x5e,
585 DBG_BLOCK_ID_UNUSED41_BY2 = 0x5f,
586 DBG_BLOCK_ID_TD00_BY2 = 0x60,
587 DBG_BLOCK_ID_TD02_BY2 = 0x61,
588 DBG_BLOCK_ID_TD04_BY2 = 0x62,
589 DBG_BLOCK_ID_TD06_BY2 = 0x63,
590 DBG_BLOCK_ID_TD08_BY2 = 0x64,
591 DBG_BLOCK_ID_TD0A_BY2 = 0x65,
592 DBG_BLOCK_ID_UNUSED43_BY2 = 0x66,
593 DBG_BLOCK_ID_UNUSED45_BY2 = 0x67,
594 DBG_BLOCK_ID_TD10_BY2 = 0x68,
595 DBG_BLOCK_ID_TD12_BY2 = 0x69,
596 DBG_BLOCK_ID_TD14_BY2 = 0x6a,
597 DBG_BLOCK_ID_TD16_BY2 = 0x6b,
598 DBG_BLOCK_ID_TD18_BY2 = 0x6c,
599 DBG_BLOCK_ID_TD1A_BY2 = 0x6d,
600 DBG_BLOCK_ID_UNUSED47_BY2 = 0x6e,
601 DBG_BLOCK_ID_UNUSED49_BY2 = 0x6f,
602 DBG_BLOCK_ID_MCD0_BY2 = 0x70,
603 DBG_BLOCK_ID_MCD2_BY2 = 0x71,
604 DBG_BLOCK_ID_MCD4_BY2 = 0x72,
605 DBG_BLOCK_ID_UNUSED51_BY2 = 0x73,
606} DebugBlockId_BY2;
607typedef enum DebugBlockId_BY4 {
608 DBG_BLOCK_ID_RESERVED_BY4 = 0x0,
609 DBG_BLOCK_ID_CG_BY4 = 0x1,
610 DBG_BLOCK_ID_CSC_BY4 = 0x2,
611 DBG_BLOCK_ID_SQ_BY4 = 0x3,
612 DBG_BLOCK_ID_DMA0_BY4 = 0x4,
613 DBG_BLOCK_ID_SPIS_BY4 = 0x5,
614 DBG_BLOCK_ID_CP0_BY4 = 0x6,
615 DBG_BLOCK_ID_UVDU_BY4 = 0x7,
616 DBG_BLOCK_ID_VGT0_BY4 = 0x8,
617 DBG_BLOCK_ID_SCT0_BY4 = 0x9,
618 DBG_BLOCK_ID_TCAA_BY4 = 0xa,
619 DBG_BLOCK_ID_MCC0_BY4 = 0xb,
620 DBG_BLOCK_ID_SX0_BY4 = 0xc,
621 DBG_BLOCK_ID_UNUSED4_BY4 = 0xd,
622 DBG_BLOCK_ID_PC0_BY4 = 0xe,
623 DBG_BLOCK_ID_UNUSED10_BY4 = 0xf,
624 DBG_BLOCK_ID_SCB0_BY4 = 0x10,
625 DBG_BLOCK_ID_SCF0_BY4 = 0x11,
626 DBG_BLOCK_ID_BCI0_BY4 = 0x12,
627 DBG_BLOCK_ID_UNUSED17_BY4 = 0x13,
628 DBG_BLOCK_ID_CB00_BY4 = 0x14,
629 DBG_BLOCK_ID_CB04_BY4 = 0x15,
630 DBG_BLOCK_ID_CB10_BY4 = 0x16,
631 DBG_BLOCK_ID_CB14_BY4 = 0x17,
632 DBG_BLOCK_ID_TCP0_BY4 = 0x18,
633 DBG_BLOCK_ID_TCP4_BY4 = 0x19,
634 DBG_BLOCK_ID_TCP8_BY4 = 0x1a,
635 DBG_BLOCK_ID_TCP12_BY4 = 0x1b,
636 DBG_BLOCK_ID_TCP16_BY4 = 0x1c,
637 DBG_BLOCK_ID_TCP20_BY4 = 0x1d,
638 DBG_BLOCK_ID_TCP_RESERVED0_BY4 = 0x1e,
639 DBG_BLOCK_ID_TCP_RESERVED4_BY4 = 0x1f,
640 DBG_BLOCK_ID_DB_BY4 = 0x20,
641 DBG_BLOCK_ID_DB04_BY4 = 0x21,
642 DBG_BLOCK_ID_DB10_BY4 = 0x22,
643 DBG_BLOCK_ID_DB14_BY4 = 0x23,
644 DBG_BLOCK_ID_TCC0_BY4 = 0x24,
645 DBG_BLOCK_ID_TCC4_BY4 = 0x25,
646 DBG_BLOCK_ID_SPS00_BY4 = 0x26,
647 DBG_BLOCK_ID_SPS11_BY4 = 0x27,
648 DBG_BLOCK_ID_TA00_BY4 = 0x28,
649 DBG_BLOCK_ID_TA04_BY4 = 0x29,
650 DBG_BLOCK_ID_TA08_BY4 = 0x2a,
651 DBG_BLOCK_ID_UNUSED35_BY4 = 0x2b,
652 DBG_BLOCK_ID_TA10_BY4 = 0x2c,
653 DBG_BLOCK_ID_TA14_BY4 = 0x2d,
654 DBG_BLOCK_ID_TA18_BY4 = 0x2e,
655 DBG_BLOCK_ID_UNUSED39_BY4 = 0x2f,
656 DBG_BLOCK_ID_TD00_BY4 = 0x30,
657 DBG_BLOCK_ID_TD04_BY4 = 0x31,
658 DBG_BLOCK_ID_TD08_BY4 = 0x32,
659 DBG_BLOCK_ID_UNUSED43_BY4 = 0x33,
660 DBG_BLOCK_ID_TD10_BY4 = 0x34,
661 DBG_BLOCK_ID_TD14_BY4 = 0x35,
662 DBG_BLOCK_ID_TD18_BY4 = 0x36,
663 DBG_BLOCK_ID_UNUSED47_BY4 = 0x37,
664 DBG_BLOCK_ID_MCD0_BY4 = 0x38,
665 DBG_BLOCK_ID_MCD4_BY4 = 0x39,
666} DebugBlockId_BY4;
667typedef enum DebugBlockId_BY8 {
668 DBG_BLOCK_ID_RESERVED_BY8 = 0x0,
669 DBG_BLOCK_ID_CSC_BY8 = 0x1,
670 DBG_BLOCK_ID_DMA0_BY8 = 0x2,
671 DBG_BLOCK_ID_CP0_BY8 = 0x3,
672 DBG_BLOCK_ID_VGT0_BY8 = 0x4,
673 DBG_BLOCK_ID_TCAA_BY8 = 0x5,
674 DBG_BLOCK_ID_SX0_BY8 = 0x6,
675 DBG_BLOCK_ID_PC0_BY8 = 0x7,
676 DBG_BLOCK_ID_SCB0_BY8 = 0x8,
677 DBG_BLOCK_ID_BCI0_BY8 = 0x9,
678 DBG_BLOCK_ID_CB00_BY8 = 0xa,
679 DBG_BLOCK_ID_CB10_BY8 = 0xb,
680 DBG_BLOCK_ID_TCP0_BY8 = 0xc,
681 DBG_BLOCK_ID_TCP8_BY8 = 0xd,
682 DBG_BLOCK_ID_TCP16_BY8 = 0xe,
683 DBG_BLOCK_ID_TCP_RESERVED0_BY8 = 0xf,
684 DBG_BLOCK_ID_DB00_BY8 = 0x10,
685 DBG_BLOCK_ID_DB10_BY8 = 0x11,
686 DBG_BLOCK_ID_TCC0_BY8 = 0x12,
687 DBG_BLOCK_ID_SPS00_BY8 = 0x13,
688 DBG_BLOCK_ID_TA00_BY8 = 0x14,
689 DBG_BLOCK_ID_TA08_BY8 = 0x15,
690 DBG_BLOCK_ID_TA10_BY8 = 0x16,
691 DBG_BLOCK_ID_TA18_BY8 = 0x17,
692 DBG_BLOCK_ID_TD00_BY8 = 0x18,
693 DBG_BLOCK_ID_TD08_BY8 = 0x19,
694 DBG_BLOCK_ID_TD10_BY8 = 0x1a,
695 DBG_BLOCK_ID_TD18_BY8 = 0x1b,
696 DBG_BLOCK_ID_MCD0_BY8 = 0x1c,
697} DebugBlockId_BY8;
698typedef enum DebugBlockId_BY16 {
699 DBG_BLOCK_ID_RESERVED_BY16 = 0x0,
700 DBG_BLOCK_ID_DMA0_BY16 = 0x1,
701 DBG_BLOCK_ID_VGT0_BY16 = 0x2,
702 DBG_BLOCK_ID_SX0_BY16 = 0x3,
703 DBG_BLOCK_ID_SCB0_BY16 = 0x4,
704 DBG_BLOCK_ID_CB00_BY16 = 0x5,
705 DBG_BLOCK_ID_TCP0_BY16 = 0x6,
706 DBG_BLOCK_ID_TCP16_BY16 = 0x7,
707 DBG_BLOCK_ID_DB00_BY16 = 0x8,
708 DBG_BLOCK_ID_TCC0_BY16 = 0x9,
709 DBG_BLOCK_ID_TA00_BY16 = 0xa,
710 DBG_BLOCK_ID_TA10_BY16 = 0xb,
711 DBG_BLOCK_ID_TD00_BY16 = 0xc,
712 DBG_BLOCK_ID_TD10_BY16 = 0xd,
713 DBG_BLOCK_ID_MCD0_BY16 = 0xe,
714} DebugBlockId_BY16;
715typedef enum CompareRef {
716 REF_NEVER = 0x0,
717 REF_LESS = 0x1,
718 REF_EQUAL = 0x2,
719 REF_LEQUAL = 0x3,
720 REF_GREATER = 0x4,
721 REF_NOTEQUAL = 0x5,
722 REF_GEQUAL = 0x6,
723 REF_ALWAYS = 0x7,
724} CompareRef;
725typedef enum ReadSize {
726 READ_256_BITS = 0x0,
727 READ_512_BITS = 0x1,
728} ReadSize;
729typedef enum DepthFormat {
730 DEPTH_INVALID = 0x0,
731 DEPTH_16 = 0x1,
732 DEPTH_X8_24 = 0x2,
733 DEPTH_8_24 = 0x3,
734 DEPTH_X8_24_FLOAT = 0x4,
735 DEPTH_8_24_FLOAT = 0x5,
736 DEPTH_32_FLOAT = 0x6,
737 DEPTH_X24_8_32_FLOAT = 0x7,
738} DepthFormat;
739typedef enum ZFormat {
740 Z_INVALID = 0x0,
741 Z_16 = 0x1,
742 Z_24 = 0x2,
743 Z_32_FLOAT = 0x3,
744} ZFormat;
745typedef enum StencilFormat {
746 STENCIL_INVALID = 0x0,
747 STENCIL_8 = 0x1,
748} StencilFormat;
749typedef enum CmaskMode {
750 CMASK_CLEAR_NONE = 0x0,
751 CMASK_CLEAR_ONE = 0x1,
752 CMASK_CLEAR_ALL = 0x2,
753 CMASK_ANY_EXPANDED = 0x3,
754 CMASK_ALPHA0_FRAG1 = 0x4,
755 CMASK_ALPHA0_FRAG2 = 0x5,
756 CMASK_ALPHA0_FRAG4 = 0x6,
757 CMASK_ALPHA0_FRAGS = 0x7,
758 CMASK_ALPHA1_FRAG1 = 0x8,
759 CMASK_ALPHA1_FRAG2 = 0x9,
760 CMASK_ALPHA1_FRAG4 = 0xa,
761 CMASK_ALPHA1_FRAGS = 0xb,
762 CMASK_ALPHAX_FRAG1 = 0xc,
763 CMASK_ALPHAX_FRAG2 = 0xd,
764 CMASK_ALPHAX_FRAG4 = 0xe,
765 CMASK_ALPHAX_FRAGS = 0xf,
766} CmaskMode;
767typedef enum QuadExportFormat {
768 EXPORT_UNUSED = 0x0,
769 EXPORT_32_R = 0x1,
770 EXPORT_32_GR = 0x2,
771 EXPORT_32_AR = 0x3,
772 EXPORT_FP16_ABGR = 0x4,
773 EXPORT_UNSIGNED16_ABGR = 0x5,
774 EXPORT_SIGNED16_ABGR = 0x6,
775 EXPORT_32_ABGR = 0x7,
776} QuadExportFormat;
777typedef enum QuadExportFormatOld {
778 EXPORT_4P_32BPC_ABGR = 0x0,
779 EXPORT_4P_16BPC_ABGR = 0x1,
780 EXPORT_4P_32BPC_GR = 0x2,
781 EXPORT_4P_32BPC_AR = 0x3,
782 EXPORT_2P_32BPC_ABGR = 0x4,
783 EXPORT_8P_32BPC_R = 0x5,
784} QuadExportFormatOld;
785typedef enum ColorFormat {
786 COLOR_INVALID = 0x0,
787 COLOR_8 = 0x1,
788 COLOR_16 = 0x2,
789 COLOR_8_8 = 0x3,
790 COLOR_32 = 0x4,
791 COLOR_16_16 = 0x5,
792 COLOR_10_11_11 = 0x6,
793 COLOR_11_11_10 = 0x7,
794 COLOR_10_10_10_2 = 0x8,
795 COLOR_2_10_10_10 = 0x9,
796 COLOR_8_8_8_8 = 0xa,
797 COLOR_32_32 = 0xb,
798 COLOR_16_16_16_16 = 0xc,
799 COLOR_RESERVED_13 = 0xd,
800 COLOR_32_32_32_32 = 0xe,
801 COLOR_RESERVED_15 = 0xf,
802 COLOR_5_6_5 = 0x10,
803 COLOR_1_5_5_5 = 0x11,
804 COLOR_5_5_5_1 = 0x12,
805 COLOR_4_4_4_4 = 0x13,
806 COLOR_8_24 = 0x14,
807 COLOR_24_8 = 0x15,
808 COLOR_X24_8_32_FLOAT = 0x16,
809 COLOR_RESERVED_23 = 0x17,
810} ColorFormat;
811typedef enum SurfaceFormat {
812 FMT_INVALID = 0x0,
813 FMT_8 = 0x1,
814 FMT_16 = 0x2,
815 FMT_8_8 = 0x3,
816 FMT_32 = 0x4,
817 FMT_16_16 = 0x5,
818 FMT_10_11_11 = 0x6,
819 FMT_11_11_10 = 0x7,
820 FMT_10_10_10_2 = 0x8,
821 FMT_2_10_10_10 = 0x9,
822 FMT_8_8_8_8 = 0xa,
823 FMT_32_32 = 0xb,
824 FMT_16_16_16_16 = 0xc,
825 FMT_32_32_32 = 0xd,
826 FMT_32_32_32_32 = 0xe,
827 FMT_RESERVED_4 = 0xf,
828 FMT_5_6_5 = 0x10,
829 FMT_1_5_5_5 = 0x11,
830 FMT_5_5_5_1 = 0x12,
831 FMT_4_4_4_4 = 0x13,
832 FMT_8_24 = 0x14,
833 FMT_24_8 = 0x15,
834 FMT_X24_8_32_FLOAT = 0x16,
835 FMT_RESERVED_33 = 0x17,
836 FMT_11_11_10_FLOAT = 0x18,
837 FMT_16_FLOAT = 0x19,
838 FMT_32_FLOAT = 0x1a,
839 FMT_16_16_FLOAT = 0x1b,
840 FMT_8_24_FLOAT = 0x1c,
841 FMT_24_8_FLOAT = 0x1d,
842 FMT_32_32_FLOAT = 0x1e,
843 FMT_10_11_11_FLOAT = 0x1f,
844 FMT_16_16_16_16_FLOAT = 0x20,
845 FMT_3_3_2 = 0x21,
846 FMT_6_5_5 = 0x22,
847 FMT_32_32_32_32_FLOAT = 0x23,
848 FMT_RESERVED_36 = 0x24,
849 FMT_1 = 0x25,
850 FMT_1_REVERSED = 0x26,
851 FMT_GB_GR = 0x27,
852 FMT_BG_RG = 0x28,
853 FMT_32_AS_8 = 0x29,
854 FMT_32_AS_8_8 = 0x2a,
855 FMT_5_9_9_9_SHAREDEXP = 0x2b,
856 FMT_8_8_8 = 0x2c,
857 FMT_16_16_16 = 0x2d,
858 FMT_16_16_16_FLOAT = 0x2e,
859 FMT_4_4 = 0x2f,
860 FMT_32_32_32_FLOAT = 0x30,
861 FMT_BC1 = 0x31,
862 FMT_BC2 = 0x32,
863 FMT_BC3 = 0x33,
864 FMT_BC4 = 0x34,
865 FMT_BC5 = 0x35,
866 FMT_BC6 = 0x36,
867 FMT_BC7 = 0x37,
868 FMT_32_AS_32_32_32_32 = 0x38,
869 FMT_APC3 = 0x39,
870 FMT_APC4 = 0x3a,
871 FMT_APC5 = 0x3b,
872 FMT_APC6 = 0x3c,
873 FMT_APC7 = 0x3d,
874 FMT_CTX1 = 0x3e,
875 FMT_RESERVED_63 = 0x3f,
876} SurfaceFormat;
877typedef enum BUF_DATA_FORMAT {
878 BUF_DATA_FORMAT_INVALID = 0x0,
879 BUF_DATA_FORMAT_8 = 0x1,
880 BUF_DATA_FORMAT_16 = 0x2,
881 BUF_DATA_FORMAT_8_8 = 0x3,
882 BUF_DATA_FORMAT_32 = 0x4,
883 BUF_DATA_FORMAT_16_16 = 0x5,
884 BUF_DATA_FORMAT_10_11_11 = 0x6,
885 BUF_DATA_FORMAT_11_11_10 = 0x7,
886 BUF_DATA_FORMAT_10_10_10_2 = 0x8,
887 BUF_DATA_FORMAT_2_10_10_10 = 0x9,
888 BUF_DATA_FORMAT_8_8_8_8 = 0xa,
889 BUF_DATA_FORMAT_32_32 = 0xb,
890 BUF_DATA_FORMAT_16_16_16_16 = 0xc,
891 BUF_DATA_FORMAT_32_32_32 = 0xd,
892 BUF_DATA_FORMAT_32_32_32_32 = 0xe,
893 BUF_DATA_FORMAT_RESERVED_15 = 0xf,
894} BUF_DATA_FORMAT;
895typedef enum IMG_DATA_FORMAT {
896 IMG_DATA_FORMAT_INVALID = 0x0,
897 IMG_DATA_FORMAT_8 = 0x1,
898 IMG_DATA_FORMAT_16 = 0x2,
899 IMG_DATA_FORMAT_8_8 = 0x3,
900 IMG_DATA_FORMAT_32 = 0x4,
901 IMG_DATA_FORMAT_16_16 = 0x5,
902 IMG_DATA_FORMAT_10_11_11 = 0x6,
903 IMG_DATA_FORMAT_11_11_10 = 0x7,
904 IMG_DATA_FORMAT_10_10_10_2 = 0x8,
905 IMG_DATA_FORMAT_2_10_10_10 = 0x9,
906 IMG_DATA_FORMAT_8_8_8_8 = 0xa,
907 IMG_DATA_FORMAT_32_32 = 0xb,
908 IMG_DATA_FORMAT_16_16_16_16 = 0xc,
909 IMG_DATA_FORMAT_32_32_32 = 0xd,
910 IMG_DATA_FORMAT_32_32_32_32 = 0xe,
911 IMG_DATA_FORMAT_RESERVED_15 = 0xf,
912 IMG_DATA_FORMAT_5_6_5 = 0x10,
913 IMG_DATA_FORMAT_1_5_5_5 = 0x11,
914 IMG_DATA_FORMAT_5_5_5_1 = 0x12,
915 IMG_DATA_FORMAT_4_4_4_4 = 0x13,
916 IMG_DATA_FORMAT_8_24 = 0x14,
917 IMG_DATA_FORMAT_24_8 = 0x15,
918 IMG_DATA_FORMAT_X24_8_32 = 0x16,
919 IMG_DATA_FORMAT_RESERVED_23 = 0x17,
920 IMG_DATA_FORMAT_RESERVED_24 = 0x18,
921 IMG_DATA_FORMAT_RESERVED_25 = 0x19,
922 IMG_DATA_FORMAT_RESERVED_26 = 0x1a,
923 IMG_DATA_FORMAT_RESERVED_27 = 0x1b,
924 IMG_DATA_FORMAT_RESERVED_28 = 0x1c,
925 IMG_DATA_FORMAT_RESERVED_29 = 0x1d,
926 IMG_DATA_FORMAT_RESERVED_30 = 0x1e,
927 IMG_DATA_FORMAT_RESERVED_31 = 0x1f,
928 IMG_DATA_FORMAT_GB_GR = 0x20,
929 IMG_DATA_FORMAT_BG_RG = 0x21,
930 IMG_DATA_FORMAT_5_9_9_9 = 0x22,
931 IMG_DATA_FORMAT_BC1 = 0x23,
932 IMG_DATA_FORMAT_BC2 = 0x24,
933 IMG_DATA_FORMAT_BC3 = 0x25,
934 IMG_DATA_FORMAT_BC4 = 0x26,
935 IMG_DATA_FORMAT_BC5 = 0x27,
936 IMG_DATA_FORMAT_BC6 = 0x28,
937 IMG_DATA_FORMAT_BC7 = 0x29,
938 IMG_DATA_FORMAT_RESERVED_42 = 0x2a,
939 IMG_DATA_FORMAT_RESERVED_43 = 0x2b,
940 IMG_DATA_FORMAT_FMASK8_S2_F1 = 0x2c,
941 IMG_DATA_FORMAT_FMASK8_S4_F1 = 0x2d,
942 IMG_DATA_FORMAT_FMASK8_S8_F1 = 0x2e,
943 IMG_DATA_FORMAT_FMASK8_S2_F2 = 0x2f,
944 IMG_DATA_FORMAT_FMASK8_S4_F2 = 0x30,
945 IMG_DATA_FORMAT_FMASK8_S4_F4 = 0x31,
946 IMG_DATA_FORMAT_FMASK16_S16_F1 = 0x32,
947 IMG_DATA_FORMAT_FMASK16_S8_F2 = 0x33,
948 IMG_DATA_FORMAT_FMASK32_S16_F2 = 0x34,
949 IMG_DATA_FORMAT_FMASK32_S8_F4 = 0x35,
950 IMG_DATA_FORMAT_FMASK32_S8_F8 = 0x36,
951 IMG_DATA_FORMAT_FMASK64_S16_F4 = 0x37,
952 IMG_DATA_FORMAT_FMASK64_S16_F8 = 0x38,
953 IMG_DATA_FORMAT_4_4 = 0x39,
954 IMG_DATA_FORMAT_6_5_5 = 0x3a,
955 IMG_DATA_FORMAT_1 = 0x3b,
956 IMG_DATA_FORMAT_1_REVERSED = 0x3c,
957 IMG_DATA_FORMAT_32_AS_8 = 0x3d,
958 IMG_DATA_FORMAT_32_AS_8_8 = 0x3e,
959 IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x3f,
960} IMG_DATA_FORMAT;
961typedef enum BUF_NUM_FORMAT {
962 BUF_NUM_FORMAT_UNORM = 0x0,
963 BUF_NUM_FORMAT_SNORM = 0x1,
964 BUF_NUM_FORMAT_USCALED = 0x2,
965 BUF_NUM_FORMAT_SSCALED = 0x3,
966 BUF_NUM_FORMAT_UINT = 0x4,
967 BUF_NUM_FORMAT_SINT = 0x5,
968 BUF_NUM_FORMAT_SNORM_OGL = 0x6,
969 BUF_NUM_FORMAT_FLOAT = 0x7,
970} BUF_NUM_FORMAT;
971typedef enum IMG_NUM_FORMAT {
972 IMG_NUM_FORMAT_UNORM = 0x0,
973 IMG_NUM_FORMAT_SNORM = 0x1,
974 IMG_NUM_FORMAT_USCALED = 0x2,
975 IMG_NUM_FORMAT_SSCALED = 0x3,
976 IMG_NUM_FORMAT_UINT = 0x4,
977 IMG_NUM_FORMAT_SINT = 0x5,
978 IMG_NUM_FORMAT_SNORM_OGL = 0x6,
979 IMG_NUM_FORMAT_FLOAT = 0x7,
980 IMG_NUM_FORMAT_RESERVED_8 = 0x8,
981 IMG_NUM_FORMAT_SRGB = 0x9,
982 IMG_NUM_FORMAT_UBNORM = 0xa,
983 IMG_NUM_FORMAT_UBNORM_OGL = 0xb,
984 IMG_NUM_FORMAT_UBINT = 0xc,
985 IMG_NUM_FORMAT_UBSCALED = 0xd,
986 IMG_NUM_FORMAT_RESERVED_14 = 0xe,
987 IMG_NUM_FORMAT_RESERVED_15 = 0xf,
988} IMG_NUM_FORMAT;
989typedef enum TileType {
990 ARRAY_COLOR_TILE = 0x0,
991 ARRAY_DEPTH_TILE = 0x1,
992} TileType;
993typedef enum NonDispTilingOrder {
994 ADDR_SURF_MICRO_TILING_DISPLAY = 0x0,
995 ADDR_SURF_MICRO_TILING_NON_DISPLAY = 0x1,
996} NonDispTilingOrder;
997typedef enum MicroTileMode {
998 ADDR_SURF_DISPLAY_MICRO_TILING = 0x0,
999 ADDR_SURF_THIN_MICRO_TILING = 0x1,
1000 ADDR_SURF_DEPTH_MICRO_TILING = 0x2,
1001 ADDR_SURF_ROTATED_MICRO_TILING = 0x3,
1002 ADDR_SURF_THICK_MICRO_TILING = 0x4,
1003} MicroTileMode;
1004typedef enum TileSplit {
1005 ADDR_SURF_TILE_SPLIT_64B = 0x0,
1006 ADDR_SURF_TILE_SPLIT_128B = 0x1,
1007 ADDR_SURF_TILE_SPLIT_256B = 0x2,
1008 ADDR_SURF_TILE_SPLIT_512B = 0x3,
1009 ADDR_SURF_TILE_SPLIT_1KB = 0x4,
1010 ADDR_SURF_TILE_SPLIT_2KB = 0x5,
1011 ADDR_SURF_TILE_SPLIT_4KB = 0x6,
1012} TileSplit;
1013typedef enum SampleSplit {
1014 ADDR_SURF_SAMPLE_SPLIT_1 = 0x0,
1015 ADDR_SURF_SAMPLE_SPLIT_2 = 0x1,
1016 ADDR_SURF_SAMPLE_SPLIT_4 = 0x2,
1017 ADDR_SURF_SAMPLE_SPLIT_8 = 0x3,
1018} SampleSplit;
1019typedef enum PipeConfig {
1020 ADDR_SURF_P2 = 0x0,
1021 ADDR_SURF_P2_RESERVED0 = 0x1,
1022 ADDR_SURF_P2_RESERVED1 = 0x2,
1023 ADDR_SURF_P2_RESERVED2 = 0x3,
1024 ADDR_SURF_P4_8x16 = 0x4,
1025 ADDR_SURF_P4_16x16 = 0x5,
1026 ADDR_SURF_P4_16x32 = 0x6,
1027 ADDR_SURF_P4_32x32 = 0x7,
1028 ADDR_SURF_P8_16x16_8x16 = 0x8,
1029 ADDR_SURF_P8_16x32_8x16 = 0x9,
1030 ADDR_SURF_P8_32x32_8x16 = 0xa,
1031 ADDR_SURF_P8_16x32_16x16 = 0xb,
1032 ADDR_SURF_P8_32x32_16x16 = 0xc,
1033 ADDR_SURF_P8_32x32_16x32 = 0xd,
1034 ADDR_SURF_P8_32x64_32x32 = 0xe,
1035} PipeConfig;
1036typedef enum NumBanks {
1037 ADDR_SURF_2_BANK = 0x0,
1038 ADDR_SURF_4_BANK = 0x1,
1039 ADDR_SURF_8_BANK = 0x2,
1040 ADDR_SURF_16_BANK = 0x3,
1041} NumBanks;
1042typedef enum BankWidth {
1043 ADDR_SURF_BANK_WIDTH_1 = 0x0,
1044 ADDR_SURF_BANK_WIDTH_2 = 0x1,
1045 ADDR_SURF_BANK_WIDTH_4 = 0x2,
1046 ADDR_SURF_BANK_WIDTH_8 = 0x3,
1047} BankWidth;
1048typedef enum BankHeight {
1049 ADDR_SURF_BANK_HEIGHT_1 = 0x0,
1050 ADDR_SURF_BANK_HEIGHT_2 = 0x1,
1051 ADDR_SURF_BANK_HEIGHT_4 = 0x2,
1052 ADDR_SURF_BANK_HEIGHT_8 = 0x3,
1053} BankHeight;
1054typedef enum BankWidthHeight {
1055 ADDR_SURF_BANK_WH_1 = 0x0,
1056 ADDR_SURF_BANK_WH_2 = 0x1,
1057 ADDR_SURF_BANK_WH_4 = 0x2,
1058 ADDR_SURF_BANK_WH_8 = 0x3,
1059} BankWidthHeight;
1060typedef enum MacroTileAspect {
1061 ADDR_SURF_MACRO_ASPECT_1 = 0x0,
1062 ADDR_SURF_MACRO_ASPECT_2 = 0x1,
1063 ADDR_SURF_MACRO_ASPECT_4 = 0x2,
1064 ADDR_SURF_MACRO_ASPECT_8 = 0x3,
1065} MacroTileAspect;
1066typedef enum TCC_CACHE_POLICIES {
1067 TCC_CACHE_POLICY_LRU = 0x0,
1068 TCC_CACHE_POLICY_STREAM = 0x1,
1069 TCC_CACHE_POLICY_BYPASS = 0x2,
1070} TCC_CACHE_POLICIES;
1071typedef enum PERFMON_COUNTER_MODE {
1072 PERFMON_COUNTER_MODE_ACCUM = 0x0,
1073 PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x1,
1074 PERFMON_COUNTER_MODE_MAX = 0x2,
1075 PERFMON_COUNTER_MODE_DIRTY = 0x3,
1076 PERFMON_COUNTER_MODE_SAMPLE = 0x4,
1077 PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT = 0x5,
1078 PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT = 0x6,
1079 PERFMON_COUNTER_MODE_CYCLES_GE_HI = 0x7,
1080 PERFMON_COUNTER_MODE_CYCLES_EQ_HI = 0x8,
1081 PERFMON_COUNTER_MODE_INACTIVE_CYCLES = 0x9,
1082 PERFMON_COUNTER_MODE_RESERVED = 0xf,
1083} PERFMON_COUNTER_MODE;
1084typedef enum PERFMON_SPM_MODE {
1085 PERFMON_SPM_MODE_OFF = 0x0,
1086 PERFMON_SPM_MODE_16BIT_CLAMP = 0x1,
1087 PERFMON_SPM_MODE_16BIT_NO_CLAMP = 0x2,
1088 PERFMON_SPM_MODE_32BIT_CLAMP = 0x3,
1089 PERFMON_SPM_MODE_32BIT_NO_CLAMP = 0x4,
1090 PERFMON_SPM_MODE_RESERVED_5 = 0x5,
1091 PERFMON_SPM_MODE_RESERVED_6 = 0x6,
1092 PERFMON_SPM_MODE_RESERVED_7 = 0x7,
1093 PERFMON_SPM_MODE_TEST_MODE_0 = 0x8,
1094 PERFMON_SPM_MODE_TEST_MODE_1 = 0x9,
1095 PERFMON_SPM_MODE_TEST_MODE_2 = 0xa,
1096} PERFMON_SPM_MODE;
1097typedef enum SurfaceTiling {
1098 ARRAY_LINEAR = 0x0,
1099 ARRAY_TILED = 0x1,
1100} SurfaceTiling;
1101typedef enum SurfaceArray {
1102 ARRAY_1D = 0x0,
1103 ARRAY_2D = 0x1,
1104 ARRAY_3D = 0x2,
1105 ARRAY_3D_SLICE = 0x3,
1106} SurfaceArray;
1107typedef enum ColorArray {
1108 ARRAY_2D_ALT_COLOR = 0x0,
1109 ARRAY_2D_COLOR = 0x1,
1110 ARRAY_3D_SLICE_COLOR = 0x3,
1111} ColorArray;
1112typedef enum DepthArray {
1113 ARRAY_2D_ALT_DEPTH = 0x0,
1114 ARRAY_2D_DEPTH = 0x1,
1115} DepthArray;
1116
1117#endif /* DCE_8_0_ENUM_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
index 8a2930734477..c331c9fe7b81 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
@@ -4130,6 +4130,18 @@
4130#define PHY_AUX_CNTL__AUX_PAD_WAKE__SHIFT 0xe 4130#define PHY_AUX_CNTL__AUX_PAD_WAKE__SHIFT 0xe
4131#define PHY_AUX_CNTL__AUX_PAD_RXSEL_MASK 0x10000 4131#define PHY_AUX_CNTL__AUX_PAD_RXSEL_MASK 0x10000
4132#define PHY_AUX_CNTL__AUX_PAD_RXSEL__SHIFT 0x10 4132#define PHY_AUX_CNTL__AUX_PAD_RXSEL__SHIFT 0x10
4133#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK_MASK 0x1
4134#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK__SHIFT 0x0
4135#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS_MASK 0x2
4136#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS__SHIFT 0x1
4137#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV_MASK 0x4
4138#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV__SHIFT 0x2
4139#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK_MASK 0x10
4140#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK__SHIFT 0x4
4141#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS_MASK 0x20
4142#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS__SHIFT 0x5
4143#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV_MASK 0x40
4144#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV__SHIFT 0x6
4133#define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A_MASK 0x1 4145#define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A_MASK 0x1
4134#define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A__SHIFT 0x0 4146#define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A__SHIFT 0x0
4135#define DC_GPIO_I2CPAD_A__DC_GPIO_SDA_A_MASK 0x2 4147#define DC_GPIO_I2CPAD_A__DC_GPIO_SDA_A_MASK 0x2
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h
new file mode 100644
index 000000000000..d21c6b14662f
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h
@@ -0,0 +1,102 @@
1/*
2 * Volcanic Islands IV SRC Register documentation
3 *
4 * Copyright (C) 2015 Advanced Micro Devices, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24#ifndef _IVSRCID_VISLANDS30_H_
25#define _IVSRCID_VISLANDS30_H_
26
27
28// IV Source IDs
29
30#define VISLANDS30_IV_SRCID_D1_V_UPDATE_INT 7 // 0x07
31#define VISLANDS30_IV_EXTID_D1_V_UPDATE_INT 0
32
33#define VISLANDS30_IV_SRCID_D1_GRPH_PFLIP 8 // 0x08
34#define VISLANDS30_IV_EXTID_D1_GRPH_PFLIP 0
35
36#define VISLANDS30_IV_SRCID_D2_V_UPDATE_INT 9 // 0x09
37#define VISLANDS30_IV_EXTID_D2_V_UPDATE_INT 0
38
39#define VISLANDS30_IV_SRCID_D2_GRPH_PFLIP 10 // 0x0a
40#define VISLANDS30_IV_EXTID_D2_GRPH_PFLIP 0
41
42#define VISLANDS30_IV_SRCID_D3_V_UPDATE_INT 11 // 0x0b
43#define VISLANDS30_IV_EXTID_D3_V_UPDATE_INT 0
44
45#define VISLANDS30_IV_SRCID_D3_GRPH_PFLIP 12 // 0x0c
46#define VISLANDS30_IV_EXTID_D3_GRPH_PFLIP 0
47
48#define VISLANDS30_IV_SRCID_D4_V_UPDATE_INT 13 // 0x0d
49#define VISLANDS30_IV_EXTID_D4_V_UPDATE_INT 0
50
51#define VISLANDS30_IV_SRCID_D4_GRPH_PFLIP 14 // 0x0e
52#define VISLANDS30_IV_EXTID_D4_GRPH_PFLIP 0
53
54#define VISLANDS30_IV_SRCID_D5_V_UPDATE_INT 15 // 0x0f
55#define VISLANDS30_IV_EXTID_D5_V_UPDATE_INT 0
56
57#define VISLANDS30_IV_SRCID_D5_GRPH_PFLIP 16 // 0x10
58#define VISLANDS30_IV_EXTID_D5_GRPH_PFLIP 0
59
60#define VISLANDS30_IV_SRCID_D6_V_UPDATE_INT 17 // 0x11
61#define VISLANDS30_IV_EXTID_D6_V_UPDATE_INT 0
62
63#define VISLANDS30_IV_SRCID_D6_GRPH_PFLIP 18 // 0x12
64#define VISLANDS30_IV_EXTID_D6_GRPH_PFLIP 0
65
66#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A 42 // 0x2a
67#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_A 0
68
69#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_B 42 // 0x2a
70#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_B 1
71
72#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_C 42 // 0x2a
73#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_C 2
74
75#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_D 42 // 0x2a
76#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_D 3
77
78#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_E 42 // 0x2a
79#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_E 4
80
81#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_F 42 // 0x2a
82#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_F 5
83
84#define VISLANDS30_IV_SRCID_HPD_RX_A 42 // 0x2a
85#define VISLANDS30_IV_EXTID_HPD_RX_A 6
86
87#define VISLANDS30_IV_SRCID_HPD_RX_B 42 // 0x2a
88#define VISLANDS30_IV_EXTID_HPD_RX_B 7
89
90#define VISLANDS30_IV_SRCID_HPD_RX_C 42 // 0x2a
91#define VISLANDS30_IV_EXTID_HPD_RX_C 8
92
93#define VISLANDS30_IV_SRCID_HPD_RX_D 42 // 0x2a
94#define VISLANDS30_IV_EXTID_HPD_RX_D 9
95
96#define VISLANDS30_IV_SRCID_HPD_RX_E 42 // 0x2a
97#define VISLANDS30_IV_EXTID_HPD_RX_E 10
98
99#define VISLANDS30_IV_SRCID_HPD_RX_F 42 // 0x2a
100#define VISLANDS30_IV_EXTID_HPD_RX_F 11
101
102#endif // _IVSRCID_VISLANDS30_H_
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index aa67244a77ae..2ee4190f8c89 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -29,6 +29,7 @@
29#include "pp_instance.h" 29#include "pp_instance.h"
30#include "power_state.h" 30#include "power_state.h"
31#include "eventmanager.h" 31#include "eventmanager.h"
32#include "pp_debug.h"
32 33
33#define PP_CHECK(handle) \ 34#define PP_CHECK(handle) \
34 do { \ 35 do { \
@@ -433,7 +434,10 @@ enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle)
433 case PP_StateUILabel_Performance: 434 case PP_StateUILabel_Performance:
434 return POWER_STATE_TYPE_PERFORMANCE; 435 return POWER_STATE_TYPE_PERFORMANCE;
435 default: 436 default:
436 return POWER_STATE_TYPE_DEFAULT; 437 if (state->classification.flags & PP_StateClassificationFlag_Boot)
438 return POWER_STATE_TYPE_INTERNAL_BOOT;
439 else
440 return POWER_STATE_TYPE_DEFAULT;
437 } 441 }
438} 442}
439 443
@@ -535,6 +539,112 @@ static int pp_dpm_get_temperature(void *handle)
535 return hwmgr->hwmgr_func->get_temperature(hwmgr); 539 return hwmgr->hwmgr_func->get_temperature(hwmgr);
536} 540}
537 541
542static int pp_dpm_get_pp_num_states(void *handle,
543 struct pp_states_info *data)
544{
545 struct pp_hwmgr *hwmgr;
546 int i;
547
548 if (!handle)
549 return -EINVAL;
550
551 hwmgr = ((struct pp_instance *)handle)->hwmgr;
552
553 if (hwmgr == NULL || hwmgr->ps == NULL)
554 return -EINVAL;
555
556 data->nums = hwmgr->num_ps;
557
558 for (i = 0; i < hwmgr->num_ps; i++) {
559 struct pp_power_state *state = (struct pp_power_state *)
560 ((unsigned long)hwmgr->ps + i * hwmgr->ps_size);
561 switch (state->classification.ui_label) {
562 case PP_StateUILabel_Battery:
563 data->states[i] = POWER_STATE_TYPE_BATTERY;
564 break;
565 case PP_StateUILabel_Balanced:
566 data->states[i] = POWER_STATE_TYPE_BALANCED;
567 break;
568 case PP_StateUILabel_Performance:
569 data->states[i] = POWER_STATE_TYPE_PERFORMANCE;
570 break;
571 default:
572 if (state->classification.flags & PP_StateClassificationFlag_Boot)
573 data->states[i] = POWER_STATE_TYPE_INTERNAL_BOOT;
574 else
575 data->states[i] = POWER_STATE_TYPE_DEFAULT;
576 }
577 }
578
579 return 0;
580}
581
582static int pp_dpm_get_pp_table(void *handle, char **table)
583{
584 struct pp_hwmgr *hwmgr;
585
586 if (!handle)
587 return -EINVAL;
588
589 hwmgr = ((struct pp_instance *)handle)->hwmgr;
590
591 if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
592 hwmgr->hwmgr_func->get_pp_table == NULL)
593 return -EINVAL;
594
595 return hwmgr->hwmgr_func->get_pp_table(hwmgr, table);
596}
597
598static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
599{
600 struct pp_hwmgr *hwmgr;
601
602 if (!handle)
603 return -EINVAL;
604
605 hwmgr = ((struct pp_instance *)handle)->hwmgr;
606
607 if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
608 hwmgr->hwmgr_func->set_pp_table == NULL)
609 return -EINVAL;
610
611 return hwmgr->hwmgr_func->set_pp_table(hwmgr, buf, size);
612}
613
614static int pp_dpm_force_clock_level(void *handle,
615 enum pp_clock_type type, int level)
616{
617 struct pp_hwmgr *hwmgr;
618
619 if (!handle)
620 return -EINVAL;
621
622 hwmgr = ((struct pp_instance *)handle)->hwmgr;
623
624 if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
625 hwmgr->hwmgr_func->force_clock_level == NULL)
626 return -EINVAL;
627
628 return hwmgr->hwmgr_func->force_clock_level(hwmgr, type, level);
629}
630
631static int pp_dpm_print_clock_levels(void *handle,
632 enum pp_clock_type type, char *buf)
633{
634 struct pp_hwmgr *hwmgr;
635
636 if (!handle)
637 return -EINVAL;
638
639 hwmgr = ((struct pp_instance *)handle)->hwmgr;
640
641 if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
642 hwmgr->hwmgr_func->print_clock_levels == NULL)
643 return -EINVAL;
644
645 return hwmgr->hwmgr_func->print_clock_levels(hwmgr, type, buf);
646}
647
538const struct amd_powerplay_funcs pp_dpm_funcs = { 648const struct amd_powerplay_funcs pp_dpm_funcs = {
539 .get_temperature = pp_dpm_get_temperature, 649 .get_temperature = pp_dpm_get_temperature,
540 .load_firmware = pp_dpm_load_fw, 650 .load_firmware = pp_dpm_load_fw,
@@ -552,6 +662,11 @@ const struct amd_powerplay_funcs pp_dpm_funcs = {
552 .get_fan_control_mode = pp_dpm_get_fan_control_mode, 662 .get_fan_control_mode = pp_dpm_get_fan_control_mode,
553 .set_fan_speed_percent = pp_dpm_set_fan_speed_percent, 663 .set_fan_speed_percent = pp_dpm_set_fan_speed_percent,
554 .get_fan_speed_percent = pp_dpm_get_fan_speed_percent, 664 .get_fan_speed_percent = pp_dpm_get_fan_speed_percent,
665 .get_pp_num_states = pp_dpm_get_pp_num_states,
666 .get_pp_table = pp_dpm_get_pp_table,
667 .set_pp_table = pp_dpm_set_pp_table,
668 .force_clock_level = pp_dpm_force_clock_level,
669 .print_clock_levels = pp_dpm_print_clock_levels,
555}; 670};
556 671
557static int amd_pp_instance_init(struct amd_pp_init *pp_init, 672static int amd_pp_instance_init(struct amd_pp_init *pp_init,
@@ -635,10 +750,10 @@ int amd_powerplay_fini(void *handle)
635 750
636/* export this function to DAL */ 751/* export this function to DAL */
637 752
638int amd_powerplay_display_configuration_change(void *handle, const void *input) 753int amd_powerplay_display_configuration_change(void *handle,
754 const struct amd_pp_display_configuration *display_config)
639{ 755{
640 struct pp_hwmgr *hwmgr; 756 struct pp_hwmgr *hwmgr;
641 const struct amd_pp_display_configuration *display_config = input;
642 757
643 PP_CHECK((struct pp_instance *)handle); 758 PP_CHECK((struct pp_instance *)handle);
644 759
@@ -650,7 +765,7 @@ int amd_powerplay_display_configuration_change(void *handle, const void *input)
650} 765}
651 766
652int amd_powerplay_get_display_power_level(void *handle, 767int amd_powerplay_get_display_power_level(void *handle,
653 struct amd_pp_dal_clock_info *output) 768 struct amd_pp_simple_clock_info *output)
654{ 769{
655 struct pp_hwmgr *hwmgr; 770 struct pp_hwmgr *hwmgr;
656 771
@@ -663,3 +778,86 @@ int amd_powerplay_get_display_power_level(void *handle,
663 778
664 return phm_get_dal_power_level(hwmgr, output); 779 return phm_get_dal_power_level(hwmgr, output);
665} 780}
781
782int amd_powerplay_get_current_clocks(void *handle,
783 struct amd_pp_clock_info *clocks)
784{
785 struct pp_hwmgr *hwmgr;
786 struct amd_pp_simple_clock_info simple_clocks;
787 struct pp_clock_info hw_clocks;
788
789 PP_CHECK((struct pp_instance *)handle);
790
791 if (clocks == NULL)
792 return -EINVAL;
793
794 hwmgr = ((struct pp_instance *)handle)->hwmgr;
795
796 phm_get_dal_power_level(hwmgr, &simple_clocks);
797
798 if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PowerContainment)) {
799 if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_PowerContainment))
800 PP_ASSERT_WITH_CODE(0, "Error in PHM_GetPowerContainmentClockInfo", return -1);
801 } else {
802 if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_Activity))
803 PP_ASSERT_WITH_CODE(0, "Error in PHM_GetClockInfo", return -1);
804 }
805
806 clocks->min_engine_clock = hw_clocks.min_eng_clk;
807 clocks->max_engine_clock = hw_clocks.max_eng_clk;
808 clocks->min_memory_clock = hw_clocks.min_mem_clk;
809 clocks->max_memory_clock = hw_clocks.max_mem_clk;
810 clocks->min_bus_bandwidth = hw_clocks.min_bus_bandwidth;
811 clocks->max_bus_bandwidth = hw_clocks.max_bus_bandwidth;
812
813 clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk;
814 clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk;
815
816 clocks->max_clocks_state = simple_clocks.level;
817
818 if (0 == phm_get_current_shallow_sleep_clocks(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks)) {
819 clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk;
820 clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk;
821 }
822
823 return 0;
824
825}
826
827int amd_powerplay_get_clock_by_type(void *handle, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks)
828{
829 int result = -1;
830
831 struct pp_hwmgr *hwmgr;
832
833 PP_CHECK((struct pp_instance *)handle);
834
835 if (clocks == NULL)
836 return -EINVAL;
837
838 hwmgr = ((struct pp_instance *)handle)->hwmgr;
839
840 result = phm_get_clock_by_type(hwmgr, type, clocks);
841
842 return result;
843}
844
845int amd_powerplay_get_display_mode_validation_clocks(void *handle,
846 struct amd_pp_simple_clock_info *clocks)
847{
848 int result = -1;
849 struct pp_hwmgr *hwmgr;
850
851 PP_CHECK((struct pp_instance *)handle);
852
853 if (clocks == NULL)
854 return -EINVAL;
855
856 hwmgr = ((struct pp_instance *)handle)->hwmgr;
857
858 if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DynamicPatchPowerState))
859 result = phm_get_max_high_clocks(hwmgr, clocks);
860
861 return result;
862}
863
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index 0874ab42ee95..ef1daf1251c7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -715,7 +715,6 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr,
715 unsigned long clock = 0; 715 unsigned long clock = 0;
716 unsigned long level; 716 unsigned long level;
717 unsigned long stable_pstate_sclk; 717 unsigned long stable_pstate_sclk;
718 struct PP_Clocks clocks;
719 unsigned long percentage; 718 unsigned long percentage;
720 719
721 cz_hwmgr->sclk_dpm.soft_min_clk = table->entries[0].clk; 720 cz_hwmgr->sclk_dpm.soft_min_clk = table->entries[0].clk;
@@ -726,8 +725,9 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr,
726 else 725 else
727 cz_hwmgr->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk; 726 cz_hwmgr->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk;
728 727
729 /*PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks);*/ 728 clock = hwmgr->display_config.min_core_set_clock;
730 clock = clocks.engineClock; 729 if (clock == 0)
730 printk(KERN_ERR "[ powerplay ] min_core_set_clock not set\n");
731 731
732 if (cz_hwmgr->sclk_dpm.hard_min_clk != clock) { 732 if (cz_hwmgr->sclk_dpm.hard_min_clk != clock) {
733 cz_hwmgr->sclk_dpm.hard_min_clk = clock; 733 cz_hwmgr->sclk_dpm.hard_min_clk = clock;
@@ -883,9 +883,9 @@ static int cz_tf_update_low_mem_pstate(struct pp_hwmgr *hwmgr,
883 883
884 if (pnew_state->action == FORCE_HIGH) 884 if (pnew_state->action == FORCE_HIGH)
885 cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch); 885 cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch);
886 else if(pnew_state->action == CANCEL_FORCE_HIGH) 886 else if (pnew_state->action == CANCEL_FORCE_HIGH)
887 cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch); 887 cz_nbdpm_pstate_enable_disable(hwmgr, true, disable_switch);
888 else 888 else
889 cz_nbdpm_pstate_enable_disable(hwmgr, enable_low_mem_state, disable_switch); 889 cz_nbdpm_pstate_enable_disable(hwmgr, enable_low_mem_state, disable_switch);
890 } 890 }
891 return 0; 891 return 0;
@@ -1110,9 +1110,10 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
1110 cast_const_PhwCzPowerState(&pcurrent_ps->hardware); 1110 cast_const_PhwCzPowerState(&pcurrent_ps->hardware);
1111 1111
1112 struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); 1112 struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
1113 struct PP_Clocks clocks; 1113 struct PP_Clocks clocks = {0, 0, 0, 0};
1114 bool force_high; 1114 bool force_high;
1115 unsigned long num_of_active_displays = 4; 1115 uint32_t num_of_active_displays = 0;
1116 struct cgs_display_info info = {0};
1116 1117
1117 cz_ps->evclk = hwmgr->vce_arbiter.evclk; 1118 cz_ps->evclk = hwmgr->vce_arbiter.evclk;
1118 cz_ps->ecclk = hwmgr->vce_arbiter.ecclk; 1119 cz_ps->ecclk = hwmgr->vce_arbiter.ecclk;
@@ -1124,12 +1125,15 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
1124 1125
1125 cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label); 1126 cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label);
1126 1127
1127 /* to do PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks); */ 1128 clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ?
1128 /* PECI_GetNumberOfActiveDisplays(pHwMgr->pPECI, &numOfActiveDisplays); */ 1129 hwmgr->display_config.min_mem_set_clock :
1130 cz_hwmgr->sys_info.nbp_memory_clock[1];
1131
1132 cgs_get_active_displays_info(hwmgr->device, &info);
1133 num_of_active_displays = info.display_count;
1134
1129 if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) 1135 if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
1130 clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk; 1136 clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk;
1131 else
1132 clocks.memoryClock = 0;
1133 1137
1134 if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk) 1138 if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk)
1135 clocks.memoryClock = hwmgr->gfx_arbiter.mclk; 1139 clocks.memoryClock = hwmgr->gfx_arbiter.mclk;
@@ -1199,6 +1203,7 @@ static int cz_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
1199 printk(KERN_ERR "[ powerplay ] Fail to construct set_power_state\n"); 1203 printk(KERN_ERR "[ powerplay ] Fail to construct set_power_state\n");
1200 return result; 1204 return result;
1201 } 1205 }
1206 hwmgr->platform_descriptor.hardwareActivityPerformanceLevels = CZ_MAX_HARDWARE_POWERLEVELS;
1202 1207
1203 result = phm_construct_table(hwmgr, &cz_phm_enable_clock_power_gatings_master, &(hwmgr->enable_clock_power_gatings)); 1208 result = phm_construct_table(hwmgr, &cz_phm_enable_clock_power_gatings_master, &(hwmgr->enable_clock_power_gatings));
1204 if (result != 0) { 1209 if (result != 0) {
@@ -1630,10 +1635,10 @@ static void cz_hw_print_display_cfg(
1630 & PWRMGT_SEPARATION_TIME_MASK) 1635 & PWRMGT_SEPARATION_TIME_MASK)
1631 << PWRMGT_SEPARATION_TIME_SHIFT; 1636 << PWRMGT_SEPARATION_TIME_SHIFT;
1632 1637
1633 data|= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0) 1638 data |= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0)
1634 << PWRMGT_DISABLE_CPU_CSTATES_SHIFT; 1639 << PWRMGT_DISABLE_CPU_CSTATES_SHIFT;
1635 1640
1636 data|= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0) 1641 data |= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0)
1637 << PWRMGT_DISABLE_CPU_PSTATES_SHIFT; 1642 << PWRMGT_DISABLE_CPU_PSTATES_SHIFT;
1638 1643
1639 PP_DBG_LOG("SetDisplaySizePowerParams data: 0x%X\n", 1644 PP_DBG_LOG("SetDisplaySizePowerParams data: 0x%X\n",
@@ -1648,9 +1653,9 @@ static void cz_hw_print_display_cfg(
1648} 1653}
1649 1654
1650 1655
1651 static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time, 1656static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time,
1652 bool cc6_disable, bool pstate_disable, bool pstate_switch_disable) 1657 bool cc6_disable, bool pstate_disable, bool pstate_switch_disable)
1653 { 1658{
1654 struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend); 1659 struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend);
1655 1660
1656 if (separation_time != 1661 if (separation_time !=
@@ -1678,20 +1683,19 @@ static void cz_hw_print_display_cfg(
1678 return 0; 1683 return 0;
1679} 1684}
1680 1685
1681 static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr, 1686static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr,
1682 struct amd_pp_dal_clock_info*info) 1687 struct amd_pp_simple_clock_info *info)
1683{ 1688{
1684 uint32_t i; 1689 uint32_t i;
1685 const struct phm_clock_voltage_dependency_table * table = 1690 const struct phm_clock_voltage_dependency_table *table =
1686 hwmgr->dyn_state.vddc_dep_on_dal_pwrl; 1691 hwmgr->dyn_state.vddc_dep_on_dal_pwrl;
1687 const struct phm_clock_and_voltage_limits* limits = 1692 const struct phm_clock_and_voltage_limits *limits =
1688 &hwmgr->dyn_state.max_clock_voltage_on_ac; 1693 &hwmgr->dyn_state.max_clock_voltage_on_ac;
1689 1694
1690 info->engine_max_clock = limits->sclk; 1695 info->engine_max_clock = limits->sclk;
1691 info->memory_max_clock = limits->mclk; 1696 info->memory_max_clock = limits->mclk;
1692 1697
1693 for (i = table->count - 1; i > 0; i--) { 1698 for (i = table->count - 1; i > 0; i--) {
1694
1695 if (limits->vddc >= table->entries[i].v) { 1699 if (limits->vddc >= table->entries[i].v) {
1696 info->level = table->entries[i].clk; 1700 info->level = table->entries[i].clk;
1697 return 0; 1701 return 0;
@@ -1700,6 +1704,158 @@ static void cz_hw_print_display_cfg(
1700 return -EINVAL; 1704 return -EINVAL;
1701} 1705}
1702 1706
1707static int cz_force_clock_level(struct pp_hwmgr *hwmgr,
1708 enum pp_clock_type type, int level)
1709{
1710 if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
1711 return -EINVAL;
1712
1713 switch (type) {
1714 case PP_SCLK:
1715 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
1716 PPSMC_MSG_SetSclkSoftMin,
1717 (1 << level));
1718 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
1719 PPSMC_MSG_SetSclkSoftMax,
1720 (1 << level));
1721 break;
1722 default:
1723 break;
1724 }
1725
1726 return 0;
1727}
1728
1729static int cz_print_clock_levels(struct pp_hwmgr *hwmgr,
1730 enum pp_clock_type type, char *buf)
1731{
1732 struct phm_clock_voltage_dependency_table *sclk_table =
1733 hwmgr->dyn_state.vddc_dependency_on_sclk;
1734 int i, now, size = 0;
1735
1736 switch (type) {
1737 case PP_SCLK:
1738 now = PHM_GET_FIELD(cgs_read_ind_register(hwmgr->device,
1739 CGS_IND_REG__SMC,
1740 ixTARGET_AND_CURRENT_PROFILE_INDEX),
1741 TARGET_AND_CURRENT_PROFILE_INDEX,
1742 CURR_SCLK_INDEX);
1743
1744 for (i = 0; i < sclk_table->count; i++)
1745 size += sprintf(buf + size, "%d: %uMhz %s\n",
1746 i, sclk_table->entries[i].clk / 100,
1747 (i == now) ? "*" : "");
1748 break;
1749 default:
1750 break;
1751 }
1752 return size;
1753}
1754
1755static int cz_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
1756 PHM_PerformanceLevelDesignation designation, uint32_t index,
1757 PHM_PerformanceLevel *level)
1758{
1759 const struct cz_power_state *ps;
1760 struct cz_hwmgr *data;
1761 uint32_t level_index;
1762 uint32_t i;
1763
1764 if (level == NULL || hwmgr == NULL || state == NULL)
1765 return -EINVAL;
1766
1767 data = (struct cz_hwmgr *)(hwmgr->backend);
1768 ps = cast_const_PhwCzPowerState(state);
1769
1770 level_index = index > ps->level - 1 ? ps->level - 1 : index;
1771
1772 level->coreClock = ps->levels[level_index].engineClock;
1773
1774 if (designation == PHM_PerformanceLevelDesignation_PowerContainment) {
1775 for (i = 1; i < ps->level; i++) {
1776 if (ps->levels[i].engineClock > data->dce_slow_sclk_threshold) {
1777 level->coreClock = ps->levels[i].engineClock;
1778 break;
1779 }
1780 }
1781 }
1782
1783 if (level_index == 0)
1784 level->memory_clock = data->sys_info.nbp_memory_clock[CZ_NUM_NBPMEMORYCLOCK - 1];
1785 else
1786 level->memory_clock = data->sys_info.nbp_memory_clock[0];
1787
1788 level->vddc = (cz_convert_8Bit_index_to_voltage(hwmgr, ps->levels[level_index].vddcIndex) + 2) / 4;
1789 level->nonLocalMemoryFreq = 0;
1790 level->nonLocalMemoryWidth = 0;
1791
1792 return 0;
1793}
1794
1795static int cz_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr,
1796 const struct pp_hw_power_state *state, struct pp_clock_info *clock_info)
1797{
1798 const struct cz_power_state *ps = cast_const_PhwCzPowerState(state);
1799
1800 clock_info->min_eng_clk = ps->levels[0].engineClock / (1 << (ps->levels[0].ssDividerIndex));
1801 clock_info->max_eng_clk = ps->levels[ps->level - 1].engineClock / (1 << (ps->levels[ps->level - 1].ssDividerIndex));
1802
1803 return 0;
1804}
1805
1806static int cz_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type,
1807 struct amd_pp_clocks *clocks)
1808{
1809 struct cz_hwmgr *data = (struct cz_hwmgr *)(hwmgr->backend);
1810 int i;
1811 struct phm_clock_voltage_dependency_table *table;
1812
1813 clocks->count = cz_get_max_sclk_level(hwmgr);
1814 switch (type) {
1815 case amd_pp_disp_clock:
1816 for (i = 0; i < clocks->count; i++)
1817 clocks->clock[i] = data->sys_info.display_clock[i];
1818 break;
1819 case amd_pp_sys_clock:
1820 table = hwmgr->dyn_state.vddc_dependency_on_sclk;
1821 for (i = 0; i < clocks->count; i++)
1822 clocks->clock[i] = table->entries[i].clk;
1823 break;
1824 case amd_pp_mem_clock:
1825 clocks->count = CZ_NUM_NBPMEMORYCLOCK;
1826 for (i = 0; i < clocks->count; i++)
1827 clocks->clock[i] = data->sys_info.nbp_memory_clock[clocks->count - 1 - i];
1828 break;
1829 default:
1830 return -1;
1831 }
1832
1833 return 0;
1834}
1835
1836static int cz_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks)
1837{
1838 struct phm_clock_voltage_dependency_table *table =
1839 hwmgr->dyn_state.vddc_dependency_on_sclk;
1840 unsigned long level;
1841 const struct phm_clock_and_voltage_limits *limits =
1842 &hwmgr->dyn_state.max_clock_voltage_on_ac;
1843
1844 if ((NULL == table) || (table->count <= 0) || (clocks == NULL))
1845 return -EINVAL;
1846
1847 level = cz_get_max_sclk_level(hwmgr) - 1;
1848
1849 if (level < table->count)
1850 clocks->engine_max_clock = table->entries[level].clk;
1851 else
1852 clocks->engine_max_clock = table->entries[table->count - 1].clk;
1853
1854 clocks->memory_max_clock = limits->mclk;
1855
1856 return 0;
1857}
1858
1703static const struct pp_hwmgr_func cz_hwmgr_funcs = { 1859static const struct pp_hwmgr_func cz_hwmgr_funcs = {
1704 .backend_init = cz_hwmgr_backend_init, 1860 .backend_init = cz_hwmgr_backend_init,
1705 .backend_fini = cz_hwmgr_backend_fini, 1861 .backend_fini = cz_hwmgr_backend_fini,
@@ -1718,7 +1874,13 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = {
1718 .print_current_perforce_level = cz_print_current_perforce_level, 1874 .print_current_perforce_level = cz_print_current_perforce_level,
1719 .set_cpu_power_state = cz_set_cpu_power_state, 1875 .set_cpu_power_state = cz_set_cpu_power_state,
1720 .store_cc6_data = cz_store_cc6_data, 1876 .store_cc6_data = cz_store_cc6_data,
1721 .get_dal_power_level= cz_get_dal_power_level, 1877 .force_clock_level = cz_force_clock_level,
1878 .print_clock_levels = cz_print_clock_levels,
1879 .get_dal_power_level = cz_get_dal_power_level,
1880 .get_performance_level = cz_get_performance_level,
1881 .get_current_shallow_sleep_clocks = cz_get_current_shallow_sleep_clocks,
1882 .get_clock_by_type = cz_get_clock_by_type,
1883 .get_max_high_clocks = cz_get_max_high_clocks,
1722}; 1884};
1723 1885
1724int cz_hwmgr_init(struct pp_hwmgr *hwmgr) 1886int cz_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 28031a7eddba..5cca2ecc6bea 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -5073,6 +5073,125 @@ static int fiji_get_fan_control_mode(struct pp_hwmgr *hwmgr)
5073 CG_FDO_CTRL2, FDO_PWM_MODE); 5073 CG_FDO_CTRL2, FDO_PWM_MODE);
5074} 5074}
5075 5075
5076static int fiji_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
5077{
5078 struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
5079
5080 *table = (char *)&data->smc_state_table;
5081
5082 return sizeof(struct SMU73_Discrete_DpmTable);
5083}
5084
5085static int fiji_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
5086{
5087 struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
5088
5089 void *table = (void *)&data->smc_state_table;
5090
5091 memcpy(table, buf, size);
5092
5093 return 0;
5094}
5095
5096static int fiji_force_clock_level(struct pp_hwmgr *hwmgr,
5097 enum pp_clock_type type, int level)
5098{
5099 struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
5100
5101 if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
5102 return -EINVAL;
5103
5104 switch (type) {
5105 case PP_SCLK:
5106 if (!data->sclk_dpm_key_disabled)
5107 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
5108 PPSMC_MSG_SCLKDPM_SetEnabledMask,
5109 (1 << level));
5110 break;
5111 case PP_MCLK:
5112 if (!data->mclk_dpm_key_disabled)
5113 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
5114 PPSMC_MSG_MCLKDPM_SetEnabledMask,
5115 (1 << level));
5116 break;
5117 case PP_PCIE:
5118 if (!data->pcie_dpm_key_disabled)
5119 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
5120 PPSMC_MSG_PCIeDPM_ForceLevel,
5121 (1 << level));
5122 break;
5123 default:
5124 break;
5125 }
5126
5127 return 0;
5128}
5129
5130static int fiji_print_clock_levels(struct pp_hwmgr *hwmgr,
5131 enum pp_clock_type type, char *buf)
5132{
5133 struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
5134 struct fiji_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
5135 struct fiji_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
5136 struct fiji_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table);
5137 int i, now, size = 0;
5138 uint32_t clock, pcie_speed;
5139
5140 switch (type) {
5141 case PP_SCLK:
5142 smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency);
5143 clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
5144
5145 for (i = 0; i < sclk_table->count; i++) {
5146 if (clock > sclk_table->dpm_levels[i].value)
5147 continue;
5148 break;
5149 }
5150 now = i;
5151
5152 for (i = 0; i < sclk_table->count; i++)
5153 size += sprintf(buf + size, "%d: %uMhz %s\n",
5154 i, sclk_table->dpm_levels[i].value / 100,
5155 (i == now) ? "*" : "");
5156 break;
5157 case PP_MCLK:
5158 smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency);
5159 clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
5160
5161 for (i = 0; i < mclk_table->count; i++) {
5162 if (clock > mclk_table->dpm_levels[i].value)
5163 continue;
5164 break;
5165 }
5166 now = i;
5167
5168 for (i = 0; i < mclk_table->count; i++)
5169 size += sprintf(buf + size, "%d: %uMhz %s\n",
5170 i, mclk_table->dpm_levels[i].value / 100,
5171 (i == now) ? "*" : "");
5172 break;
5173 case PP_PCIE:
5174 pcie_speed = fiji_get_current_pcie_speed(hwmgr);
5175 for (i = 0; i < pcie_table->count; i++) {
5176 if (pcie_speed != pcie_table->dpm_levels[i].value)
5177 continue;
5178 break;
5179 }
5180 now = i;
5181
5182 for (i = 0; i < pcie_table->count; i++)
5183 size += sprintf(buf + size, "%d: %s %s\n", i,
5184 (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" :
5185 (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
5186 (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
5187 (i == now) ? "*" : "");
5188 break;
5189 default:
5190 break;
5191 }
5192 return size;
5193}
5194
5076static const struct pp_hwmgr_func fiji_hwmgr_funcs = { 5195static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
5077 .backend_init = &fiji_hwmgr_backend_init, 5196 .backend_init = &fiji_hwmgr_backend_init,
5078 .backend_fini = &tonga_hwmgr_backend_fini, 5197 .backend_fini = &tonga_hwmgr_backend_fini,
@@ -5108,6 +5227,10 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
5108 .register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt, 5227 .register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt,
5109 .set_fan_control_mode = fiji_set_fan_control_mode, 5228 .set_fan_control_mode = fiji_set_fan_control_mode,
5110 .get_fan_control_mode = fiji_get_fan_control_mode, 5229 .get_fan_control_mode = fiji_get_fan_control_mode,
5230 .get_pp_table = fiji_get_pp_table,
5231 .set_pp_table = fiji_set_pp_table,
5232 .force_clock_level = fiji_force_clock_level,
5233 .print_clock_levels = fiji_print_clock_levels,
5111}; 5234};
5112 5235
5113int fiji_hwmgr_init(struct pp_hwmgr *hwmgr) 5236int fiji_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 0f2d5e4bc241..be31bed2538a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -26,7 +26,7 @@
26#include "power_state.h" 26#include "power_state.h"
27#include "pp_acpi.h" 27#include "pp_acpi.h"
28#include "amd_acpi.h" 28#include "amd_acpi.h"
29#include "amd_powerplay.h" 29#include "pp_debug.h"
30 30
31#define PHM_FUNC_CHECK(hw) \ 31#define PHM_FUNC_CHECK(hw) \
32 do { \ 32 do { \
@@ -313,13 +313,12 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
313} 313}
314 314
315int phm_get_dal_power_level(struct pp_hwmgr *hwmgr, 315int phm_get_dal_power_level(struct pp_hwmgr *hwmgr,
316 struct amd_pp_dal_clock_info *info) 316 struct amd_pp_simple_clock_info *info)
317{ 317{
318 PHM_FUNC_CHECK(hwmgr); 318 PHM_FUNC_CHECK(hwmgr);
319 319
320 if (info == NULL || hwmgr->hwmgr_func->get_dal_power_level == NULL) 320 if (info == NULL || hwmgr->hwmgr_func->get_dal_power_level == NULL)
321 return -EINVAL; 321 return -EINVAL;
322
323 return hwmgr->hwmgr_func->get_dal_power_level(hwmgr, info); 322 return hwmgr->hwmgr_func->get_dal_power_level(hwmgr, info);
324} 323}
325 324
@@ -332,3 +331,91 @@ int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr)
332 331
333 return 0; 332 return 0;
334} 333}
334
335
336int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
337 PHM_PerformanceLevelDesignation designation, uint32_t index,
338 PHM_PerformanceLevel *level)
339{
340 PHM_FUNC_CHECK(hwmgr);
341 if (hwmgr->hwmgr_func->get_performance_level == NULL)
342 return -EINVAL;
343
344 return hwmgr->hwmgr_func->get_performance_level(hwmgr, state, designation, index, level);
345
346
347}
348
349
350/**
351* Gets Clock Info.
352*
353* @param pHwMgr the address of the powerplay hardware manager.
354* @param pPowerState the address of the Power State structure.
355* @param pClockInfo the address of the PP_ClockInfo structure where the result will be returned.
356* @exception PP_Result_Failed if any of the parameters is NULL, otherwise the return value from the back-end.
357*/
358int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *pclock_info,
359 PHM_PerformanceLevelDesignation designation)
360{
361 int result;
362 PHM_PerformanceLevel performance_level;
363
364 PHM_FUNC_CHECK(hwmgr);
365
366 PP_ASSERT_WITH_CODE((NULL != state), "Invalid Input!", return -EINVAL);
367 PP_ASSERT_WITH_CODE((NULL != pclock_info), "Invalid Input!", return -EINVAL);
368
369 result = phm_get_performance_level(hwmgr, state, PHM_PerformanceLevelDesignation_Activity, 0, &performance_level);
370
371 PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve minimum clocks.", return result);
372
373
374 pclock_info->min_mem_clk = performance_level.memory_clock;
375 pclock_info->min_eng_clk = performance_level.coreClock;
376 pclock_info->min_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth;
377
378
379 result = phm_get_performance_level(hwmgr, state, designation,
380 (hwmgr->platform_descriptor.hardwareActivityPerformanceLevels - 1), &performance_level);
381
382 PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve maximum clocks.", return result);
383
384 pclock_info->max_mem_clk = performance_level.memory_clock;
385 pclock_info->max_eng_clk = performance_level.coreClock;
386 pclock_info->max_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth;
387
388 return 0;
389}
390
391int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info)
392{
393 PHM_FUNC_CHECK(hwmgr);
394
395 if (hwmgr->hwmgr_func->get_current_shallow_sleep_clocks == NULL)
396 return -EINVAL;
397
398 return hwmgr->hwmgr_func->get_current_shallow_sleep_clocks(hwmgr, state, clock_info);
399
400}
401
402int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks)
403{
404 PHM_FUNC_CHECK(hwmgr);
405
406 if (hwmgr->hwmgr_func->get_clock_by_type == NULL)
407 return -EINVAL;
408
409 return hwmgr->hwmgr_func->get_clock_by_type(hwmgr, type, clocks);
410
411}
412
413int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks)
414{
415 PHM_FUNC_CHECK(hwmgr);
416
417 if (hwmgr->hwmgr_func->get_max_high_clocks == NULL)
418 return -EINVAL;
419
420 return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks);
421}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
index b7429a527828..b10df328d58c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
@@ -293,7 +293,7 @@ fInt GetScaledFraction(int X, int factor)
293 } 293 }
294 294
295 if (factor == 1) 295 if (factor == 1)
296 return (ConvertToFraction(X)); 296 return ConvertToFraction(X);
297 297
298 fValue = fDivide(ConvertToFraction(X * uPow(-1, bNEGATED)), ConvertToFraction(factor)); 298 fValue = fDivide(ConvertToFraction(X * uPow(-1, bNEGATED)), ConvertToFraction(factor));
299 299
@@ -371,7 +371,7 @@ fInt fDivide (fInt X, fInt Y)
371 fZERO = ConvertToFraction(0); 371 fZERO = ConvertToFraction(0);
372 372
373 if (Equal(Y, fZERO)) 373 if (Equal(Y, fZERO))
374 return fZERO; 374 return fZERO;
375 375
376 longlongX = (int64_t)X.full; 376 longlongX = (int64_t)X.full;
377 longlongY = (int64_t)Y.full; 377 longlongY = (int64_t)Y.full;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index 44a925006479..bc83fa35ec46 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -6018,6 +6018,125 @@ static int tonga_get_fan_control_mode(struct pp_hwmgr *hwmgr)
6018 CG_FDO_CTRL2, FDO_PWM_MODE); 6018 CG_FDO_CTRL2, FDO_PWM_MODE);
6019} 6019}
6020 6020
6021static int tonga_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
6022{
6023 struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
6024
6025 *table = (char *)&data->smc_state_table;
6026
6027 return sizeof(struct SMU72_Discrete_DpmTable);
6028}
6029
6030static int tonga_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
6031{
6032 struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
6033
6034 void *table = (void *)&data->smc_state_table;
6035
6036 memcpy(table, buf, size);
6037
6038 return 0;
6039}
6040
6041static int tonga_force_clock_level(struct pp_hwmgr *hwmgr,
6042 enum pp_clock_type type, int level)
6043{
6044 struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
6045
6046 if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
6047 return -EINVAL;
6048
6049 switch (type) {
6050 case PP_SCLK:
6051 if (!data->sclk_dpm_key_disabled)
6052 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
6053 PPSMC_MSG_SCLKDPM_SetEnabledMask,
6054 (1 << level));
6055 break;
6056 case PP_MCLK:
6057 if (!data->mclk_dpm_key_disabled)
6058 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
6059 PPSMC_MSG_MCLKDPM_SetEnabledMask,
6060 (1 << level));
6061 break;
6062 case PP_PCIE:
6063 if (!data->pcie_dpm_key_disabled)
6064 smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
6065 PPSMC_MSG_PCIeDPM_ForceLevel,
6066 (1 << level));
6067 break;
6068 default:
6069 break;
6070 }
6071
6072 return 0;
6073}
6074
6075static int tonga_print_clock_levels(struct pp_hwmgr *hwmgr,
6076 enum pp_clock_type type, char *buf)
6077{
6078 struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
6079 struct tonga_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
6080 struct tonga_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
6081 struct tonga_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table);
6082 int i, now, size = 0;
6083 uint32_t clock, pcie_speed;
6084
6085 switch (type) {
6086 case PP_SCLK:
6087 smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency);
6088 clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
6089
6090 for (i = 0; i < sclk_table->count; i++) {
6091 if (clock > sclk_table->dpm_levels[i].value)
6092 continue;
6093 break;
6094 }
6095 now = i;
6096
6097 for (i = 0; i < sclk_table->count; i++)
6098 size += sprintf(buf + size, "%d: %uMhz %s\n",
6099 i, sclk_table->dpm_levels[i].value / 100,
6100 (i == now) ? "*" : "");
6101 break;
6102 case PP_MCLK:
6103 smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency);
6104 clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
6105
6106 for (i = 0; i < mclk_table->count; i++) {
6107 if (clock > mclk_table->dpm_levels[i].value)
6108 continue;
6109 break;
6110 }
6111 now = i;
6112
6113 for (i = 0; i < mclk_table->count; i++)
6114 size += sprintf(buf + size, "%d: %uMhz %s\n",
6115 i, mclk_table->dpm_levels[i].value / 100,
6116 (i == now) ? "*" : "");
6117 break;
6118 case PP_PCIE:
6119 pcie_speed = tonga_get_current_pcie_speed(hwmgr);
6120 for (i = 0; i < pcie_table->count; i++) {
6121 if (pcie_speed != pcie_table->dpm_levels[i].value)
6122 continue;
6123 break;
6124 }
6125 now = i;
6126
6127 for (i = 0; i < pcie_table->count; i++)
6128 size += sprintf(buf + size, "%d: %s %s\n", i,
6129 (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x8" :
6130 (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
6131 (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
6132 (i == now) ? "*" : "");
6133 break;
6134 default:
6135 break;
6136 }
6137 return size;
6138}
6139
6021static const struct pp_hwmgr_func tonga_hwmgr_funcs = { 6140static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
6022 .backend_init = &tonga_hwmgr_backend_init, 6141 .backend_init = &tonga_hwmgr_backend_init,
6023 .backend_fini = &tonga_hwmgr_backend_fini, 6142 .backend_fini = &tonga_hwmgr_backend_fini,
@@ -6055,6 +6174,10 @@ static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
6055 .check_states_equal = tonga_check_states_equal, 6174 .check_states_equal = tonga_check_states_equal,
6056 .set_fan_control_mode = tonga_set_fan_control_mode, 6175 .set_fan_control_mode = tonga_set_fan_control_mode,
6057 .get_fan_control_mode = tonga_get_fan_control_mode, 6176 .get_fan_control_mode = tonga_get_fan_control_mode,
6177 .get_pp_table = tonga_get_pp_table,
6178 .set_pp_table = tonga_set_pp_table,
6179 .force_clock_level = tonga_force_clock_level,
6180 .print_clock_levels = tonga_print_clock_levels,
6058}; 6181};
6059 6182
6060int tonga_hwmgr_init(struct pp_hwmgr *hwmgr) 6183int tonga_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
index e61a3e67852e..7255f7ddf93a 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
@@ -29,6 +29,7 @@
29#include "amd_shared.h" 29#include "amd_shared.h"
30#include "cgs_common.h" 30#include "cgs_common.h"
31 31
32
32enum amd_pp_event { 33enum amd_pp_event {
33 AMD_PP_EVENT_INITIALIZE = 0, 34 AMD_PP_EVENT_INITIALIZE = 0,
34 AMD_PP_EVENT_UNINITIALIZE, 35 AMD_PP_EVENT_UNINITIALIZE,
@@ -123,6 +124,7 @@ enum amd_dpm_forced_level {
123 AMD_DPM_FORCED_LEVEL_AUTO = 0, 124 AMD_DPM_FORCED_LEVEL_AUTO = 0,
124 AMD_DPM_FORCED_LEVEL_LOW = 1, 125 AMD_DPM_FORCED_LEVEL_LOW = 1,
125 AMD_DPM_FORCED_LEVEL_HIGH = 2, 126 AMD_DPM_FORCED_LEVEL_HIGH = 2,
127 AMD_DPM_FORCED_LEVEL_MANUAL = 3,
126}; 128};
127 129
128struct amd_pp_init { 130struct amd_pp_init {
@@ -212,12 +214,55 @@ struct amd_pp_display_configuration {
 	uint32_t dce_tolerable_mclk_in_active_latency;
 };
 
-struct amd_pp_dal_clock_info {
+struct amd_pp_simple_clock_info {
 	uint32_t engine_max_clock;
 	uint32_t memory_max_clock;
 	uint32_t level;
 };
 
+enum PP_DAL_POWERLEVEL {
+	PP_DAL_POWERLEVEL_INVALID = 0,
+	PP_DAL_POWERLEVEL_ULTRALOW,
+	PP_DAL_POWERLEVEL_LOW,
+	PP_DAL_POWERLEVEL_NOMINAL,
+	PP_DAL_POWERLEVEL_PERFORMANCE,
+
+	PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW,
+	PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW,
+	PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL,
+	PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE,
+	PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1,
+	PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1,
+	PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1,
+	PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1,
+};
+
+struct amd_pp_clock_info {
+	uint32_t min_engine_clock;
+	uint32_t max_engine_clock;
+	uint32_t min_memory_clock;
+	uint32_t max_memory_clock;
+	uint32_t min_bus_bandwidth;
+	uint32_t max_bus_bandwidth;
+	uint32_t max_engine_clock_in_sr;
+	uint32_t min_engine_clock_in_sr;
+	enum PP_DAL_POWERLEVEL max_clocks_state;
+};
+
+enum amd_pp_clock_type {
+	amd_pp_disp_clock = 1,
+	amd_pp_sys_clock,
+	amd_pp_mem_clock
+};
+
+#define MAX_NUM_CLOCKS 16
+
+struct amd_pp_clocks {
+	uint32_t count;
+	uint32_t clock[MAX_NUM_CLOCKS];
+};
+
+
 enum {
 	PP_GROUP_UNKNOWN = 0,
 	PP_GROUP_GFX = 1,
@@ -225,6 +270,17 @@ enum {
 	PP_GROUP_MAX
 };
 
+enum pp_clock_type {
+	PP_SCLK,
+	PP_MCLK,
+	PP_PCIE,
+};
+
+struct pp_states_info {
+	uint32_t nums;
+	uint32_t states[16];
+};
+
 #define PP_GROUP_MASK        0xF0000000
 #define PP_GROUP_SHIFT       28
 
@@ -278,6 +334,11 @@ struct amd_powerplay_funcs {
 	int (*get_fan_control_mode)(void *handle);
 	int (*set_fan_speed_percent)(void *handle, uint32_t percent);
 	int (*get_fan_speed_percent)(void *handle, uint32_t *speed);
+	int (*get_pp_num_states)(void *handle, struct pp_states_info *data);
+	int (*get_pp_table)(void *handle, char **table);
+	int (*set_pp_table)(void *handle, const char *buf, size_t size);
+	int (*force_clock_level)(void *handle, enum pp_clock_type type, int level);
+	int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf);
 };
 
 struct amd_powerplay {
@@ -288,12 +349,23 @@ struct amd_powerplay {
 
 int amd_powerplay_init(struct amd_pp_init *pp_init,
 		struct amd_powerplay *amd_pp);
+
 int amd_powerplay_fini(void *handle);
 
-int amd_powerplay_display_configuration_change(void *handle, const void *input);
+int amd_powerplay_display_configuration_change(void *handle,
+		const struct amd_pp_display_configuration *input);
 
 int amd_powerplay_get_display_power_level(void *handle,
-		struct amd_pp_dal_clock_info *output);
+		struct amd_pp_simple_clock_info *output);
+
+int amd_powerplay_get_current_clocks(void *handle,
+		struct amd_pp_clock_info *output);
+
+int amd_powerplay_get_clock_by_type(void *handle,
+		enum amd_pp_clock_type type,
+		struct amd_pp_clocks *clocks);
 
+int amd_powerplay_get_display_mode_validation_clocks(void *handle,
+		struct amd_pp_simple_clock_info *output);
 
 #endif /* _AMD_POWERPLAY_H_ */
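[Editorial note, not part of the patch] The header now also exports simple clock-query entry points. As a hedged illustration only, a client holding a valid powerplay handle could enumerate the available memory clocks through amd_powerplay_get_clock_by_type(); pp_handle below is a placeholder for whatever handle amd_powerplay_init() produced:

	/* Hedged sketch, not part of the commit. */
	struct amd_pp_clocks clocks = {0};
	uint32_t i;
	int ret;

	ret = amd_powerplay_get_clock_by_type(pp_handle, amd_pp_mem_clock, &clocks);
	if (!ret)
		for (i = 0; i < clocks.count; i++)	/* at most MAX_NUM_CLOCKS entries */
			pr_info("mem clock %u: %u\n", i, clocks.clock[i]);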
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 91795efe1336..040d3f7cbf49 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -31,6 +31,7 @@ struct pp_power_state;
 enum amd_dpm_forced_level;
 struct PP_TemperatureRange;
 
+
 struct phm_fan_speed_info {
 	uint32_t min_percent;
 	uint32_t max_percent;
@@ -290,6 +291,15 @@ struct PP_Clocks {
 	uint32_t engineClockInSR;
 };
 
+struct pp_clock_info {
+	uint32_t min_mem_clk;
+	uint32_t max_mem_clk;
+	uint32_t min_eng_clk;
+	uint32_t max_eng_clk;
+	uint32_t min_bus_bandwidth;
+	uint32_t max_bus_bandwidth;
+};
+
 struct phm_platform_descriptor {
 	uint32_t platformCaps[PHM_MAX_NUM_CAPS_ULONG_ENTRIES];
 	uint32_t vbiosInterruptId;
@@ -323,24 +333,6 @@ struct phm_clocks {
 	uint32_t clock[MAX_NUM_CLOCKS];
 };
 
-enum PP_DAL_POWERLEVEL {
-	PP_DAL_POWERLEVEL_INVALID = 0,
-	PP_DAL_POWERLEVEL_ULTRALOW,
-	PP_DAL_POWERLEVEL_LOW,
-	PP_DAL_POWERLEVEL_NOMINAL,
-	PP_DAL_POWERLEVEL_PERFORMANCE,
-
-	PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW,
-	PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW,
-	PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL,
-	PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE,
-	PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1,
-	PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1,
-	PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1,
-	PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1,
-};
-
-
 extern int phm_enable_clock_power_gatings(struct pp_hwmgr *hwmgr);
 extern int phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool gate);
 extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate);
@@ -375,11 +367,25 @@ extern int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 		const struct amd_pp_display_configuration *display_config);
 
 extern int phm_get_dal_power_level(struct pp_hwmgr *hwmgr,
-		struct amd_pp_dal_clock_info*info);
+		struct amd_pp_simple_clock_info *info);
 
 extern int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr);
 
 extern int phm_power_down_asic(struct pp_hwmgr *hwmgr);
 
+extern int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+		PHM_PerformanceLevelDesignation designation, uint32_t index,
+		PHM_PerformanceLevel *level);
+
+extern int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+		struct pp_clock_info *pclock_info,
+		PHM_PerformanceLevelDesignation designation);
+
+extern int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info);
+
+extern int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
+
+extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
+
 #endif /* _HARDWARE_MANAGER_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index aeaa3dbba525..928f5a740cba 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -325,8 +325,18 @@ struct pp_hwmgr_func {
 				bool cc6_disable, bool pstate_disable,
 				bool pstate_switch_disable);
 	int (*get_dal_power_level)(struct pp_hwmgr *hwmgr,
-			struct amd_pp_dal_clock_info *info);
+			struct amd_pp_simple_clock_info *info);
+	int (*get_performance_level)(struct pp_hwmgr *, const struct pp_hw_power_state *,
+			PHM_PerformanceLevelDesignation, uint32_t, PHM_PerformanceLevel *);
+	int (*get_current_shallow_sleep_clocks)(struct pp_hwmgr *hwmgr,
+			const struct pp_hw_power_state *state, struct pp_clock_info *clock_info);
+	int (*get_clock_by_type)(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
+	int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
 	int (*power_off_asic)(struct pp_hwmgr *hwmgr);
+	int (*get_pp_table)(struct pp_hwmgr *hwmgr, char **table);
+	int (*set_pp_table)(struct pp_hwmgr *hwmgr, const char *buf, size_t size);
+	int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, int level);
+	int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
 };
 
 struct pp_table_func {
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 8b2becd1aa07..a5ff9458d359 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -229,6 +229,14 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
 	amd_sched_wakeup(entity->sched);
 }
 
+static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
+{
+	struct amd_sched_entity *entity =
+		container_of(cb, struct amd_sched_entity, cb);
+	entity->dependency = NULL;
+	fence_put(f);
+}
+
 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 {
 	struct amd_gpu_scheduler *sched = entity->sched;
@@ -251,7 +259,7 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 		}
 
 		/* Wait for fence to be scheduled */
-		entity->cb.func = amd_sched_entity_wakeup;
+		entity->cb.func = amd_sched_entity_clear_dep;
 		list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
 		return true;
 	}
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 4c30d8c65558..06001400ce8b 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4219,13 +4219,20 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		return r;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		radeon_scratch_free(rdev, scratch);
 		radeon_ib_free(rdev, &ib);
 		return r;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		radeon_scratch_free(rdev, scratch);
+		radeon_ib_free(rdev, &ib);
+		return -ETIMEDOUT;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index d16f2eebd95e..9c351dc8a9e0 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -737,11 +737,16 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		return r;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		return r;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		return -ETIMEDOUT;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 5eae0a88dd3e..6e478a248628 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3732,11 +3732,17 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		goto free_ib;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		goto free_ib;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		r = -ETIMEDOUT;
+		goto free_ib;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF) {
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index cc2fdf0be37a..ed121042247f 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3381,11 +3381,17 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		goto free_ib;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		goto free_ib;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		r = -ETIMEDOUT;
+		goto free_ib;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index d2dd29ab24fa..fb65e6fb5c4f 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -368,11 +368,16 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		return r;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		return r;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		return -ETIMEDOUT;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 78a51b3eda10..007be29a0020 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -120,6 +120,7 @@ extern int radeon_mst;
  */
 #define RADEON_MAX_USEC_TIMEOUT			100000 /* 100 ms */
 #define RADEON_FENCE_JIFFIES_TIMEOUT		(HZ / 2)
+#define RADEON_USEC_IB_TEST_TIMEOUT		1000000 /* 1s */
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE			16
 #define RADEON_DEBUGFS_MAX_COMPONENTS		32
@@ -382,6 +383,7 @@ void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
+long radeon_fence_wait_timeout(struct radeon_fence *fence, bool interruptible, long timeout);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
 int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 298ea1c453c3..a4674bfd979a 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1686,6 +1686,9 @@ void radeon_modeset_fini(struct radeon_device *rdev)
 	radeon_fbdev_fini(rdev);
 	kfree(rdev->mode_info.bios_hardcoded_edid);
 
+	/* free i2c buses */
+	radeon_i2c_fini(rdev);
+
 	if (rdev->mode_info.mode_config_initialized) {
 		radeon_afmt_fini(rdev);
 		drm_kms_helper_poll_fini(rdev->ddev);
@@ -1693,8 +1696,6 @@ void radeon_modeset_fini(struct radeon_device *rdev)
 		drm_mode_config_cleanup(rdev->ddev);
 		rdev->mode_info.mode_config_initialized = false;
 	}
-	/* free i2c buses */
-	radeon_i2c_fini(rdev);
 }
 
 static bool is_hdtv_mode(const struct drm_display_mode *mode)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 05815c47b246..7ef075acde9c 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -527,7 +527,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
 }
 
 /**
- * radeon_fence_wait - wait for a fence to signal
+ * radeon_fence_wait_timeout - wait for a fence to signal with timeout
  *
  * @fence: radeon fence object
  * @intr: use interruptible sleep
@@ -535,12 +535,15 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
  * Wait for the requested fence to signal (all asics).
  * @intr selects whether to use interruptable (true) or non-interruptable
  * (false) sleep when waiting for the fence.
- * Returns 0 if the fence has passed, error for all other cases.
+ * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
+ * Returns remaining time if the sequence number has passed, 0 when
+ * the wait timeout, or an error for all other cases.
  */
-int radeon_fence_wait(struct radeon_fence *fence, bool intr)
+long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
 	long r;
+	int r_sig;
 
 	/*
 	 * This function should not be called on !radeon fences.
@@ -552,15 +555,36 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 		return fence_wait(&fence->base, intr);
 
 	seq[fence->ring] = fence->seq;
-	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
-	if (r < 0) {
+	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
+	if (r <= 0) {
 		return r;
 	}
 
-	r = fence_signal(&fence->base);
-	if (!r)
+	r_sig = fence_signal(&fence->base);
+	if (!r_sig)
 		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
-	return 0;
+	return r;
+}
+
+/**
+ * radeon_fence_wait - wait for a fence to signal
+ *
+ * @fence: radeon fence object
+ * @intr: use interruptible sleep
+ *
+ * Wait for the requested fence to signal (all asics).
+ * @intr selects whether to use interruptable (true) or non-interruptable
+ * (false) sleep when waiting for the fence.
+ * Returns 0 if the fence has passed, error for all other cases.
+ */
+int radeon_fence_wait(struct radeon_fence *fence, bool intr)
+{
+	long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
+	if (r > 0) {
+		return 0;
+	} else {
+		return r;
+	}
 }
 
 /**
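[Editorial note, not part of the patch] For reference, the return convention the ib_test callers rely on, shown as a hedged sketch rather than code from this file: radeon_fence_wait_timeout() returns the remaining jiffies (> 0) when the fence signaled, 0 on timeout, and a negative error otherwise. A caller would normalize that roughly as follows; fence is assumed to be a valid struct radeon_fence pointer:

	/* Hedged sketch, not part of the commit. */
	long r = radeon_fence_wait_timeout(fence, false,
					   usecs_to_jiffies(RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0)
		return r;		/* the wait itself failed */
	else if (r == 0)
		return -ETIMEDOUT;	/* fence never signaled */
	return 0;			/* signaled within the timeout */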
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 7eb1ae758906..566a1a01f6d1 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -810,11 +810,16 @@ int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		goto error;
 	}
 
-	r = radeon_fence_wait(fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		r = -ETIMEDOUT;
 	} else {
 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	}
 error:
 	radeon_fence_unref(&fence);
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c
index c6b1cbca47fc..12ddcfa82e20 100644
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@@ -522,11 +522,17 @@ int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		goto error;
 	}
 
-	r = radeon_fence_wait(fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		goto error;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		r = -ETIMEDOUT;
+		goto error;
 	}
+	r = 0;
 	DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
 error:
 	radeon_fence_unref(&fence);