author		James Morris <james.l.morris@oracle.com>	2017-07-24 20:44:18 -0400
committer	James Morris <james.l.morris@oracle.com>	2017-07-24 20:44:18 -0400
commit		53a2ebaaabc1eb8458796fec3bc1e0e80746b642
tree		9d1f9227b49392cdd2edcc01057517da4f4b09c2 /drivers/gpu/drm/amd/amdgpu
parent		3cf29931453215536916d0c4da953fce1911ced3
parent		520eccdfe187591a51ea9ab4c1a024ae4d0f68d9
sync to Linus v4.13-rc2 for subsystem developers to work against
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Kconfig | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 173
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 100
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 76
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 64
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 105
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 309
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 442
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 99
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 206
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 60
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 118
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 44
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 299
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 83
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 654
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 77
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 657
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 32
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_crtc.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_smc.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c | 123
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 607
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 1619
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 656
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1247
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1755
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 435
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 109
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 110
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 110
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 177
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/kv_smc.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 740
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 35
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 212
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h | 49
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 308
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.h | 41
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 152
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.c | 86
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_smc.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c | 185
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15_common.h | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15d.h | 90
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 193
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 95
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 61
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 1189
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h | 29
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 216
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vid.h | 6
103 files changed, 9975 insertions, 4921 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 61360e27715f..26682454a446 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -5,15 +5,23 @@ config DRM_AMDGPU_SI
 	  Choose this option if you want to enable experimental support
 	  for SI asics.
 
+	  SI is already supported in radeon. Experimental support for SI
+	  in amdgpu will be disabled by default and is still provided by
+	  radeon. Use module options to override this:
+
+	  radeon.si_support=0 amdgpu.si_support=1
+
 config DRM_AMDGPU_CIK
 	bool "Enable amdgpu support for CIK parts"
 	depends on DRM_AMDGPU
 	help
-	  Choose this option if you want to enable experimental support
-	  for CIK asics.
+	  Choose this option if you want to enable support for CIK asics.
+
+	  CIK is already supported in radeon. Support for CIK in amdgpu
+	  will be disabled by default and is still provided by radeon.
+	  Use module options to override this:
 
-	  CIK is already supported in radeon. CIK support in amdgpu
-	  is for experimentation and testing.
+	  radeon.cik_support=0 amdgpu.cik_support=1
 
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
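Note: the help text above quotes the override knobs. A minimal sketch of setting
them persistently via modprobe configuration (the file name is illustrative; the
same key=value pairs also work on the kernel command line):

    # /etc/modprobe.d/amdgpu.conf -- hand SI and CIK asics to amdgpu
    options radeon si_support=0 cik_support=0
    options amdgpu si_support=1 cik_support=1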
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 660786aba7d2..faea6349228f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -4,7 +4,7 @@
 
 FULL_AMD_PATH=$(src)/..
 
-ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
+ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
@@ -24,7 +24,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
+	amdgpu_queue_mgr.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -34,7 +35,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o
 
 # add GMC block
 amdgpu-y += \
@@ -54,7 +55,8 @@ amdgpu-y += \
 # add PSP block
 amdgpu-y += \
 	amdgpu_psp.o \
-	psp_v3_1.o
+	psp_v3_1.o \
+	psp_v10_0.o
 
 # add SMC block
 amdgpu-y += \
@@ -92,6 +94,11 @@ amdgpu-y += \
 	vce_v3_0.o \
 	vce_v4_0.o
 
+# add VCN block
+amdgpu-y += \
+	amdgpu_vcn.o \
+	vcn_v1_0.o
+
 # add amdkfd interfaces
 amdgpu-y += \
 	amdgpu_amdkfd.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 833c3c16501a..ff7bf1a9f967 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -36,16 +36,18 @@
 #include <linux/hashtable.h>
 #include <linux/dma-fence.h>
 
-#include <ttm/ttm_bo_api.h>
-#include <ttm/ttm_bo_driver.h>
-#include <ttm/ttm_placement.h>
-#include <ttm/ttm_module.h>
-#include <ttm/ttm_execbuf_util.h>
+#include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_module.h>
+#include <drm/ttm/ttm_execbuf_util.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
 
+#include <kgd_kfd_interface.h>
+
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
@@ -62,6 +64,7 @@
 #include "amdgpu_acp.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
+#include "amdgpu_vcn.h"
 
 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
@@ -92,6 +95,7 @@ extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
+extern int amdgpu_vm_update_mode;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_no_evict;
@@ -109,6 +113,15 @@ extern int amdgpu_prim_buf_per_se;
 extern int amdgpu_pos_buf_per_se;
 extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
+extern int amdgpu_job_hang_limit;
+extern int amdgpu_lbpw;
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+extern int amdgpu_si_support;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+extern int amdgpu_cik_support;
+#endif
 
 #define AMDGPU_DEFAULT_GTT_SIZE_MB		3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
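Note: a sketch of how such an extern is typically backed in amdgpu_drv.c; only
the variable and option names come from the hunk above, the permission bits and
description string here are assumptions:

    #ifdef CONFIG_DRM_AMDGPU_SI
    int amdgpu_si_support;	/* 0 = leave SI asics to radeon (default) */
    MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled)");
    module_param_named(si_support, amdgpu_si_support, int, 0444);
    #endif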
@@ -305,8 +318,8 @@ struct amdgpu_gart_funcs {
 	/* set pte flags based per asic */
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
-	/* adjust mc addr in fb for APU case */
-	u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
+	/* get the pde for a given mc addr */
+	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
 	uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
@@ -554,7 +567,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 			int pages);
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		     int pages, struct page **pagelist,
@@ -602,6 +615,7 @@ struct amdgpu_mc {
 	uint32_t		srbm_soft_reset;
 	struct amdgpu_mode_mc_save save;
 	bool			prt_warning;
+	uint64_t		stolen_size;
 	/* apertures */
 	u64			shared_aperture_start;
 	u64			shared_aperture_end;
@@ -772,6 +786,29 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct dma_fence **f);
 
 /*
+ * Queue manager
+ */
+struct amdgpu_queue_mapper {
+	int		hw_ip;
+	struct mutex	lock;
+	/* protected by lock */
+	struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
+};
+
+struct amdgpu_queue_mgr {
+	struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
+};
+
+int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
+			 struct amdgpu_queue_mgr *mgr,
+			 int hw_ip, int instance, int ring,
+			 struct amdgpu_ring **out_ring);
+
+/*
  * context related structures
  */
 
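Note: the new mapper decouples the user-visible (hw_ip, instance, ring) triple
from hardware rings. A minimal sketch of the intended call pattern, assuming a
context that embeds an amdgpu_queue_mgr as in the next hunk (error handling
trimmed):

    struct amdgpu_ring *ring;
    int r;

    /* resolve a user-visible (hw_ip, instance, ring) triple to a hardware
     * ring; the mapper caches the answer in queue_map[] under its lock */
    r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
                             AMDGPU_HW_IP_COMPUTE, 0, 2, &ring);
    if (r)
            return r;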
@@ -784,6 +821,7 @@ struct amdgpu_ctx_ring {
 struct amdgpu_ctx {
 	struct kref		refcount;
 	struct amdgpu_device    *adev;
+	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
@@ -822,6 +860,7 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
+	u32			vram_lost_counter;
 };
 
 /*
@@ -830,6 +869,8 @@ struct amdgpu_fpriv {
 
 struct amdgpu_bo_list {
 	struct mutex lock;
+	struct rcu_head rhead;
+	struct kref refcount;
 	struct amdgpu_bo *gds_obj;
 	struct amdgpu_bo *gws_obj;
 	struct amdgpu_bo *oa_obj;
@@ -893,20 +934,26 @@ struct amdgpu_rlc {
 	u32 *register_restore;
 };
 
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
 	struct amdgpu_bo	*mec_fw_obj;
 	u64			mec_fw_gpu_addr;
-	u32 num_pipe;
 	u32 num_mec;
-	u32 num_queue;
+	u32 num_pipe_per_mec;
+	u32 num_queue_per_pipe;
 	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+	/* These are the resources for which amdgpu takes ownership */
+	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };
 
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
+	struct mutex		ring_mutex;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };
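Note: num_pipe_per_mec and num_queue_per_pipe define a flattened index into
queue_bitmap. A sketch of that mapping (mirroring what a helper such as
amdgpu_gfx_queue_to_bit(), used in a later hunk, would compute; this body is
an assumption, not the driver's code):

    static inline int queue_to_bit(struct amdgpu_mec *mec,
                                   int mec_idx, int pipe, int queue)
    {
            return mec_idx * mec->num_pipe_per_mec * mec->num_queue_per_pipe +
                   pipe * mec->num_queue_per_pipe + queue;
    }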
@@ -981,9 +1028,15 @@ struct amdgpu_gfx_config {
 };
 
 struct amdgpu_cu_info {
-	uint32_t number; /* total active CU number */
-	uint32_t ao_cu_mask;
+	uint32_t max_waves_per_simd;
 	uint32_t wave_front_size;
+	uint32_t max_scratch_slots_per_cu;
+	uint32_t lds_size;
+
+	/* total active CU number */
+	uint32_t number;
+	uint32_t ao_cu_mask;
+	uint32_t ao_cu_bitmap[4][4];
 	uint32_t bitmap[4][4];
 };
 
@@ -1061,6 +1114,8 @@ struct amdgpu_gfx {
 	uint32_t                        grbm_soft_reset;
 	uint32_t                        srbm_soft_reset;
 	bool                            in_reset;
+	/* s3/s4 mask */
+	bool                            in_suspend;
 	/* NGG */
 	struct amdgpu_ngg		ngg;
 };
@@ -1109,12 +1164,14 @@ struct amdgpu_cs_parser {
 
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
+
+	unsigned num_post_dep_syncobjs;
+	struct drm_syncobj **post_dep_syncobjs;
 };
 
 #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
 #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
 #define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occured */
-#define AMDGPU_VM_DOMAIN                    (1 << 3) /* bit set means in virtual memory context */
 
 struct amdgpu_job {
 	struct amd_sched_job    base;
@@ -1122,6 +1179,8 @@ struct amdgpu_job {
 	struct amdgpu_vm	*vm;
 	struct amdgpu_ring	*ring;
 	struct amdgpu_sync	sync;
+	struct amdgpu_sync	dep_sync;
+	struct amdgpu_sync	sched_sync;
 	struct amdgpu_ib	*ibs;
 	struct dma_fence	*fence; /* the hw fence */
 	uint32_t		preamble_status;
@@ -1129,7 +1188,6 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
-	bool			need_pipeline_sync;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
@@ -1221,6 +1279,9 @@ struct amdgpu_firmware {
 	const struct amdgpu_psp_funcs *funcs;
 	struct amdgpu_bo *rbuf;
 	struct mutex mutex;
+
+	/* gpu info firmware data pointer */
+	const struct firmware *gpu_info_fw;
 };
 
 /*
@@ -1296,7 +1357,6 @@ struct amdgpu_smumgr {
  */
 struct amdgpu_allowed_register_entry {
 	uint32_t reg_offset;
-	bool untouched;
 	bool grbm_indexed;
 };
 
@@ -1424,6 +1484,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
+#define AMDGPU_RESET_MAGIC_NUM 64
 struct amdgpu_device {
 	struct device			*dev;
 	struct drm_device		*ddev;
@@ -1523,7 +1584,9 @@ struct amdgpu_device {
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
 	atomic64_t			num_evictions;
+	atomic64_t			num_vram_cpu_page_faults;
 	atomic_t			gpu_reset_counter;
+	atomic_t			vram_lost_counter;
 
 	/* data for buffer migration throttling */
 	struct {
@@ -1570,11 +1633,18 @@ struct amdgpu_device {
 	/* sdma */
 	struct amdgpu_sdma		sdma;
 
-	/* uvd */
-	struct amdgpu_uvd		uvd;
+	union {
+		struct {
+			/* uvd */
+			struct amdgpu_uvd		uvd;
+
+			/* vce */
+			struct amdgpu_vce		vce;
+		};
 
-	/* vce */
-	struct amdgpu_vce		vce;
+		/* vcn */
+		struct amdgpu_vcn		vcn;
+	};
 
 	/* firmwares */
 	struct amdgpu_firmware		firmware;
@@ -1598,6 +1668,9 @@ struct amdgpu_device {
 	/* amdkfd interface */
 	struct kfd_dev          *kfd;
 
+	/* delayed work_func for deferring clockgating during resume */
+	struct delayed_work     late_init_work;
+
 	struct amdgpu_virt	virt;
 
 	/* link all shadow bo */
@@ -1606,9 +1679,13 @@ struct amdgpu_device {
 	/* link all gtt */
 	spinlock_t			gtt_list_lock;
 	struct list_head                gtt_list;
+	/* keep an lru list of rings by HW IP */
+	struct list_head		ring_lru_list;
+	spinlock_t			ring_lru_list_lock;
 
 	/* record hw reset is performed */
 	bool has_hw_reset;
+	u8				reset_magic[AMDGPU_RESET_MAGIC_NUM];
 
 };
 
@@ -1617,7 +1694,6 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 	return container_of(bdev, struct amdgpu_device, mman.bdev);
 }
 
-bool amdgpu_device_is_px(struct drm_device *dev);
 int amdgpu_device_init(struct amdgpu_device *adev,
 			struct drm_device *ddev,
 			struct pci_dev *pdev,
@@ -1733,30 +1809,31 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
 	unsigned occupied, chunk1, chunk2;
 	void *dst;
 
-	if (ring->count_dw < count_dw) {
+	if (unlikely(ring->count_dw < count_dw)) {
 		DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
-	} else {
-		occupied = ring->wptr & ring->buf_mask;
-		dst = (void *)&ring->ring[occupied];
-		chunk1 = ring->buf_mask + 1 - occupied;
-		chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
-		chunk2 = count_dw - chunk1;
-		chunk1 <<= 2;
-		chunk2 <<= 2;
-
-		if (chunk1)
-			memcpy(dst, src, chunk1);
-
-		if (chunk2) {
-			src += chunk1;
-			dst = (void *)ring->ring;
-			memcpy(dst, src, chunk2);
-		}
-
-		ring->wptr += count_dw;
-		ring->wptr &= ring->ptr_mask;
-		ring->count_dw -= count_dw;
+		return;
 	}
+
+	occupied = ring->wptr & ring->buf_mask;
+	dst = (void *)&ring->ring[occupied];
+	chunk1 = ring->buf_mask + 1 - occupied;
+	chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+	chunk2 = count_dw - chunk1;
+	chunk1 <<= 2;
+	chunk2 <<= 2;
+
+	if (chunk1)
+		memcpy(dst, src, chunk1);
+
+	if (chunk2) {
+		src += chunk1;
+		dst = (void *)ring->ring;
+		memcpy(dst, src, chunk2);
+	}
+
+	ring->wptr += count_dw;
+	ring->wptr &= ring->ptr_mask;
+	ring->count_dw -= count_dw;
 }
 
 static inline struct amdgpu_sdma_instance *
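Note: aside from the early return, the copy above is the classic power-of-two
ring-buffer wrap. A standalone userspace model of the same arithmetic (one mask
for simplicity; the driver distinguishes buf_mask from ptr_mask):

    #include <string.h>

    /* buf has (mask + 1) slots, mask is 2^n - 1, so "& mask" wraps */
    static void ring_write(unsigned *buf, unsigned mask, unsigned *wptr,
                           const unsigned *src, unsigned count)
    {
            unsigned occupied = *wptr & mask;
            unsigned chunk1 = mask + 1 - occupied;  /* slots to end of buffer */

            if (chunk1 > count)
                    chunk1 = count;
            memcpy(buf + occupied, src, chunk1 * sizeof(*buf));
            memcpy(buf, src + chunk1, (count - chunk1) * sizeof(*buf));
            *wptr = (*wptr + count) & mask;
    }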
@@ -1792,6 +1869,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -1813,6 +1891,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
@@ -1848,10 +1927,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
-int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring);
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
@@ -1900,6 +1975,8 @@ static inline bool amdgpu_has_atpx(void) { return false; }
 extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
 extern const int amdgpu_max_kms_ioctl;
 
+bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
+			  struct amdgpu_fpriv *fpriv);
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void amdgpu_driver_unload_kms(struct drm_device *dev);
 void amdgpu_driver_lastclose_kms(struct drm_device *dev);
@@ -1912,10 +1989,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
-int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
-				    int *max_error,
-				    struct timeval *vblank_time,
-				    unsigned flags);
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index dba8a5b25e66..37971d9402e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -24,6 +24,7 @@
 #include "amd_shared.h"
 #include <drm/drmP.h>
 #include "amdgpu.h"
+#include "amdgpu_gfx.h"
 #include <linux/module.h>
 
 const struct kfd2kgd_calls *kfd2kgd;
@@ -60,9 +61,9 @@ int amdgpu_amdkfd_init(void)
 	return ret;
 }
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 {
-	switch (rdev->asic_type) {
+	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_KAVERI:
 		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
@@ -86,59 +87,82 @@ void amdgpu_amdkfd_fini(void)
 	}
 }
 
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
 	if (kgd2kfd)
-		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
-					rdev->pdev, kfd2kgd);
+		adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+					adev->pdev, kfd2kgd);
 }
 
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-	if (rdev->kfd) {
+	int i;
+	int last_valid_bit;
+	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
 		};
 
-		amdgpu_doorbell_get_kfd_info(rdev,
+		/* this is going to have a few of the MSBs set that we need to
+		 * clear */
+		bitmap_complement(gpu_resources.queue_bitmap,
+				  adev->gfx.mec.queue_bitmap,
+				  KGD_MAX_QUEUES);
+
+		/* remove the KIQ bit as well */
+		if (adev->gfx.kiq.ring.ready)
+			clear_bit(amdgpu_gfx_queue_to_bit(adev,
+					adev->gfx.kiq.ring.me - 1,
+					adev->gfx.kiq.ring.pipe,
+					adev->gfx.kiq.ring.queue),
+				  gpu_resources.queue_bitmap);
+
+		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
+		 * nbits is not compile time constant */
+		last_valid_bit = 1 /* only first MEC can have compute queues */
+				* adev->gfx.mec.num_pipe_per_mec
+				* adev->gfx.mec.num_queue_per_pipe;
+		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+			clear_bit(i, gpu_resources.queue_bitmap);
+
+		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
-		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
 }
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-	if (rdev->kfd) {
-		kgd2kfd->device_exit(rdev->kfd);
-		rdev->kfd = NULL;
+	if (adev->kfd) {
+		kgd2kfd->device_exit(adev->kfd);
+		adev->kfd = NULL;
 	}
 }
 
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 {
-	if (rdev->kfd)
-		kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+	if (adev->kfd)
+		kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
 }
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
-	if (rdev->kfd)
-		kgd2kfd->suspend(rdev->kfd);
+	if (adev->kfd)
+		kgd2kfd->suspend(adev->kfd);
 }
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
 	int r = 0;
 
-	if (rdev->kfd)
-		r = kgd2kfd->resume(rdev->kfd);
+	if (adev->kfd)
+		r = kgd2kfd->resume(adev->kfd);
 
 	return r;
 }
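Note: the bitmap juggling above reduces to "KFD gets every queue amdgpu did not
claim, restricted to the first MEC". A standalone model with the queue bitmap
packed into one 64-bit word (valid only while KGD_MAX_QUEUES <= 64):

    #include <stdint.h>

    static uint64_t kfd_queue_mask(uint64_t amdgpu_owned,
                                   unsigned pipes_per_mec,
                                   unsigned queues_per_pipe)
    {
            unsigned last_valid = 1 * pipes_per_mec * queues_per_pipe;
            uint64_t valid = last_valid >= 64 ? ~0ULL
                                              : (1ULL << last_valid) - 1;

            /* bitmap_complement() plus the clear_bit() loop in one step */
            return ~amdgpu_owned & valid;
    }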
@@ -147,7 +171,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 		void **mem_obj, uint64_t *gpu_addr,
 		void **cpu_ptr)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
 	int r;
 
@@ -159,10 +183,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	if ((*mem) == NULL)
 		return -ENOMEM;
 
-	r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
 			AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
 		return r;
 	}
@@ -170,21 +194,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	/* map the buffer */
 	r = amdgpu_bo_reserve((*mem)->bo, true);
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 		goto allocate_mem_reserve_bo_failed;
 	}
 
 	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
 				&(*mem)->gpu_addr);
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 		goto allocate_mem_pin_bo_failed;
 	}
 	*gpu_addr = (*mem)->gpu_addr;
 
 	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"(%d) failed to map bo to kernel for amdkfd\n", r);
 		goto allocate_mem_kmap_bo_failed;
 	}
@@ -220,27 +244,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 
 uint64_t get_vmem_size(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev =
+	struct amdgpu_device *adev =
 		(struct amdgpu_device *)kgd;
 
 	BUG_ON(kgd == NULL);
 
-	return rdev->mc.real_vram_size;
+	return adev->mc.real_vram_size;
 }
 
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	if (rdev->gfx.funcs->get_gpu_clock_counter)
-		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
+	if (adev->gfx.funcs->get_gpu_clock_counter)
+		return adev->gfx.funcs->get_gpu_clock_counter(adev);
 	return 0;
 }
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 	/* The sclk is in quantas of 10kHz */
-	return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index de530f68d4e3..73f83a10ae14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -39,15 +39,15 @@ struct kgd_mem {
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev);
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1a0a5f7cccbc..5254562fd0f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -29,6 +29,7 @@
 #include "cikd.h"
 #include "cik_sdma.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
@@ -38,8 +39,6 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
-#define CIK_PIPE_PER_MEC	(4)
-
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
@@ -185,8 +184,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
@@ -243,18 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-	lock_srbm(kgd, mec, pipe, 0, 0);
-	WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_VMID, 0);
-	WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
-	unlock_srbm(kgd);
-
+	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
@@ -264,8 +254,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
@@ -309,55 +299,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	m = get_mqd(mqd);
 
 	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-
-	acquire_queue(kgd, pipe_id, queue_id);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
-	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
-	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
-	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
-	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
 	if (is_wptr_shadow_valid)
-		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+		m->cp_hqd_pq_wptr = wptr_shadow;
 
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v7_0_mqd_commit(adev, m);
 	release_queue(kgd);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6697612239c2..133d06671e46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -28,6 +28,7 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
@@ -38,8 +39,6 @@
 #include "vi_structs.h"
 #include "vid.h"
 
-#define VI_PIPE_PER_MEC	(4)
-
 struct cik_sdma_rlc_registers;
 
 /*
@@ -146,8 +145,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
@@ -205,6 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
+	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
@@ -214,8 +216,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % VI_PIPE_PER_MEC);
+	mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
@@ -251,53 +253,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	m = get_mqd(mqd);
 
 	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	acquire_queue(kgd, pipe_id, queue_id);
-
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
-	if (valid_wptr > 0)
-		WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
-
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
-	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
-	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
-	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
-	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
-
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
-	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
-	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
-	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
-	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
-	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
-	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
-	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
-	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
-
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	if (valid_wptr == 0)
+		m->cp_hqd_pq_wptr = shadow_wptr;
 
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v8_0_mqd_commit(adev, mqd);
 	release_queue(kgd);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 1cf78f4dd339..1e8e1123ddf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
 		DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
 			 adev->clock.default_dispclk / 100);
 		adev->clock.default_dispclk = 60000;
+	} else if (adev->clock.default_dispclk <= 60000) {
+		DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
+			 adev->clock.default_dispclk / 100);
+		adev->clock.default_dispclk = 62500;
 	}
 	adev->clock.dp_extclk =
 		le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index a6649874e6ce..f621ee115c98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -35,33 +35,59 @@
 #define AMDGPU_BO_LIST_MAX_PRIORITY	32u
 #define AMDGPU_BO_LIST_NUM_BUCKETS	(AMDGPU_BO_LIST_MAX_PRIORITY + 1)
 
-static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
-				 struct amdgpu_bo_list **result,
+static int amdgpu_bo_list_set(struct amdgpu_device *adev,
+				     struct drm_file *filp,
+				     struct amdgpu_bo_list *list,
+				     struct drm_amdgpu_bo_list_entry *info,
+				     unsigned num_entries);
+
+static void amdgpu_bo_list_release_rcu(struct kref *ref)
+{
+	unsigned i;
+	struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+						   refcount);
+
+	for (i = 0; i < list->num_entries; ++i)
+		amdgpu_bo_unref(&list->array[i].robj);
+
+	mutex_destroy(&list->lock);
+	kvfree(list->array);
+	kfree_rcu(list, rhead);
+}
+
+static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+				 struct drm_file *filp,
+				 struct drm_amdgpu_bo_list_entry *info,
+				 unsigned num_entries,
 				 int *id)
 {
 	int r;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_bo_list *list;
 
-	*result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
-	if (!*result)
+	list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
+	if (!list)
 		return -ENOMEM;
 
+	/* initialize bo list*/
+	mutex_init(&list->lock);
+	kref_init(&list->refcount);
+	r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
+	if (r) {
+		kfree(list);
+		return r;
+	}
+
+	/* idr alloc should be called only after initialization of bo list. */
 	mutex_lock(&fpriv->bo_list_lock);
-	r = idr_alloc(&fpriv->bo_list_handles, *result,
-		      1, 0, GFP_KERNEL);
+	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+	mutex_unlock(&fpriv->bo_list_lock);
 	if (r < 0) {
-		mutex_unlock(&fpriv->bo_list_lock);
-		kfree(*result);
+		kfree(list);
 		return r;
 	}
 	*id = r;
 
-	mutex_init(&(*result)->lock);
-	(*result)->num_entries = 0;
-	(*result)->array = NULL;
-
-	mutex_lock(&(*result)->lock);
-	mutex_unlock(&fpriv->bo_list_lock);
-
 	return 0;
 }
 
@@ -71,13 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
 
 	mutex_lock(&fpriv->bo_list_lock);
 	list = idr_remove(&fpriv->bo_list_handles, id);
-	if (list) {
-		/* Another user may have a reference to this list still */
-		mutex_lock(&list->lock);
-		mutex_unlock(&list->lock);
-		amdgpu_bo_list_free(list);
-	}
 	mutex_unlock(&fpriv->bo_list_lock);
+	if (list)
+		kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
 }
 
 static int amdgpu_bo_list_set(struct amdgpu_device *adev,
@@ -96,7 +118,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	int r;
 	unsigned long total_size = 0;
 
-	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
+	array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL);
 	if (!array)
 		return -ENOMEM;
 	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
@@ -148,7 +170,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
148 for (i = 0; i < list->num_entries; ++i) 170 for (i = 0; i < list->num_entries; ++i)
149 amdgpu_bo_unref(&list->array[i].robj); 171 amdgpu_bo_unref(&list->array[i].robj);
150 172
151 drm_free_large(list->array); 173 kvfree(list->array);
152 174
153 list->gds_obj = gds_obj; 175 list->gds_obj = gds_obj;
154 list->gws_obj = gws_obj; 176 list->gws_obj = gws_obj;
@@ -163,7 +185,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
163error_free: 185error_free:
164 while (i--) 186 while (i--)
165 amdgpu_bo_unref(&array[i].robj); 187 amdgpu_bo_unref(&array[i].robj);
166 drm_free_large(array); 188 kvfree(array);
167 return r; 189 return r;
168} 190}
169 191
@@ -172,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
172{ 194{
173 struct amdgpu_bo_list *result; 195 struct amdgpu_bo_list *result;
174 196
175 mutex_lock(&fpriv->bo_list_lock); 197 rcu_read_lock();
176 result = idr_find(&fpriv->bo_list_handles, id); 198 result = idr_find(&fpriv->bo_list_handles, id);
177 if (result) 199
178 mutex_lock(&result->lock); 200 if (result) {
179 mutex_unlock(&fpriv->bo_list_lock); 201 if (kref_get_unless_zero(&result->refcount))
202 mutex_lock(&result->lock);
203 else
204 result = NULL;
205 }
206 rcu_read_unlock();
207
180 return result; 208 return result;
181} 209}
182 210
@@ -214,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
214void amdgpu_bo_list_put(struct amdgpu_bo_list *list) 242void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
215{ 243{
216 mutex_unlock(&list->lock); 244 mutex_unlock(&list->lock);
245 kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
217} 246}
218 247
219void amdgpu_bo_list_free(struct amdgpu_bo_list *list) 248void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
@@ -224,7 +253,7 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
224 amdgpu_bo_unref(&list->array[i].robj); 253 amdgpu_bo_unref(&list->array[i].robj);
225 254
226 mutex_destroy(&list->lock); 255 mutex_destroy(&list->lock);
227 drm_free_large(list->array); 256 kvfree(list->array);
228 kfree(list); 257 kfree(list);
229} 258}
230 259
@@ -244,8 +273,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
244 273
245 int r; 274 int r;
246 275
247 info = drm_malloc_ab(args->in.bo_number, 276 info = kvmalloc_array(args->in.bo_number,
248 sizeof(struct drm_amdgpu_bo_list_entry)); 277 sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
249 if (!info) 278 if (!info)
250 return -ENOMEM; 279 return -ENOMEM;
251 280
@@ -273,16 +302,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
273 302
274 switch (args->in.operation) { 303 switch (args->in.operation) {
275 case AMDGPU_BO_LIST_OP_CREATE: 304 case AMDGPU_BO_LIST_OP_CREATE:
276 r = amdgpu_bo_list_create(fpriv, &list, &handle); 305 r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
306 &handle);
277 if (r) 307 if (r)
278 goto error_free; 308 goto error_free;
279
280 r = amdgpu_bo_list_set(adev, filp, list, info,
281 args->in.bo_number);
282 amdgpu_bo_list_put(list);
283 if (r)
284 goto error_free;
285
286 break; 309 break;
287 310
288 case AMDGPU_BO_LIST_OP_DESTROY: 311 case AMDGPU_BO_LIST_OP_DESTROY:
@@ -311,11 +334,11 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
311 334
312 memset(args, 0, sizeof(*args)); 335 memset(args, 0, sizeof(*args));
313 args->out.list_handle = handle; 336 args->out.list_handle = handle;
314 drm_free_large(info); 337 kvfree(info);
315 338
316 return 0; 339 return 0;
317 340
318error_free: 341error_free:
319 drm_free_large(info); 342 kvfree(info);
320 return r; 343 return r;
321} 344}
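
The bo_list rework above swaps the old lock-handoff teardown for reference counting with RCU-deferred freeing: amdgpu_bo_list_get() only succeeds via kref_get_unless_zero() under rcu_read_lock(), and the final kref_put() releases through kfree_rcu(), so a concurrent lookup can never touch freed memory. A minimal self-contained sketch of the same idiom, using a hypothetical struct obj rather than the amdgpu types:

    #include <linux/idr.h>
    #include <linux/kref.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct obj {
    	struct kref refcount;
    	struct rcu_head rhead;
    };

    /* release side: defer the actual free until all RCU readers are done */
    static void obj_release(struct kref *ref)
    {
    	struct obj *o = container_of(ref, struct obj, refcount);

    	kfree_rcu(o, rhead);
    }

    /* lookup side: a refcount that already hit zero means teardown won */
    static struct obj *obj_lookup(struct idr *idr, int id)
    {
    	struct obj *o;

    	rcu_read_lock();
    	o = idr_find(idr, id);
    	if (o && !kref_get_unless_zero(&o->refcount))
    		o = NULL;
    	rcu_read_unlock();
    	return o;
    }

The destroy path pairs with this the same way amdgpu_bo_list_destroy() does above: idr_remove() makes the object unfindable, and whoever drops the last reference frees it.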
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index c6dba1eaefbd..c0a806280257 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -838,6 +838,12 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
 		return -EINVAL;
 
 	mode_info = info->mode_info;
+	if (mode_info) {
+		/* if the displays are off, vblank time is max */
+		mode_info->vblank_time_us = 0xffffffff;
+		/* always set the reference clock */
+		mode_info->ref_clock = adev->clock.spll.reference_freq;
+	}
 
 	if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
 		list_for_each_entry(crtc,
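
The cgs hunk above seeds safe defaults before the CRTC walk: with no active display the reported vblank time becomes the maximum u32, and the reference clock is always valid. Why 0xffffffff works as "displays off", sketched with a hypothetical consumer (the real users live in the powerplay code, which is not part of this file):

    /* sketch: a hypothetical clock-gating heuristic keyed on "time until
     * next vblank". The 0xffffffff default from the hunk above means the
     * same threshold test also covers the displays-off case for free.
     * long_vblank_us is an invented parameter, not a real knob. */
    static bool vblank_long_enough(u32 vblank_time_us, u32 long_vblank_us)
    {
    	return vblank_time_us > long_vblank_us;
    }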
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4e6b9501ab0a..5599c01b265d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -27,81 +27,10 @@
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_syncobj.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring)
-{
-	/* Right now all IPs have only one instance - multiple rings. */
-	if (ip_instance != 0) {
-		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
-		return -EINVAL;
-	}
-
-	switch (ip_type) {
-	default:
-		DRM_ERROR("unknown ip type: %d\n", ip_type);
-		return -EINVAL;
-	case AMDGPU_HW_IP_GFX:
-		if (ring < adev->gfx.num_gfx_rings) {
-			*out_ring = &adev->gfx.gfx_ring[ring];
-		} else {
-			DRM_ERROR("only %d gfx rings are supported now\n",
-				  adev->gfx.num_gfx_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		if (ring < adev->gfx.num_compute_rings) {
-			*out_ring = &adev->gfx.compute_ring[ring];
-		} else {
-			DRM_ERROR("only %d compute rings are supported now\n",
-				  adev->gfx.num_compute_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_DMA:
-		if (ring < adev->sdma.num_instances) {
-			*out_ring = &adev->sdma.instance[ring].ring;
-		} else {
-			DRM_ERROR("only %d SDMA rings are supported\n",
-				  adev->sdma.num_instances);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.ring;
-		break;
-	case AMDGPU_HW_IP_VCE:
-		if (ring < adev->vce.num_rings){
-			*out_ring = &adev->vce.ring[ring];
-		} else {
-			DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		if (ring < adev->uvd.num_enc_rings){
-			*out_ring = &adev->uvd.ring_enc[ring];
-		} else {
-			DRM_ERROR("only %d UVD ENC rings are supported\n",
-				  adev->uvd.num_enc_rings);
-			return -EINVAL;
-		}
-		break;
-	}
-
-	if (!(*out_ring && (*out_ring)->adev)) {
-		DRM_ERROR("Ring %d is not initialized on IP %d\n",
-			  ring, ip_type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
@@ -135,7 +64,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 }
 
-int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
@@ -194,7 +123,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		size = p->chunks[i].length_dw;
 		cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
 
-		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
+		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 		if (p->chunks[i].kdata == NULL) {
 			ret = -ENOMEM;
 			i--;
@@ -226,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			break;
 
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			break;
 
 		default:
@@ -247,7 +178,7 @@ free_all_kdata:
 	i = p->nchunks - 1;
 free_partial_kdata:
 	for (; i >= 0; i--)
-		drm_free_large(p->chunks[i].kdata);
+		kvfree(p->chunks[i].kdata);
 	kfree(p->chunks);
 	p->chunks = NULL;
 	p->nchunks = 0;
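
All the drm_malloc_ab()/drm_calloc_large()/drm_free_large() call sites in this file move to the generic kvmalloc_array()/kvfree() helpers, which attempt a kmalloc first and transparently fall back to vmalloc when the allocation is too large or memory is fragmented; kvfree() handles either backing. A minimal sketch of the pattern outside amdgpu (helper names invented for illustration):

    #include <linux/mm.h>

    static u32 *alloc_dwords(size_t count)
    {
    	/* overflow-checked count * sizeof(u32); may be vmalloc-backed */
    	return kvmalloc_array(count, sizeof(u32), GFP_KERNEL);
    }

    static void free_dwords(u32 *p)
    {
    	kvfree(p);	/* correct for kmalloc and vmalloc alike */
    }

Adding __GFP_ZERO, as the userptr page-array hunk below does, reproduces the zeroing the old drm_calloc_large() provided.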
@@ -505,7 +436,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			return r;
 
 		if (binding_userptr) {
-			drm_free_large(lobj->user_pages);
+			kvfree(lobj->user_pages);
 			lobj->user_pages = NULL;
 		}
 	}
@@ -566,12 +497,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		    &e->user_invalidated) && e->user_pages) {
 
 			/* We acquired a page array, but somebody
-			 * invalidated it. Free it an try again
+			 * invalidated it. Free it and try again
 			 */
 			release_pages(e->user_pages,
 				      e->robj->tbo.ttm->num_pages,
 				      false);
-			drm_free_large(e->user_pages);
+			kvfree(e->user_pages);
 			e->user_pages = NULL;
 		}
 
@@ -597,12 +528,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_free_pages;
 	}
 
-	/* Fill the page arrays for all useptrs. */
+	/* Fill the page arrays for all userptrs. */
 	list_for_each_entry(e, &need_pages, tv.head) {
 		struct ttm_tt *ttm = e->robj->tbo.ttm;
 
-		e->user_pages = drm_calloc_large(ttm->num_pages,
-						 sizeof(struct page*));
+		e->user_pages = kvmalloc_array(ttm->num_pages,
+					       sizeof(struct page*),
+					       GFP_KERNEL | __GFP_ZERO);
 		if (!e->user_pages) {
 			r = -ENOMEM;
 			DRM_ERROR("calloc failure in %s\n", __func__);
@@ -612,7 +544,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
 		if (r) {
 			DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-			drm_free_large(e->user_pages);
+			kvfree(e->user_pages);
 			e->user_pages = NULL;
 			goto error_free_pages;
 		}
@@ -708,7 +640,7 @@ error_free_pages:
 			release_pages(e->user_pages,
 				      e->robj->tbo.ttm->num_pages,
 				      false);
-			drm_free_large(e->user_pages);
+			kvfree(e->user_pages);
 		}
 	}
 
@@ -753,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
+
+	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
+		drm_syncobj_put(parser->post_dep_syncobjs[i]);
+	kfree(parser->post_dep_syncobjs);
+
 	dma_fence_put(parser->fence);
 
 	if (parser->ctx)
@@ -761,7 +698,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 	amdgpu_bo_list_put(parser->bo_list);
 
 	for (i = 0; i < parser->nchunks; i++)
-		drm_free_large(parser->chunks[i].kdata);
+		kvfree(parser->chunks[i].kdata);
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
@@ -916,9 +853,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
-	r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
-			       chunk_ib->ip_instance, chunk_ib->ring,
-			       &ring);
+	r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
+				 chunk_ib->ip_instance, chunk_ib->ring, &ring);
 	if (r)
 		return r;
 
@@ -995,62 +931,148 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
-				  struct amdgpu_cs_parser *p)
+static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
+				       struct amdgpu_cs_chunk *chunk)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	int i, j, r;
-
-	for (i = 0; i < p->nchunks; ++i) {
-		struct drm_amdgpu_cs_chunk_dep *deps;
-		struct amdgpu_cs_chunk *chunk;
-		unsigned num_deps;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_dep *deps;
 
-		chunk = &p->chunks[i];
+	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
-		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-			continue;
+	for (i = 0; i < num_deps; ++i) {
+		struct amdgpu_ring *ring;
+		struct amdgpu_ctx *ctx;
+		struct dma_fence *fence;
 
-		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-		num_deps = chunk->length_dw * 4 /
-			sizeof(struct drm_amdgpu_cs_chunk_dep);
+		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+		if (ctx == NULL)
+			return -EINVAL;
 
-		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_ring *ring;
-			struct amdgpu_ctx *ctx;
-			struct dma_fence *fence;
+		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
+					 deps[i].ip_type,
+					 deps[i].ip_instance,
+					 deps[i].ring, &ring);
+		if (r) {
+			amdgpu_ctx_put(ctx);
+			return r;
+		}
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
+		fence = amdgpu_ctx_get_fence(ctx, ring,
+					     deps[i].handle);
+		if (IS_ERR(fence)) {
+			r = PTR_ERR(fence);
+			amdgpu_ctx_put(ctx);
+			return r;
+		} else if (fence) {
+			r = amdgpu_sync_fence(p->adev, &p->job->sync,
+					      fence);
+			dma_fence_put(fence);
+			amdgpu_ctx_put(ctx);
 			if (r)
 				return r;
+		}
+	}
+	return 0;
+}
 
-			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-			if (ctx == NULL)
-				return -EINVAL;
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+						 uint32_t handle)
+{
+	int r;
+	struct dma_fence *fence;
+	r = drm_syncobj_fence_get(p->filp, handle, &fence);
+	if (r)
+		return r;
 
-			fence = amdgpu_ctx_get_fence(ctx, ring,
-						     deps[j].handle);
-			if (IS_ERR(fence)) {
-				r = PTR_ERR(fence);
-				amdgpu_ctx_put(ctx);
-				return r;
+	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence);
+	dma_fence_put(fence);
 
-			} else if (fence) {
-				r = amdgpu_sync_fence(adev, &p->job->sync,
-						      fence);
-				dma_fence_put(fence);
-				amdgpu_ctx_put(ctx);
-				if (r)
-					return r;
-			}
+	return r;
+}
+
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+					    struct amdgpu_cs_chunk *chunk)
+{
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
+					     struct amdgpu_cs_chunk *chunk)
+{
+	unsigned num_deps;
+	int i;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	p->post_dep_syncobjs = kmalloc_array(num_deps,
+					     sizeof(struct drm_syncobj *),
+					     GFP_KERNEL);
+	p->num_post_dep_syncobjs = 0;
+
+	for (i = 0; i < num_deps; ++i) {
+		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_dep_syncobjs[i])
+			return -EINVAL;
+		p->num_post_dep_syncobjs++;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+				  struct amdgpu_cs_parser *p)
+{
+	int i, r;
+
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
+
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+			r = amdgpu_cs_process_fence_dep(p, chunk);
+			if (r)
+				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+			if (r)
+				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
 	return 0;
 }
 
+static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+	int i;
+
+	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
+		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
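
The dependency rework above dispatches per chunk type: DEPENDENCIES chunks resolve a (context, ring, sequence) triple to a fence, SYNCOBJ_IN chunks pull the syncobj's current fence into the job's sync object, and SYNCOBJ_OUT chunks are stashed so amdgpu_cs_post_dependencies() can install the submission fence once it exists. On the UAPI side each drm_amdgpu_cs_chunk_sem is a single __u32 handle, so length_dw equals the handle count. A hedged sketch of how userspace would size such a chunk (assumed usage, not taken from this patch):

    #include <drm/amdgpu_drm.h>
    #include <stdint.h>

    /* make a submission wait on two syncobjs: one dword per handle */
    static void fill_syncobj_in_chunk(struct drm_amdgpu_cs_chunk *chunk,
    				  struct drm_amdgpu_cs_chunk_sem sems[2],
    				  uint32_t h0, uint32_t h1)
    {
    	sems[0].handle = h0;
    	sems[1].handle = h1;
    	chunk->chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
    	chunk->length_dw = 2 * sizeof(sems[0]) / 4;	/* = 2 */
    	chunk->chunk_data = (uintptr_t)sems;
    }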
@@ -1071,6 +1093,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
+
+	amdgpu_cs_post_dependencies(p);
+
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
@@ -1078,13 +1103,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
-
 	return 0;
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
@@ -1092,6 +1117,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	if (!adev->accel_working)
 		return -EBUSY;
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
 
 	parser.adev = adev;
 	parser.filp = filp;
@@ -1153,21 +1180,28 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
 
-	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
-			       wait->in.ring, &ring);
-	if (r)
-		return r;
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
 
 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;
 
+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
+				 wait->in.ip_type, wait->in.ip_instance,
+				 wait->in.ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return r;
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
@@ -1203,15 +1237,17 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	struct dma_fence *fence;
 	int r;
 
-	r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
-			       user->ring, &ring);
-	if (r)
-		return ERR_PTR(r);
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
+				 user->ip_instance, user->ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return ERR_PTR(r);
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
@@ -1332,12 +1368,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_wait_fences *wait = data;
 	uint32_t fence_count = wait->in.fence_count;
 	struct drm_amdgpu_fence *fences_user;
 	struct drm_amdgpu_fence *fences;
 	int r;
 
+	if (amdgpu_kms_vram_lost(adev, fpriv))
+		return -ENODEV;
 	/* Get the fences from userspace */
 	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
 			       GFP_KERNEL);
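
Each of the reworked ioctls now bails out with -ENODEV once VRAM contents were lost in a prior reset. amdgpu_kms_vram_lost() itself lives in amdgpu_kms.c and is not shown in this diff; given the vram_lost_counter added in amdgpu_device.c below, a plausible sketch is a counter comparison against a value snapshotted when the file was opened:

    /* sketch only: assumed shape of the helper, which this diff does not show */
    static bool kms_vram_lost(struct amdgpu_device *adev,
    			  struct amdgpu_fpriv *fpriv)
    {
    	return fpriv->vram_lost_counter !=
    		atomic_read(&adev->vram_lost_counter);
    }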
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 90d1ac8a80f8..a11e44340b23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -52,12 +52,20 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 		struct amd_sched_rq *rq;
 
 		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+
+		if (ring == &adev->gfx.kiq.ring)
+			continue;
+
 		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
 					  rq, amdgpu_sched_jobs);
 		if (r)
 			goto failed;
 	}
 
+	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
+	if (r)
+		goto failed;
+
 	return 0;
 
 failed:
@@ -86,6 +94,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 	for (i = 0; i < adev->num_rings; i++)
 		amd_sched_entity_fini(&adev->rings[i]->sched,
 				      &ctx->rings[i].entity);
+
+	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
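
amdgpu_queue_mgr_init()/amdgpu_queue_mgr_fini() now bracket every context, replacing the global amdgpu_cs_get_ring() switch removed from amdgpu_cs.c above with a per-context mapper (the implementation is the new amdgpu_queue_mgr.c in the diffstat, not shown here). Conceptually the mapper resolves a user-visible (ip_type, ip_instance, ring) triple to a hardware ring; a deliberately simplified sketch of that idea, with a hypothetical table shape:

    /* sketch: per-context user ring index -> hw ring table (assumed shape,
     * the real queue manager keys on ip_type/ip_instance as well) */
    struct queue_map {
    	struct amdgpu_ring *ring[AMDGPU_MAX_RINGS];
    };

    static struct amdgpu_ring *map_user_ring(struct queue_map *m, unsigned idx)
    {
    	return idx < AMDGPU_MAX_RINGS ? m->ring[idx] : NULL;
    }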
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 43ca16b6eee2..4a8fc15467cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -54,8 +54,14 @@
 #include <linux/pci.h>
 #include <linux/firmware.h>
 
+MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
+
+#define AMDGPU_RESUME_MS		2000
+
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
 static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
+static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev);
 
 static const char *amdgpu_asic_name[] = {
 	"TAHITI",
@@ -77,6 +83,7 @@ static const char *amdgpu_asic_name[] = {
 	"POLARIS11",
 	"POLARIS12",
 	"VEGA10",
+	"RAVEN",
 	"LAST",
 };
 
@@ -478,9 +485,8 @@ void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 
 /*
  * amdgpu_wb_*()
- * Writeback is the the method by which the the GPU updates special pages
- * in memory with the status of certain GPU events (fences, ring pointers,
- * etc.).
+ * Writeback is the method by which the GPU updates special pages in memory
+ * with the status of certain GPU events (fences, ring pointers,etc.).
 */
 
 /**
@@ -506,7 +512,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev)
 *
 * @adev: amdgpu_device pointer
 *
- * Disables Writeback and frees the Writeback memory (all asics).
+ * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
@@ -614,7 +620,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
 * @mc: memory controller structure holding memory informations
 * @base: base address at which to put VRAM
 *
- * Function will place try to place VRAM at base address provided
+ * Function will try to place VRAM at base address provided
 * as parameter (which is so far either PCI aperture address or
 * for IGP TOM base address).
 *
@@ -636,7 +642,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb)
 * ones)
 *
 * Note: IGP TOM addr should be the same as the aperture addr, we don't
- * explicitly check for that thought.
+ * explicitly check for that though.
 *
 * FIXME: when reducing VRAM size align new size on power of 2.
 */
@@ -1067,6 +1073,10 @@ def_value:
 
 static void amdgpu_check_vm_size(struct amdgpu_device *adev)
 {
+	/* no need to check the default value */
+	if (amdgpu_vm_size == -1)
+		return;
+
 	if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
 		dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
 			 amdgpu_vm_size);
@@ -1152,16 +1162,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
 		return;
 
 	if (state == VGA_SWITCHEROO_ON) {
-		unsigned d3_delay = dev->pdev->d3_delay;
-
 		pr_info("amdgpu: switched on\n");
 		/* don't suspend or resume card normally */
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
 		amdgpu_device_resume(dev, true, true);
 
-		dev->pdev->d3_delay = d3_delay;
-
 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 		drm_kms_helper_poll_enable(dev);
 	} else {
@@ -1342,6 +1348,9 @@ int amdgpu_ip_block_add(struct amdgpu_device *adev,
 	if (!ip_block_version)
 		return -EINVAL;
 
+	DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks,
+		  ip_block_version->funcs->name);
+
 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
 
 	return 0;
@@ -1392,6 +1401,104 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
 	}
 }
 
+static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
+{
+	const char *chip_name;
+	char fw_name[30];
+	int err;
+	const struct gpu_info_firmware_header_v1_0 *hdr;
+
+	adev->firmware.gpu_info_fw = NULL;
+
+	switch (adev->asic_type) {
+	case CHIP_TOPAZ:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS12:
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+#ifdef CONFIG_DRM_AMDGPU_SI
+	case CHIP_VERDE:
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_OLAND:
+	case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KAVERI:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+#endif
+	default:
+		return 0;
+	case CHIP_VEGA10:
+		chip_name = "vega10";
+		break;
+	case CHIP_RAVEN:
+		chip_name = "raven";
+		break;
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
+	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
+	if (err) {
+		dev_err(adev->dev,
+			"Failed to load gpu_info firmware \"%s\"\n",
+			fw_name);
+		goto out;
+	}
+	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
+	if (err) {
+		dev_err(adev->dev,
+			"Failed to validate gpu_info firmware \"%s\"\n",
+			fw_name);
+		goto out;
+	}
+
+	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
+	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
+
+	switch (hdr->version_major) {
+	case 1:
+	{
+		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
+			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
+								le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
+		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
+		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
+		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
+		adev->gfx.config.max_texture_channel_caches =
+			le32_to_cpu(gpu_info_fw->gc_num_tccs);
+		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
+		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
+		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
+		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
+		adev->gfx.config.double_offchip_lds_buf =
+			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
+		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
+		adev->gfx.cu_info.max_waves_per_simd =
+			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
+		adev->gfx.cu_info.max_scratch_slots_per_cu =
+			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
+		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
+		break;
+	}
+	default:
+		dev_err(adev->dev,
+			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
+		err = -EINVAL;
+		goto out;
+	}
+out:
+	return err;
+}
+
 static int amdgpu_early_init(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -1444,8 +1551,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		return r;
 		break;
 #endif
 	case CHIP_VEGA10:
-		adev->family = AMDGPU_FAMILY_AI;
+	case CHIP_RAVEN:
+		if (adev->asic_type == CHIP_RAVEN)
+			adev->family = AMDGPU_FAMILY_RV;
+		else
+			adev->family = AMDGPU_FAMILY_AI;
 
 		r = soc15_set_ip_blocks(adev);
 		if (r)
@@ -1456,6 +1567,10 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		return -EINVAL;
 	}
 
+	r = amdgpu_device_parse_gpu_info_fw(adev);
+	if (r)
+		return r;
+
 	if (amdgpu_sriov_vf(adev)) {
 		r = amdgpu_virt_request_full_gpu(adev, true);
 		if (r)
@@ -1464,7 +1579,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
-			DRM_ERROR("disabled ip block: %d\n", i);
+			DRM_ERROR("disabled ip block: %d <%s>\n",
+				  i, adev->ip_blocks[i].version->funcs->name);
 			adev->ip_blocks[i].status.valid = false;
 		} else {
 			if (adev->ip_blocks[i].version->funcs->early_init) {
@@ -1552,22 +1668,24 @@ static int amdgpu_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_late_init(struct amdgpu_device *adev)
+static void amdgpu_fill_reset_magic(struct amdgpu_device *adev)
+{
+	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
+}
+
+static bool amdgpu_check_vram_lost(struct amdgpu_device *adev)
+{
+	return !!memcmp(adev->gart.ptr, adev->reset_magic,
+			AMDGPU_RESET_MAGIC_NUM);
+}
+
+static int amdgpu_late_set_cg_state(struct amdgpu_device *adev)
 {
 	int i = 0, r;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_blocks[i].version->funcs->late_init) {
-			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
-			if (r) {
-				DRM_ERROR("late_init of IP block <%s> failed %d\n",
-					  adev->ip_blocks[i].version->funcs->name, r);
-				return r;
-			}
-			adev->ip_blocks[i].status.late_initialized = true;
-		}
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
@@ -1581,6 +1699,31 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
 			}
 		}
 	}
+	return 0;
+}
+
+static int amdgpu_late_init(struct amdgpu_device *adev)
+{
+	int i = 0, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_blocks[i].status.valid)
+			continue;
+		if (adev->ip_blocks[i].version->funcs->late_init) {
+			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
+			if (r) {
+				DRM_ERROR("late_init of IP block <%s> failed %d\n",
+					  adev->ip_blocks[i].version->funcs->name, r);
+				return r;
+			}
+			adev->ip_blocks[i].status.late_initialized = true;
+		}
+	}
+
+	mod_delayed_work(system_wq, &adev->late_init_work,
+			msecs_to_jiffies(AMDGPU_RESUME_MS));
+
+	amdgpu_fill_reset_magic(adev);
 
 	return 0;
 }
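
amdgpu_fill_reset_magic() and amdgpu_check_vram_lost() form a cheap VRAM-loss detector: a snapshot of the first AMDGPU_RESET_MAGIC_NUM bytes behind adev->gart.ptr is taken at late-init time, and after a reset a memcmp() against that snapshot reveals whether VRAM contents survived. The idiom, reduced to a standalone sketch with a hypothetical buffer:

    #include <linux/string.h>

    #define MAGIC_LEN 64	/* hypothetical; stands in for AMDGPU_RESET_MAGIC_NUM */

    static u8 magic[MAGIC_LEN];

    /* before the reset: remember what the VRAM-backed memory contained */
    static void fill_magic(const void *vram)
    {
    	memcpy(magic, vram, MAGIC_LEN);
    }

    /* after the reset: any mismatch means the contents did not survive */
    static bool vram_lost(const void *vram)
    {
    	return memcmp(vram, magic, MAGIC_LEN) != 0;
    }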
@@ -1672,6 +1815,13 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 	return 0;
 }
 
+static void amdgpu_late_init_func_handler(struct work_struct *work)
+{
+	struct amdgpu_device *adev =
+		container_of(work, struct amdgpu_device, late_init_work.work);
+	amdgpu_late_set_cg_state(adev);
+}
+
 int amdgpu_suspend(struct amdgpu_device *adev)
 {
 	int i, r;
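
This handler is the sink of the late_init_work plumbing added elsewhere in this patch: INIT_DELAYED_WORK() in amdgpu_device_init(), mod_delayed_work() with AMDGPU_RESUME_MS (2000 ms) in amdgpu_late_init(), and cancel_delayed_work_sync() in amdgpu_device_fini(). The effect is to defer clockgating enablement off the init/resume path. The generic delayed-work pattern, as a sketch:

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    static void late_fn(struct work_struct *work)
    {
    	/* runs on system_wq roughly 2s after arming; deferred setup here */
    }

    static DECLARE_DELAYED_WORK(late_work, late_fn);

    static void arm_late_work(void)
    {
    	/* (re)arm; if already queued, the timeout is pushed out */
    	mod_delayed_work(system_wq, &late_work, msecs_to_jiffies(2000));
    }

    static void teardown(void)
    {
    	cancel_delayed_work_sync(&late_work);	/* also waits if running */
    }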
@@ -1717,19 +1867,25 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev)
 {
 	int i, r;
 
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
-			continue;
+	static enum amd_ip_block_type ip_order[] = {
+		AMD_IP_BLOCK_TYPE_GMC,
+		AMD_IP_BLOCK_TYPE_COMMON,
+		AMD_IP_BLOCK_TYPE_IH,
+	};
 
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-			adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-			adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
-			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+		int j;
+		struct amdgpu_ip_block *block;
 
-		if (r) {
-			DRM_ERROR("resume of IP block <%s> failed %d\n",
-				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
+		for (j = 0; j < adev->num_ip_blocks; j++) {
+			block = &adev->ip_blocks[j];
+
+			if (block->version->type != ip_order[i] ||
+				!block->status.valid)
+				continue;
+
+			r = block->version->funcs->hw_init(adev);
+			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
 		}
 	}
 
@@ -1740,33 +1896,67 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev)
 {
 	int i, r;
 
+	static enum amd_ip_block_type ip_order[] = {
+		AMD_IP_BLOCK_TYPE_SMC,
+		AMD_IP_BLOCK_TYPE_DCE,
+		AMD_IP_BLOCK_TYPE_GFX,
+		AMD_IP_BLOCK_TYPE_SDMA,
+		AMD_IP_BLOCK_TYPE_VCE,
+	};
+
+	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+		int j;
+		struct amdgpu_ip_block *block;
+
+		for (j = 0; j < adev->num_ip_blocks; j++) {
+			block = &adev->ip_blocks[j];
+
+			if (block->version->type != ip_order[i] ||
+				!block->status.valid)
+				continue;
+
+			r = block->version->funcs->hw_init(adev);
+			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+		}
+	}
+
+	return 0;
+}
+
+static int amdgpu_resume_phase1(struct amdgpu_device *adev)
+{
+	int i, r;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
-
-		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
-			adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-			adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
-			continue;
-
-		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
-		if (r) {
-			DRM_ERROR("resume of IP block <%s> failed %d\n",
-				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
-		}
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+		    adev->ip_blocks[i].version->type ==
+				AMD_IP_BLOCK_TYPE_IH) {
+			r = adev->ip_blocks[i].version->funcs->resume(adev);
+			if (r) {
+				DRM_ERROR("resume of IP block <%s> failed %d\n",
+					  adev->ip_blocks[i].version->funcs->name, r);
+				return r;
+			}
+		}
 	}
 
 	return 0;
 }
 
-static int amdgpu_resume(struct amdgpu_device *adev)
+static int amdgpu_resume_phase2(struct amdgpu_device *adev)
 {
 	int i, r;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
+			continue;
 		r = adev->ip_blocks[i].version->funcs->resume(adev);
 		if (r) {
 			DRM_ERROR("resume of IP block <%s> failed %d\n",
@@ -1778,6 +1968,18 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int amdgpu_resume(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_resume_phase1(adev);
+	if (r)
+		return r;
+	r = amdgpu_resume_phase2(adev);
+
+	return r;
+}
+
 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
 {
 	if (adev->is_atom_fw) {
@@ -1860,8 +2062,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	amdgpu_check_arguments(adev);
 
-	/* Registers mapping */
-	/* TODO: block userspace mapping of io register */
 	spin_lock_init(&adev->mmio_idx_lock);
 	spin_lock_init(&adev->smc_idx_lock);
 	spin_lock_init(&adev->pcie_idx_lock);
@@ -1877,6 +2077,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&adev->gtt_list);
 	spin_lock_init(&adev->gtt_list_lock);
 
+	INIT_LIST_HEAD(&adev->ring_lru_list);
+	spin_lock_init(&adev->ring_lru_list_lock);
+
+	INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler);
+
+	/* Registers mapping */
+	/* TODO: block userspace mapping of io register */
 	if (adev->asic_type >= CHIP_BONAIRE) {
 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
@@ -1989,6 +2196,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	adev->accel_working = true;
 
+	amdgpu_vm_check_compute_bug(adev);
+
 	/* Initialize the buffer migration limit. */
 	if (amdgpu_moverate >= 0)
 		max_MBps = amdgpu_moverate;
@@ -2017,6 +2226,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 
+	r = amdgpu_debugfs_test_ib_ring_init(adev);
+	if (r)
+		DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r);
+
 	r = amdgpu_debugfs_firmware_init(adev);
 	if (r)
 		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
@@ -2073,7 +2286,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_fini(adev);
+	if (adev->firmware.gpu_info_fw) {
+		release_firmware(adev->firmware.gpu_info_fw);
+		adev->firmware.gpu_info_fw = NULL;
+	}
 	adev->accel_working = false;
+	cancel_delayed_work_sync(&adev->late_init_work);
 	/* free i2c buses */
 	amdgpu_i2c_fini(adev);
 	amdgpu_atombios_fini(adev);
@@ -2458,16 +2676,15 @@ err:
 * amdgpu_sriov_gpu_reset - reset the asic
 *
 * @adev: amdgpu device pointer
- * @voluntary: if this reset is requested by guest.
- * (true means by guest and false means by HYPERVISOR )
+ * @job: which job trigger hang
 *
 * Attempt the reset the GPU if it has hung (all asics).
 * for SRIOV case.
 * Returns 0 for success or an error on failure.
 */
-int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
+int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
 {
-	int i, r = 0;
+	int i, j, r = 0;
 	int resched;
 	struct amdgpu_bo *bo, *tmp;
 	struct amdgpu_ring *ring;
@@ -2480,22 +2697,39 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 
-	/* block scheduler */
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		ring = adev->rings[i];
+	/* we start from the ring trigger GPU hang */
+	j = job ? job->ring->idx : 0;
 
+	/* block scheduler */
+	for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
+		ring = adev->rings[i % AMDGPU_MAX_RINGS];
 		if (!ring || !ring->sched.thread)
 			continue;
 
 		kthread_park(ring->sched.thread);
+
+		if (job && j != i)
+			continue;
+
+		/* here give the last chance to check if job removed from mirror-list
+		 * since we already pay some time on kthread_park */
+		if (job && list_empty(&job->base.node)) {
+			kthread_unpark(ring->sched.thread);
+			goto give_up_reset;
+		}
+
+		if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit))
+			amd_sched_job_kickout(&job->base);
+
+		/* only do job_reset on the hang ring if @job not NULL */
 		amd_sched_hw_job_reset(&ring->sched);
-	}
 
-	/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
-	amdgpu_fence_driver_force_completion(adev);
+		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+		amdgpu_fence_driver_force_completion_ring(ring);
+	}
 
 	/* request to take full control of GPU before re-initialization  */
-	if (voluntary)
+	if (job)
 		amdgpu_virt_reset_gpu(adev);
 	else
 		amdgpu_virt_request_full_gpu(adev, true);
@@ -2545,20 +2779,28 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
 	}
 	dma_fence_put(fence);
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_ring *ring = adev->rings[i];
+	for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
+		ring = adev->rings[i % AMDGPU_MAX_RINGS];
 		if (!ring || !ring->sched.thread)
 			continue;
 
+		if (job && j != i) {
+			kthread_unpark(ring->sched.thread);
+			continue;
+		}
+
 		amd_sched_job_recovery(&ring->sched);
 		kthread_unpark(ring->sched.thread);
 	}
 
 	drm_helper_resume_force_mode(adev->ddev);
+give_up_reset:
 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
+	} else {
+		dev_info(adev->dev, "GPU reset successed!\n");
 	}
 
 	adev->gfx.in_reset = false;
@@ -2578,10 +2820,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
 	int i, r;
 	int resched;
-	bool need_full_reset;
-
-	if (amdgpu_sriov_vf(adev))
-		return amdgpu_sriov_gpu_reset(adev, true);
+	bool need_full_reset, vram_lost = false;
 
 	if (!amdgpu_check_soft_reset(adev)) {
 		DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
@@ -2641,16 +2880,27 @@ retry:
 
 	if (!r) {
 		dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
-		r = amdgpu_resume(adev);
+		r = amdgpu_resume_phase1(adev);
+		if (r)
+			goto out;
+		vram_lost = amdgpu_check_vram_lost(adev);
+		if (vram_lost) {
+			DRM_ERROR("VRAM is lost!\n");
+			atomic_inc(&adev->vram_lost_counter);
+		}
+		r = amdgpu_ttm_recover_gart(adev);
+		if (r)
+			goto out;
+		r = amdgpu_resume_phase2(adev);
+		if (r)
+			goto out;
+		if (vram_lost)
+			amdgpu_fill_reset_magic(adev);
 	}
 	}
+out:
 	if (!r) {
 		amdgpu_irq_gpu_reset_resume_helper(adev);
-		if (need_full_reset && amdgpu_need_backup(adev)) {
-			r = amdgpu_ttm_recover_gart(adev);
-			if (r)
-				DRM_ERROR("gart recovery failed!!!\n");
-		}
 		r = amdgpu_ib_ring_tests(adev);
 		if (r) {
 			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
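
Condensing the retry block above: the post-reset recovery now runs in a fixed order so GART is usable before the bulk of the IP blocks resume. A sketch of that sequence (names from the diff, error paths collapsed for readability):

    /* condensed sketch of the sequence inside the "if (!r)" branch above */
    static int recover(struct amdgpu_device *adev)
    {
    	bool vram_lost;
    	int r = amdgpu_resume_phase1(adev);	/* COMMON, GMC, IH only */

    	if (r)
    		return r;
    	vram_lost = amdgpu_check_vram_lost(adev);	/* compare reset magic */
    	if (vram_lost)
    		atomic_inc(&adev->vram_lost_counter);
    	r = amdgpu_ttm_recover_gart(adev);	/* remap GART entries */
    	if (r)
    		return r;
    	r = amdgpu_resume_phase2(adev);		/* all remaining IP blocks */
    	if (!r && vram_lost)
    		amdgpu_fill_reset_magic(adev);	/* refresh the snapshot */
    	return r;
    }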
@@ -2712,10 +2962,11 @@ retry:
 	drm_helper_resume_force_mode(adev->ddev);
 
 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
-	if (r) {
+	if (r)
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
-	}
+	else
+		dev_info(adev->dev, "GPU reset successed!\n");
 
 	return r;
 }
@@ -3499,11 +3750,60 @@ static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
 	}
 }
 
+static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	int r = 0, i;
+
+	/* hold on the scheduler */
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		if (!ring || !ring->sched.thread)
+			continue;
+		kthread_park(ring->sched.thread);
+	}
+
+	seq_printf(m, "run ib test:\n");
+	r = amdgpu_ib_ring_tests(adev);
+	if (r)
+		seq_printf(m, "ib ring tests failed (%d).\n", r);
+	else
+		seq_printf(m, "ib ring tests passed.\n");
+
+	/* go on the scheduler */
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+
+		if (!ring || !ring->sched.thread)
+			continue;
+		kthread_unpark(ring->sched.thread);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = {
+	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib}
+};
+
+static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
+{
+	return amdgpu_debugfs_add_files(adev,
+					amdgpu_debugfs_test_ib_ring_list, 1);
+}
+
 int amdgpu_debugfs_init(struct drm_minor *minor)
 {
 	return 0;
 }
 #else
+static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
+{
+	return 0;
+}
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
 {
 	return 0;
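
The IB test entry uses the stock drm_info_list route: a seq_file show callback plus a one-element table registered through amdgpu_debugfs_add_files(). The same skeleton for a hypothetical entry (names invented for illustration):

    static int my_show(struct seq_file *m, void *data)
    {
    	struct drm_info_node *node = (struct drm_info_node *)m->private;
    	struct drm_device *dev = node->minor->dev;

    	seq_printf(m, "hello from %s\n", dev->driver->name);
    	return 0;
    }

    static const struct drm_info_list my_list[] = {
    	{"my_entry", &my_show}
    };

    /* registration, as above: amdgpu_debugfs_add_files(adev, my_list, 1); */

Note how the callback parks every scheduler thread before running the IB tests and unparks afterwards, so the test owns the rings for its duration.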
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 38e9b0d3659a..1cb52fd19060 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -22,7 +22,7 @@
 * Authors: Alex Deucher
 */
 
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f2d705e6a75a..b59f37c83fa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -39,7 +39,7 @@
39#include <linux/module.h> 39#include <linux/module.h>
40#include <linux/pm_runtime.h> 40#include <linux/pm_runtime.h>
41#include <linux/vga_switcheroo.h> 41#include <linux/vga_switcheroo.h>
42#include "drm_crtc_helper.h" 42#include <drm/drm_crtc_helper.h>
43 43
44#include "amdgpu.h" 44#include "amdgpu.h"
45#include "amdgpu_irq.h" 45#include "amdgpu_irq.h"
@@ -65,9 +65,12 @@
65 * - 3.13.0 - Add PRT support 65 * - 3.13.0 - Add PRT support
66 * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality 66 * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
67 * - 3.15.0 - Export more gpu info for gfx9 67 * - 3.15.0 - Export more gpu info for gfx9
68 * - 3.16.0 - Add reserved vmid support
69 * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
70 * - 3.18.0 - Export gpu always on cu bitmap
68 */ 71 */
69#define KMS_DRIVER_MAJOR 3 72#define KMS_DRIVER_MAJOR 3
70#define KMS_DRIVER_MINOR 15 73#define KMS_DRIVER_MINOR 18
71#define KMS_DRIVER_PATCHLEVEL 0 74#define KMS_DRIVER_PATCHLEVEL 0
72 75
73int amdgpu_vram_limit = 0; 76int amdgpu_vram_limit = 0;
@@ -92,7 +95,8 @@ int amdgpu_vm_size = -1;
92int amdgpu_vm_block_size = -1; 95int amdgpu_vm_block_size = -1;
93int amdgpu_vm_fault_stop = 0; 96int amdgpu_vm_fault_stop = 0;
94int amdgpu_vm_debug = 0; 97int amdgpu_vm_debug = 0;
95int amdgpu_vram_page_split = 1024; 98int amdgpu_vram_page_split = 512;
99int amdgpu_vm_update_mode = -1;
96int amdgpu_exp_hw_support = 0; 100int amdgpu_exp_hw_support = 0;
97int amdgpu_sched_jobs = 32; 101int amdgpu_sched_jobs = 32;
98int amdgpu_sched_hw_submission = 2; 102int amdgpu_sched_hw_submission = 2;
@@ -110,6 +114,8 @@ int amdgpu_prim_buf_per_se = 0;
110int amdgpu_pos_buf_per_se = 0; 114int amdgpu_pos_buf_per_se = 0;
111int amdgpu_cntl_sb_buf_per_se = 0; 115int amdgpu_cntl_sb_buf_per_se = 0;
112int amdgpu_param_buf_per_se = 0; 116int amdgpu_param_buf_per_se = 0;
117int amdgpu_job_hang_limit = 0;
118int amdgpu_lbpw = -1;
113 119
114MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); 120MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
115module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); 121module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -177,6 +183,9 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
177MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); 183MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
178module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); 184module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
179 185
 186MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR (LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both)");
187module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
188
180MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)"); 189MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");
181module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); 190module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
182 191
@@ -232,6 +241,38 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
232MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); 241MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
233module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); 242module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
234 243
 244MODULE_PARM_DESC(job_hang_limit, "how long to allow a job to hang without dropping it (default 0)");
 245module_param_named(job_hang_limit, amdgpu_job_hang_limit, int, 0444);
246
247MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
248module_param_named(lbpw, amdgpu_lbpw, int, 0444);
249
250#ifdef CONFIG_DRM_AMDGPU_SI
251
252#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
253int amdgpu_si_support = 0;
254MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))");
255#else
256int amdgpu_si_support = 1;
257MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)");
258#endif
259
260module_param_named(si_support, amdgpu_si_support, int, 0444);
261#endif
262
263#ifdef CONFIG_DRM_AMDGPU_CIK
264
265#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
266int amdgpu_cik_support = 0;
267MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))");
268#else
269int amdgpu_cik_support = 1;
270MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)");
271#endif
272
273module_param_named(cik_support, amdgpu_cik_support, int, 0444);
274#endif
275
235 276
236static const struct pci_device_id pciidlist[] = { 277static const struct pci_device_id pciidlist[] = {
237#ifdef CONFIG_DRM_AMDGPU_SI 278#ifdef CONFIG_DRM_AMDGPU_SI
@@ -449,6 +490,7 @@ static const struct pci_device_id pciidlist[] = {
449 {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, 490 {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
450 {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, 491 {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
451 {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, 492 {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
493 {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
452 {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, 494 {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
453 /* Vega 10 */ 495 /* Vega 10 */
454 {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 496 {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
@@ -460,6 +502,9 @@ static const struct pci_device_id pciidlist[] = {
460 {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 502 {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
461 {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 503 {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
462 {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, 504 {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
505 /* Raven */
506 {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
507
463 {0, 0, 0} 508 {0, 0, 0}
464}; 509};
465 510
@@ -491,6 +536,7 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
491static int amdgpu_pci_probe(struct pci_dev *pdev, 536static int amdgpu_pci_probe(struct pci_dev *pdev,
492 const struct pci_device_id *ent) 537 const struct pci_device_id *ent)
493{ 538{
539 struct drm_device *dev;
494 unsigned long flags = ent->driver_data; 540 unsigned long flags = ent->driver_data;
495 int ret; 541 int ret;
496 542
@@ -513,7 +559,29 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
513 if (ret) 559 if (ret)
514 return ret; 560 return ret;
515 561
516 return drm_get_pci_dev(pdev, ent, &kms_driver); 562 dev = drm_dev_alloc(&kms_driver, &pdev->dev);
563 if (IS_ERR(dev))
564 return PTR_ERR(dev);
565
566 ret = pci_enable_device(pdev);
567 if (ret)
568 goto err_free;
569
570 dev->pdev = pdev;
571
572 pci_set_drvdata(pdev, dev);
573
574 ret = drm_dev_register(dev, ent->driver_data);
575 if (ret)
576 goto err_pci;
577
578 return 0;
579
580err_pci:
581 pci_disable_device(pdev);
582err_free:
583 drm_dev_unref(dev);
584 return ret;
517} 585}
518 586
519static void 587static void
@@ -521,7 +589,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
521{ 589{
522 struct drm_device *dev = pci_get_drvdata(pdev); 590 struct drm_device *dev = pci_get_drvdata(pdev);
523 591
524 drm_put_dev(dev); 592 drm_dev_unregister(dev);
593 drm_dev_unref(dev);
525} 594}
526 595
527static void 596static void
@@ -715,11 +784,21 @@ static const struct file_operations amdgpu_driver_kms_fops = {
715#endif 784#endif
716}; 785};
717 786
787static bool
788amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
789 bool in_vblank_irq, int *vpos, int *hpos,
790 ktime_t *stime, ktime_t *etime,
791 const struct drm_display_mode *mode)
792{
793 return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
794 stime, etime, mode);
795}
796
718static struct drm_driver kms_driver = { 797static struct drm_driver kms_driver = {
719 .driver_features = 798 .driver_features =
720 DRIVER_USE_AGP | 799 DRIVER_USE_AGP |
721 DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | 800 DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
722 DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, 801 DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
723 .load = amdgpu_driver_load_kms, 802 .load = amdgpu_driver_load_kms,
724 .open = amdgpu_driver_open_kms, 803 .open = amdgpu_driver_open_kms,
725 .postclose = amdgpu_driver_postclose_kms, 804 .postclose = amdgpu_driver_postclose_kms,
@@ -729,8 +808,8 @@ static struct drm_driver kms_driver = {
729 .get_vblank_counter = amdgpu_get_vblank_counter_kms, 808 .get_vblank_counter = amdgpu_get_vblank_counter_kms,
730 .enable_vblank = amdgpu_enable_vblank_kms, 809 .enable_vblank = amdgpu_enable_vblank_kms,
731 .disable_vblank = amdgpu_disable_vblank_kms, 810 .disable_vblank = amdgpu_disable_vblank_kms,
732 .get_vblank_timestamp = amdgpu_get_vblank_timestamp_kms, 811 .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
733 .get_scanout_position = amdgpu_get_crtc_scanoutpos, 812 .get_scanout_position = amdgpu_get_crtc_scanout_position,
734#if defined(CONFIG_DEBUG_FS) 813#if defined(CONFIG_DEBUG_FS)
735 .debugfs_init = amdgpu_debugfs_init, 814 .debugfs_init = amdgpu_debugfs_init,
736#endif 815#endif
@@ -807,7 +886,7 @@ static int __init amdgpu_init(void)
807 driver->num_ioctls = amdgpu_max_kms_ioctl; 886 driver->num_ioctls = amdgpu_max_kms_ioctl;
808 amdgpu_register_atpx_handler(); 887 amdgpu_register_atpx_handler();
809 /* let modprobe override vga console setting */ 888 /* let modprobe override vga console setting */
810 return drm_pci_init(driver, pdriver); 889 return pci_register_driver(pdriver);
811 890
812error_sched: 891error_sched:
813 amdgpu_fence_slab_fini(); 892 amdgpu_fence_slab_fini();
@@ -822,7 +901,7 @@ error_sync:
822static void __exit amdgpu_exit(void) 901static void __exit amdgpu_exit(void)
823{ 902{
824 amdgpu_amdkfd_fini(); 903 amdgpu_amdkfd_fini();
825 drm_pci_exit(driver, pdriver); 904 pci_unregister_driver(pdriver);
826 amdgpu_unregister_atpx_handler(); 905 amdgpu_unregister_atpx_handler();
827 amdgpu_sync_fini(); 906 amdgpu_sync_fini();
828 amd_sched_fence_slab_fini(); 907 amd_sched_fence_slab_fini();
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 236d9950221b..c0d8c6ff6380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -425,10 +425,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
425 425
426void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev) 426void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev)
427{ 427{
428 struct amdgpu_fbdev *afbdev = adev->mode_info.rfbdev; 428 struct amdgpu_fbdev *afbdev;
429 struct drm_fb_helper *fb_helper; 429 struct drm_fb_helper *fb_helper;
430 int ret; 430 int ret;
431 431
432 if (!adev)
433 return;
434
435 afbdev = adev->mode_info.rfbdev;
436
432 if (!afbdev) 437 if (!afbdev)
433 return; 438 return;
434 439
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7b60fb79c3a6..333bad749067 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
541 } 541 }
542} 542}
543 543
544void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring)
545{
546 if (ring)
547 amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
548}
549
544/* 550/*
545 * Common fence implementation 551 * Common fence implementation
546 */ 552 */
@@ -660,11 +666,17 @@ static const struct drm_info_list amdgpu_debugfs_fence_list[] = {
660 {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, 666 {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
661 {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} 667 {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
662}; 668};
669
670static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = {
671 {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
672};
663#endif 673#endif
664 674
665int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) 675int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
666{ 676{
667#if defined(CONFIG_DEBUG_FS) 677#if defined(CONFIG_DEBUG_FS)
678 if (amdgpu_sriov_vf(adev))
679 return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1);
668 return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); 680 return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
669#else 681#else
670 return 0; 682 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 902e6015abca..a57abc1a25fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
224 * 224 *
225 * Unbinds the requested pages from the gart page table and 225 * Unbinds the requested pages from the gart page table and
226 * replaces them with the dummy page (all asics). 226 * replaces them with the dummy page (all asics).
227 * Returns 0 for success, -EINVAL for failure.
227 */ 228 */
228void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, 229int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
229 int pages) 230 int pages)
230{ 231{
231 unsigned t; 232 unsigned t;
@@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
237 238
238 if (!adev->gart.ready) { 239 if (!adev->gart.ready) {
239 WARN(1, "trying to unbind memory from uninitialized GART !\n"); 240 WARN(1, "trying to unbind memory from uninitialized GART !\n");
240 return; 241 return -EINVAL;
241 } 242 }
242 243
243 t = offset / AMDGPU_GPU_PAGE_SIZE; 244 t = offset / AMDGPU_GPU_PAGE_SIZE;
@@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
258 } 259 }
259 mb(); 260 mb();
260 amdgpu_gart_flush_gpu_tlb(adev, 0); 261 amdgpu_gart_flush_gpu_tlb(adev, 0);
262 return 0;
261} 263}
262 264
263/** 265/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 94cb91cf93eb..621f739103a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -219,16 +219,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
219 ttm_eu_backoff_reservation(&ticket, &list); 219 ttm_eu_backoff_reservation(&ticket, &list);
220} 220}
221 221
222static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
223{
224 if (r == -EDEADLK) {
225 r = amdgpu_gpu_reset(adev);
226 if (!r)
227 r = -EAGAIN;
228 }
229 return r;
230}
231
232/* 222/*
233 * GEM ioctls. 223 * GEM ioctls.
234 */ 224 */
@@ -249,20 +239,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
249 AMDGPU_GEM_CREATE_CPU_GTT_USWC | 239 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
250 AMDGPU_GEM_CREATE_VRAM_CLEARED| 240 AMDGPU_GEM_CREATE_VRAM_CLEARED|
251 AMDGPU_GEM_CREATE_SHADOW | 241 AMDGPU_GEM_CREATE_SHADOW |
252 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { 242 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
253 r = -EINVAL; 243 return -EINVAL;
254 goto error_unlock; 244
255 }
256 /* reject invalid gem domains */ 245 /* reject invalid gem domains */
257 if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | 246 if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
258 AMDGPU_GEM_DOMAIN_GTT | 247 AMDGPU_GEM_DOMAIN_GTT |
259 AMDGPU_GEM_DOMAIN_VRAM | 248 AMDGPU_GEM_DOMAIN_VRAM |
260 AMDGPU_GEM_DOMAIN_GDS | 249 AMDGPU_GEM_DOMAIN_GDS |
261 AMDGPU_GEM_DOMAIN_GWS | 250 AMDGPU_GEM_DOMAIN_GWS |
262 AMDGPU_GEM_DOMAIN_OA)) { 251 AMDGPU_GEM_DOMAIN_OA))
263 r = -EINVAL; 252 return -EINVAL;
264 goto error_unlock;
265 }
266 253
267 /* create a gem object to contain this object in */ 254 /* create a gem object to contain this object in */
268 if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | 255 if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
@@ -274,10 +261,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
274 size = size << AMDGPU_GWS_SHIFT; 261 size = size << AMDGPU_GWS_SHIFT;
275 else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) 262 else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
276 size = size << AMDGPU_OA_SHIFT; 263 size = size << AMDGPU_OA_SHIFT;
277 else { 264 else
278 r = -EINVAL; 265 return -EINVAL;
279 goto error_unlock;
280 }
281 } 266 }
282 size = roundup(size, PAGE_SIZE); 267 size = roundup(size, PAGE_SIZE);
283 268
@@ -286,21 +271,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
286 args->in.domain_flags, 271 args->in.domain_flags,
287 kernel, &gobj); 272 kernel, &gobj);
288 if (r) 273 if (r)
289 goto error_unlock; 274 return r;
290 275
291 r = drm_gem_handle_create(filp, gobj, &handle); 276 r = drm_gem_handle_create(filp, gobj, &handle);
292 /* drop reference from allocate - handle holds it now */ 277 /* drop reference from allocate - handle holds it now */
293 drm_gem_object_unreference_unlocked(gobj); 278 drm_gem_object_unreference_unlocked(gobj);
294 if (r) 279 if (r)
295 goto error_unlock; 280 return r;
296 281
297 memset(args, 0, sizeof(*args)); 282 memset(args, 0, sizeof(*args));
298 args->out.handle = handle; 283 args->out.handle = handle;
299 return 0; 284 return 0;
300
301error_unlock:
302 r = amdgpu_gem_handle_lockup(adev, r);
303 return r;
304} 285}
305 286
306int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, 287int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
@@ -334,7 +315,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
334 AMDGPU_GEM_DOMAIN_CPU, 0, 315 AMDGPU_GEM_DOMAIN_CPU, 0,
335 0, &gobj); 316 0, &gobj);
336 if (r) 317 if (r)
337 goto handle_lockup; 318 return r;
338 319
339 bo = gem_to_amdgpu_bo(gobj); 320 bo = gem_to_amdgpu_bo(gobj);
340 bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; 321 bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
@@ -374,7 +355,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
374 /* drop reference from allocate - handle holds it now */ 355 /* drop reference from allocate - handle holds it now */
375 drm_gem_object_unreference_unlocked(gobj); 356 drm_gem_object_unreference_unlocked(gobj);
376 if (r) 357 if (r)
377 goto handle_lockup; 358 return r;
378 359
379 args->handle = handle; 360 args->handle = handle;
380 return 0; 361 return 0;
@@ -388,9 +369,6 @@ unlock_mmap_sem:
388release_object: 369release_object:
389 drm_gem_object_unreference_unlocked(gobj); 370 drm_gem_object_unreference_unlocked(gobj);
390 371
391handle_lockup:
392 r = amdgpu_gem_handle_lockup(adev, r);
393
394 return r; 372 return r;
395} 373}
396 374
@@ -456,7 +434,6 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
456int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, 434int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
457 struct drm_file *filp) 435 struct drm_file *filp)
458{ 436{
459 struct amdgpu_device *adev = dev->dev_private;
460 union drm_amdgpu_gem_wait_idle *args = data; 437 union drm_amdgpu_gem_wait_idle *args = data;
461 struct drm_gem_object *gobj; 438 struct drm_gem_object *gobj;
462 struct amdgpu_bo *robj; 439 struct amdgpu_bo *robj;
@@ -484,7 +461,6 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
484 r = ret; 461 r = ret;
485 462
486 drm_gem_object_unreference_unlocked(gobj); 463 drm_gem_object_unreference_unlocked(gobj);
487 r = amdgpu_gem_handle_lockup(adev, r);
488 return r; 464 return r;
489} 465}
490 466
@@ -593,9 +569,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
593 uint64_t va_flags; 569 uint64_t va_flags;
594 int r = 0; 570 int r = 0;
595 571
596 if (!adev->vm_manager.enabled)
597 return -ENOTTY;
598
599 if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { 572 if (args->va_address < AMDGPU_VA_RESERVED_SIZE) {
600 dev_err(&dev->pdev->dev, 573 dev_err(&dev->pdev->dev,
601 "va_address 0x%lX is in reserved area 0x%X\n", 574 "va_address 0x%lX is in reserved area 0x%X\n",
@@ -621,6 +594,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
621 args->operation); 594 args->operation);
622 return -EINVAL; 595 return -EINVAL;
623 } 596 }
597 if ((args->operation == AMDGPU_VA_OP_MAP) ||
598 (args->operation == AMDGPU_VA_OP_REPLACE)) {
599 if (amdgpu_kms_vram_lost(adev, fpriv))
600 return -ENODEV;
601 }
624 602
625 INIT_LIST_HEAD(&list); 603 INIT_LIST_HEAD(&list);
626 if ((args->operation != AMDGPU_VA_OP_CLEAR) && 604 if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 19943356cca7..e26108aad3fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -108,3 +108,209 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
108 p = next + 1; 108 p = next + 1;
109 } 109 }
110} 110}
111
112void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
113{
114 int i, queue, pipe, mec;
115
116 /* policy for amdgpu compute queue ownership */
117 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
118 queue = i % adev->gfx.mec.num_queue_per_pipe;
119 pipe = (i / adev->gfx.mec.num_queue_per_pipe)
120 % adev->gfx.mec.num_pipe_per_mec;
121 mec = (i / adev->gfx.mec.num_queue_per_pipe)
122 / adev->gfx.mec.num_pipe_per_mec;
123
124 /* we've run out of HW */
125 if (mec >= adev->gfx.mec.num_mec)
126 break;
127
128 if (adev->gfx.mec.num_mec > 1) {
129 /* policy: amdgpu owns the first two queues of the first MEC */
130 if (mec == 0 && queue < 2)
131 set_bit(i, adev->gfx.mec.queue_bitmap);
132 } else {
133 /* policy: amdgpu owns all queues in the first pipe */
134 if (mec == 0 && pipe == 0)
135 set_bit(i, adev->gfx.mec.queue_bitmap);
136 }
137 }
138
139 /* update the number of active compute rings */
140 adev->gfx.num_compute_rings =
141 bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
142
143 /* If you hit this case and edited the policy, you probably just
144 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
145 if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
146 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
147}
148
149static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
150 struct amdgpu_ring *ring)
151{
152 int queue_bit;
153 int mec, pipe, queue;
154
155 queue_bit = adev->gfx.mec.num_mec
156 * adev->gfx.mec.num_pipe_per_mec
157 * adev->gfx.mec.num_queue_per_pipe;
158
159 while (queue_bit-- >= 0) {
160 if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
161 continue;
162
163 amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);
164
 165 /* Using pipes 2/3 from MEC 2 seems to cause problems */
166 if (mec == 1 && pipe > 1)
167 continue;
168
169 ring->me = mec + 1;
170 ring->pipe = pipe;
171 ring->queue = queue;
172
173 return 0;
174 }
175
176 dev_err(adev->dev, "Failed to find a queue for KIQ\n");
177 return -EINVAL;
178}
179
180int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
181 struct amdgpu_ring *ring,
182 struct amdgpu_irq_src *irq)
183{
184 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
185 int r = 0;
186
187 mutex_init(&kiq->ring_mutex);
188
189 r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
190 if (r)
191 return r;
192
193 ring->adev = NULL;
194 ring->ring_obj = NULL;
195 ring->use_doorbell = true;
196 ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
197
198 r = amdgpu_gfx_kiq_acquire(adev, ring);
199 if (r)
200 return r;
201
202 ring->eop_gpu_addr = kiq->eop_gpu_addr;
203 sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
204 r = amdgpu_ring_init(adev, ring, 1024,
205 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
206 if (r)
207 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
208
209 return r;
210}
211
212void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
213 struct amdgpu_irq_src *irq)
214{
215 amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
216 amdgpu_ring_fini(ring);
217}
218
219void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
220{
221 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
222
223 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
224}
225
226int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
227 unsigned hpd_size)
228{
229 int r;
230 u32 *hpd;
231 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
232
233 r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
234 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
235 &kiq->eop_gpu_addr, (void **)&hpd);
236 if (r) {
237 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
238 return r;
239 }
240
241 memset(hpd, 0, hpd_size);
242
243 r = amdgpu_bo_reserve(kiq->eop_obj, true);
244 if (unlikely(r != 0))
245 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
246 amdgpu_bo_kunmap(kiq->eop_obj);
247 amdgpu_bo_unreserve(kiq->eop_obj);
248
249 return 0;
250}
251
252/* create MQD for each compute queue */
253int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
254 unsigned mqd_size)
255{
256 struct amdgpu_ring *ring = NULL;
257 int r, i;
258
259 /* create MQD for KIQ */
260 ring = &adev->gfx.kiq.ring;
261 if (!ring->mqd_obj) {
262 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
263 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
264 &ring->mqd_gpu_addr, &ring->mqd_ptr);
265 if (r) {
 266 dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r);
267 return r;
268 }
269
270 /* prepare MQD backup */
271 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
272 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
273 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
274 }
275
276 /* create MQD for each KCQ */
277 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
278 ring = &adev->gfx.compute_ring[i];
279 if (!ring->mqd_obj) {
280 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
281 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
282 &ring->mqd_gpu_addr, &ring->mqd_ptr);
283 if (r) {
 284 dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r);
285 return r;
286 }
287
288 /* prepare MQD backup */
289 adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
290 if (!adev->gfx.mec.mqd_backup[i])
291 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
292 }
293 }
294
295 return 0;
296}
297
298void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
299{
300 struct amdgpu_ring *ring = NULL;
301 int i;
302
303 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
304 ring = &adev->gfx.compute_ring[i];
305 kfree(adev->gfx.mec.mqd_backup[i]);
306 amdgpu_bo_free_kernel(&ring->mqd_obj,
307 &ring->mqd_gpu_addr,
308 &ring->mqd_ptr);
309 }
310
311 ring = &adev->gfx.kiq.ring;
312 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
313 amdgpu_bo_free_kernel(&ring->mqd_obj,
314 &ring->mqd_gpu_addr,
315 &ring->mqd_ptr);
316}
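Editor's note: the ownership policy in amdgpu_gfx_compute_queue_acquire() can be checked in isolation. Below is a standalone model in plain C, assuming a topology of 2 MECs, 4 pipes per MEC and 8 queues per pipe (assumed numbers, not taken from the diff); with more than one MEC it claims queues 0 and 1 of every pipe on MEC 0, eight rings in total:

#include <stdio.h>

int main(void)
{
	const int num_mec = 2, num_pipe_per_mec = 4, num_queue_per_pipe = 8;
	const int max_queues = num_mec * num_pipe_per_mec * num_queue_per_pipe;
	int i, owned = 0;

	for (i = 0; i < max_queues; i++) {
		/* same decomposition as the loop in the diff */
		int queue = i % num_queue_per_pipe;
		int pipe = (i / num_queue_per_pipe) % num_pipe_per_mec;
		int mec = (i / num_queue_per_pipe) / num_pipe_per_mec;

		/* multi-MEC policy: first two queues of each MEC 0 pipe */
		if (num_mec > 1 && mec == 0 && queue < 2) {
			printf("owned bit %2d = mec %d pipe %d queue %d\n",
			       i, mec, pipe, queue);
			owned++;
		}
	}
	printf("%d compute rings\n", owned);	/* prints 8 with these numbers */
	return 0;
}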
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index e02044086445..1f279050d334 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -30,4 +30,64 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
30void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, 30void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
31 unsigned max_sh); 31 unsigned max_sh);
32 32
33void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
34
35int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
36 struct amdgpu_ring *ring,
37 struct amdgpu_irq_src *irq);
38
39void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
40 struct amdgpu_irq_src *irq);
41
42void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
43int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
44 unsigned hpd_size);
45
46int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
47 unsigned mqd_size);
48void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
49
50/**
51 * amdgpu_gfx_create_bitmask - create a bitmask
52 *
53 * @bit_width: length of the mask
54 *
55 * create a variable length bit mask.
56 * Returns the bitmask.
57 */
58static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
59{
60 return (u32)((1ULL << bit_width) - 1);
61}
62
63static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev,
64 int mec, int pipe, int queue)
65{
66 int bit = 0;
67
68 bit += mec * adev->gfx.mec.num_pipe_per_mec
69 * adev->gfx.mec.num_queue_per_pipe;
70 bit += pipe * adev->gfx.mec.num_queue_per_pipe;
71 bit += queue;
72
73 return bit;
74}
75
76static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
77 int *mec, int *pipe, int *queue)
78{
79 *queue = bit % adev->gfx.mec.num_queue_per_pipe;
80 *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
81 % adev->gfx.mec.num_pipe_per_mec;
82 *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
83 / adev->gfx.mec.num_pipe_per_mec;
 84}
 85
86static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
87 int mec, int pipe, int queue)
88{
89 return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
90 adev->gfx.mec.queue_bitmap);
91}
92
33#endif 93#endif
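Editor's note: the inline helpers added above are a pair of inverse mappings between a flat queue bit and an (mec, pipe, queue) triple, plus a variable-width mask builder. A quick round-trip check in plain C, again assuming 4 pipes per MEC and 8 queues per pipe (illustrative numbers only):

#include <assert.h>
#include <stdio.h>

static const int num_pipe_per_mec = 4, num_queue_per_pipe = 8;

static int queue_to_bit(int mec, int pipe, int queue)
{
	return (mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue;
}

static void bit_to_queue(int bit, int *mec, int *pipe, int *queue)
{
	*queue = bit % num_queue_per_pipe;
	*pipe = (bit / num_queue_per_pipe) % num_pipe_per_mec;
	*mec = (bit / num_queue_per_pipe) / num_pipe_per_mec;
}

int main(void)
{
	int mec, pipe, queue;
	int bit = queue_to_bit(1, 2, 5);	/* MEC 1, pipe 2, queue 5 */
	unsigned mask = (unsigned)((1ULL << 8) - 1);	/* create_bitmask(8) */

	bit_to_queue(bit, &mec, &pipe, &queue);
	assert(mec == 1 && pipe == 2 && queue == 5);	/* round trip holds */
	printf("bit %d, mask 0x%x\n", bit, mask);	/* bit 53, mask 0xff */
	return 0;
}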
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 6e4ae0d983c2..f774b3f497d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
121{ 121{
122 struct amdgpu_device *adev = ring->adev; 122 struct amdgpu_device *adev = ring->adev;
123 struct amdgpu_ib *ib = &ibs[0]; 123 struct amdgpu_ib *ib = &ibs[0];
124 struct dma_fence *tmp = NULL;
124 bool skip_preamble, need_ctx_switch; 125 bool skip_preamble, need_ctx_switch;
125 unsigned patch_offset = ~0; 126 unsigned patch_offset = ~0;
126 struct amdgpu_vm *vm; 127 struct amdgpu_vm *vm;
@@ -160,8 +161,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
160 dev_err(adev->dev, "scheduling IB failed (%d).\n", r); 161 dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
161 return r; 162 return r;
162 } 163 }
163 if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) 164
165 if (ring->funcs->emit_pipeline_sync && job &&
166 ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
167 amdgpu_vm_need_pipeline_sync(ring, job))) {
164 amdgpu_ring_emit_pipeline_sync(ring); 168 amdgpu_ring_emit_pipeline_sync(ring);
169 dma_fence_put(tmp);
170 }
171
172 if (ring->funcs->insert_start)
173 ring->funcs->insert_start(ring);
165 174
166 if (vm) { 175 if (vm) {
167 r = amdgpu_vm_flush(ring, job); 176 r = amdgpu_vm_flush(ring, job);
@@ -188,8 +197,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
188 status |= AMDGPU_HAVE_CTX_SWITCH; 197 status |= AMDGPU_HAVE_CTX_SWITCH;
189 status |= job->preamble_status; 198 status |= job->preamble_status;
190 199
191 if (vm)
192 status |= AMDGPU_VM_DOMAIN;
193 amdgpu_ring_emit_cntxcntl(ring, status); 200 amdgpu_ring_emit_cntxcntl(ring, status);
194 } 201 }
195 202
@@ -208,6 +215,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
208 need_ctx_switch = false; 215 need_ctx_switch = false;
209 } 216 }
210 217
218 if (ring->funcs->emit_tmz)
219 amdgpu_ring_emit_tmz(ring, false);
220
211 if (ring->funcs->emit_hdp_invalidate 221 if (ring->funcs->emit_hdp_invalidate
212#ifdef CONFIG_X86_64 222#ifdef CONFIG_X86_64
213 && !(adev->flags & AMD_IS_APU) 223 && !(adev->flags & AMD_IS_APU)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index a3da1a122fc8..3de8e74e5b3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -62,8 +62,9 @@ enum amdgpu_ih_clientid
62 AMDGPU_IH_CLIENTID_MP0 = 0x1e, 62 AMDGPU_IH_CLIENTID_MP0 = 0x1e,
63 AMDGPU_IH_CLIENTID_MP1 = 0x1f, 63 AMDGPU_IH_CLIENTID_MP1 = 0x1f,
64 64
65 AMDGPU_IH_CLIENTID_MAX 65 AMDGPU_IH_CLIENTID_MAX,
66 66
67 AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD
67}; 68};
68 69
69#define AMDGPU_IH_CLIENTID_LEGACY 0 70#define AMDGPU_IH_CLIENTID_LEGACY 0
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index a6b7e367a860..62da6c5c6095 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -83,7 +83,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
83 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, 83 struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
84 reset_work); 84 reset_work);
85 85
86 amdgpu_gpu_reset(adev); 86 if (!amdgpu_sriov_vf(adev))
87 amdgpu_gpu_reset(adev);
87} 88}
88 89
89/* Disable *all* interrupts */ 90/* Disable *all* interrupts */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 7570f2439a11..3d641e10e6b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
36 job->base.sched->name, 36 job->base.sched->name,
37 atomic_read(&job->ring->fence_drv.last_seq), 37 atomic_read(&job->ring->fence_drv.last_seq),
38 job->ring->fence_drv.sync_seq); 38 job->ring->fence_drv.sync_seq);
39 amdgpu_gpu_reset(job->adev); 39
40 if (amdgpu_sriov_vf(job->adev))
41 amdgpu_sriov_gpu_reset(job->adev, job);
42 else
43 amdgpu_gpu_reset(job->adev);
40} 44}
41 45
42int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 46int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -57,9 +61,10 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
57 (*job)->vm = vm; 61 (*job)->vm = vm;
58 (*job)->ibs = (void *)&(*job)[1]; 62 (*job)->ibs = (void *)&(*job)[1];
59 (*job)->num_ibs = num_ibs; 63 (*job)->num_ibs = num_ibs;
60 (*job)->need_pipeline_sync = false;
61 64
62 amdgpu_sync_create(&(*job)->sync); 65 amdgpu_sync_create(&(*job)->sync);
66 amdgpu_sync_create(&(*job)->dep_sync);
67 amdgpu_sync_create(&(*job)->sched_sync);
63 68
64 return 0; 69 return 0;
65} 70}
@@ -98,6 +103,8 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
98 103
99 dma_fence_put(job->fence); 104 dma_fence_put(job->fence);
100 amdgpu_sync_free(&job->sync); 105 amdgpu_sync_free(&job->sync);
106 amdgpu_sync_free(&job->dep_sync);
107 amdgpu_sync_free(&job->sched_sync);
101 kfree(job); 108 kfree(job);
102} 109}
103 110
@@ -107,6 +114,8 @@ void amdgpu_job_free(struct amdgpu_job *job)
107 114
108 dma_fence_put(job->fence); 115 dma_fence_put(job->fence);
109 amdgpu_sync_free(&job->sync); 116 amdgpu_sync_free(&job->sync);
117 amdgpu_sync_free(&job->dep_sync);
118 amdgpu_sync_free(&job->sched_sync);
110 kfree(job); 119 kfree(job);
111} 120}
112 121
@@ -138,11 +147,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
138 struct amdgpu_job *job = to_amdgpu_job(sched_job); 147 struct amdgpu_job *job = to_amdgpu_job(sched_job);
139 struct amdgpu_vm *vm = job->vm; 148 struct amdgpu_vm *vm = job->vm;
140 149
141 struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); 150 struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync);
151 int r;
142 152
153 if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) {
154 r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
155 if (r)
156 DRM_ERROR("Error adding fence to sync (%d)\n", r);
157 }
158 if (!fence)
159 fence = amdgpu_sync_get_fence(&job->sync);
143 while (fence == NULL && vm && !job->vm_id) { 160 while (fence == NULL && vm && !job->vm_id) {
144 struct amdgpu_ring *ring = job->ring; 161 struct amdgpu_ring *ring = job->ring;
145 int r;
146 162
147 r = amdgpu_vm_grab_id(vm, ring, &job->sync, 163 r = amdgpu_vm_grab_id(vm, ring, &job->sync,
148 &job->base.s_fence->finished, 164 &job->base.s_fence->finished,
@@ -153,9 +169,6 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
153 fence = amdgpu_sync_get_fence(&job->sync); 169 fence = amdgpu_sync_get_fence(&job->sync);
154 } 170 }
155 171
156 if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
157 job->need_pipeline_sync = true;
158
159 return fence; 172 return fence;
160} 173}
161 174
@@ -163,6 +176,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
163{ 176{
164 struct dma_fence *fence = NULL; 177 struct dma_fence *fence = NULL;
165 struct amdgpu_job *job; 178 struct amdgpu_job *job;
179 struct amdgpu_fpriv *fpriv = NULL;
166 int r; 180 int r;
167 181
168 if (!sched_job) { 182 if (!sched_job) {
@@ -174,10 +188,16 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
174 BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); 188 BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
175 189
176 trace_amdgpu_sched_run_job(job); 190 trace_amdgpu_sched_run_job(job);
177 r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); 191 if (job->vm)
178 if (r) 192 fpriv = container_of(job->vm, struct amdgpu_fpriv, vm);
179 DRM_ERROR("Error scheduling IBs (%d)\n", r); 193 /* skip ib schedule when vram is lost */
 180 194 if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) {
 195 DRM_ERROR("Skip scheduling IBs!\n");
 196 } else {
197 r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
198 if (r)
199 DRM_ERROR("Error scheduling IBs (%d)\n", r);
200 }
181 /* if gpu reset, hw fence will be replaced here */ 201 /* if gpu reset, hw fence will be replaced here */
182 dma_fence_put(job->fence); 202 dma_fence_put(job->fence);
183 job->fence = dma_fence_get(fence); 203 job->fence = dma_fence_get(fence);
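Editor's note: the dep_sync/sched_sync split above replaces the old need_pipeline_sync flag. Dependencies land in dep_sync; when the scheduler reports one as optimized (roughly, it comes from the job's own scheduler, so no explicit fence wait is needed) its fence is stashed in sched_sync, and amdgpu_ib_schedule() then emits a pipeline sync if sched_sync holds a fence. A toy model of that decision in plain C; the boolean stands in for the stashed fence and is illustrative only:

#include <stdbool.h>
#include <stdio.h>

struct fence { int ring; };		/* which ring signals this fence */
struct job { int ring; bool sched_sync; };

/* model of the "dependency optimized" test: a fence from the job's own
 * ring needs no explicit wait, but the ring must pipeline-sync instead */
static void add_dependency(struct job *job, const struct fence *dep)
{
	if (dep->ring == job->ring)
		job->sched_sync = true;
}

int main(void)
{
	struct job job = { .ring = 0, .sched_sync = false };
	struct fence dep = { .ring = 0 };

	add_dependency(&job, &dep);
	printf("emit pipeline sync: %s\n", job.sched_sync ? "yes" : "no");
	return 0;
}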
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 96c341670782..b0b23101d1c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -87,6 +87,41 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
87 struct amdgpu_device *adev; 87 struct amdgpu_device *adev;
88 int r, acpi_status; 88 int r, acpi_status;
89 89
90#ifdef CONFIG_DRM_AMDGPU_SI
91 if (!amdgpu_si_support) {
92 switch (flags & AMD_ASIC_MASK) {
93 case CHIP_TAHITI:
94 case CHIP_PITCAIRN:
95 case CHIP_VERDE:
96 case CHIP_OLAND:
97 case CHIP_HAINAN:
98 dev_info(dev->dev,
99 "SI support provided by radeon.\n");
100 dev_info(dev->dev,
101 "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
102 );
103 return -ENODEV;
104 }
105 }
106#endif
107#ifdef CONFIG_DRM_AMDGPU_CIK
108 if (!amdgpu_cik_support) {
109 switch (flags & AMD_ASIC_MASK) {
110 case CHIP_KAVERI:
111 case CHIP_BONAIRE:
112 case CHIP_HAWAII:
113 case CHIP_KABINI:
114 case CHIP_MULLINS:
115 dev_info(dev->dev,
116 "CIK support provided by radeon.\n");
117 dev_info(dev->dev,
118 "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
119 );
120 return -ENODEV;
121 }
122 }
123#endif
124
90 adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); 125 adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
91 if (adev == NULL) { 126 if (adev == NULL) {
92 return -ENOMEM; 127 return -ENOMEM;
@@ -235,6 +270,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
235static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 270static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
236{ 271{
237 struct amdgpu_device *adev = dev->dev_private; 272 struct amdgpu_device *adev = dev->dev_private;
273 struct amdgpu_fpriv *fpriv = filp->driver_priv;
238 struct drm_amdgpu_info *info = data; 274 struct drm_amdgpu_info *info = data;
239 struct amdgpu_mode_info *minfo = &adev->mode_info; 275 struct amdgpu_mode_info *minfo = &adev->mode_info;
240 void __user *out = (void __user *)(uintptr_t)info->return_pointer; 276 void __user *out = (void __user *)(uintptr_t)info->return_pointer;
@@ -247,6 +283,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
247 283
248 if (!info->return_size || !info->return_pointer) 284 if (!info->return_size || !info->return_pointer)
249 return -EINVAL; 285 return -EINVAL;
286 if (amdgpu_kms_vram_lost(adev, fpriv))
287 return -ENODEV;
250 288
251 switch (info->query) { 289 switch (info->query) {
252 case AMDGPU_INFO_ACCEL_WORKING: 290 case AMDGPU_INFO_ACCEL_WORKING:
@@ -319,6 +357,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
319 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; 357 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
320 ib_size_alignment = 1; 358 ib_size_alignment = 1;
321 break; 359 break;
360 case AMDGPU_HW_IP_VCN_DEC:
361 type = AMD_IP_BLOCK_TYPE_VCN;
362 ring_mask = adev->vcn.ring_dec.ready ? 1 : 0;
363 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
364 ib_size_alignment = 16;
365 break;
366 case AMDGPU_HW_IP_VCN_ENC:
367 type = AMD_IP_BLOCK_TYPE_VCN;
368 for (i = 0; i < adev->vcn.num_enc_rings; i++)
369 ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i);
370 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
371 ib_size_alignment = 1;
372 break;
322 default: 373 default:
323 return -EINVAL; 374 return -EINVAL;
324 } 375 }
@@ -361,6 +412,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
361 case AMDGPU_HW_IP_UVD_ENC: 412 case AMDGPU_HW_IP_UVD_ENC:
362 type = AMD_IP_BLOCK_TYPE_UVD; 413 type = AMD_IP_BLOCK_TYPE_UVD;
363 break; 414 break;
415 case AMDGPU_HW_IP_VCN_DEC:
416 case AMDGPU_HW_IP_VCN_ENC:
417 type = AMD_IP_BLOCK_TYPE_VCN;
418 break;
364 default: 419 default:
365 return -EINVAL; 420 return -EINVAL;
366 } 421 }
@@ -397,6 +452,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
397 case AMDGPU_INFO_NUM_EVICTIONS: 452 case AMDGPU_INFO_NUM_EVICTIONS:
398 ui64 = atomic64_read(&adev->num_evictions); 453 ui64 = atomic64_read(&adev->num_evictions);
399 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; 454 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
455 case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS:
456 ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
457 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
400 case AMDGPU_INFO_VRAM_USAGE: 458 case AMDGPU_INFO_VRAM_USAGE:
401 ui64 = atomic64_read(&adev->vram_usage); 459 ui64 = atomic64_read(&adev->vram_usage);
402 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; 460 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
@@ -536,6 +594,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
536 dev_info.cu_active_number = adev->gfx.cu_info.number; 594 dev_info.cu_active_number = adev->gfx.cu_info.number;
537 dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; 595 dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
538 dev_info.ce_ram_size = adev->gfx.ce_ram_size; 596 dev_info.ce_ram_size = adev->gfx.ce_ram_size;
597 memcpy(&dev_info.cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
598 sizeof(adev->gfx.cu_info.ao_cu_bitmap));
539 memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], 599 memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
540 sizeof(adev->gfx.cu_info.bitmap)); 600 sizeof(adev->gfx.cu_info.bitmap));
541 dev_info.vram_type = adev->mc.vram_type; 601 dev_info.vram_type = adev->mc.vram_type;
@@ -730,6 +790,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
730 vga_switcheroo_process_delayed_switch(); 790 vga_switcheroo_process_delayed_switch();
731} 791}
732 792
793bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
794 struct amdgpu_fpriv *fpriv)
795{
796 return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
797}
798
733/** 799/**
734 * amdgpu_driver_open_kms - drm callback for open 800 * amdgpu_driver_open_kms - drm callback for open
735 * 801 *
@@ -757,7 +823,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
757 goto out_suspend; 823 goto out_suspend;
758 } 824 }
759 825
760 r = amdgpu_vm_init(adev, &fpriv->vm); 826 r = amdgpu_vm_init(adev, &fpriv->vm,
827 AMDGPU_VM_CONTEXT_GFX);
761 if (r) { 828 if (r) {
762 kfree(fpriv); 829 kfree(fpriv);
763 goto out_suspend; 830 goto out_suspend;
@@ -782,6 +849,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
782 849
783 amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); 850 amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
784 851
852 fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
785 file_priv->driver_priv = fpriv; 853 file_priv->driver_priv = fpriv;
786 854
787out_suspend: 855out_suspend:
@@ -814,8 +882,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
814 882
815 amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); 883 amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
816 884
817 amdgpu_uvd_free_handles(adev, file_priv); 885 if (adev->asic_type != CHIP_RAVEN) {
818 amdgpu_vce_free_handles(adev, file_priv); 886 amdgpu_uvd_free_handles(adev, file_priv);
887 amdgpu_vce_free_handles(adev, file_priv);
888 }
819 889
820 amdgpu_vm_bo_rmv(adev, fpriv->prt_va); 890 amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
821 891
@@ -945,50 +1015,10 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
945 amdgpu_irq_put(adev, &adev->crtc_irq, idx); 1015 amdgpu_irq_put(adev, &adev->crtc_irq, idx);
946} 1016}
947 1017
948/**
949 * amdgpu_get_vblank_timestamp_kms - get vblank timestamp
950 *
951 * @dev: drm dev pointer
952 * @crtc: crtc to get the timestamp for
953 * @max_error: max error
954 * @vblank_time: time value
955 * @flags: flags passed to the driver
956 *
957 * Gets the timestamp on the requested crtc based on the
958 * scanout position. (all asics).
959 * Returns postive status flags on success, negative error on failure.
960 */
961int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
962 int *max_error,
963 struct timeval *vblank_time,
964 unsigned flags)
965{
966 struct drm_crtc *crtc;
967 struct amdgpu_device *adev = dev->dev_private;
968
969 if (pipe >= dev->num_crtcs) {
970 DRM_ERROR("Invalid crtc %u\n", pipe);
971 return -EINVAL;
972 }
973
974 /* Get associated drm_crtc: */
975 crtc = &adev->mode_info.crtcs[pipe]->base;
976 if (!crtc) {
977 /* This can occur on driver load if some component fails to
978 * initialize completely and driver is unloaded */
979 DRM_ERROR("Uninitialized crtc %d\n", pipe);
980 return -EINVAL;
981 }
982
983 /* Helper routine in DRM core does all the work: */
984 return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
985 vblank_time, flags,
986 &crtc->hwmode);
987}
988
989const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { 1018const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
990 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), 1019 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
991 DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), 1020 DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
1021 DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
992 DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), 1022 DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
993 /* KMS */ 1023 /* KMS */
994 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), 1024 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
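Editor's note: the vram-lost handshake added in this file is a simple counter snapshot. Each file private records adev->vram_lost_counter at open, and amdgpu_kms_vram_lost() flags the client once a reset has bumped the counter, after which VA maps, info queries and job submission return -ENODEV. A runnable model of the pattern in plain C11 (names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int vram_lost_counter;	/* bumped when a reset loses VRAM */

struct fpriv { int vram_lost_snapshot; };

static void open_kms(struct fpriv *f)
{
	/* snapshot at open, like amdgpu_driver_open_kms() above */
	f->vram_lost_snapshot = atomic_load(&vram_lost_counter);
}

static bool vram_lost(const struct fpriv *f)
{
	return f->vram_lost_snapshot != atomic_load(&vram_lost_counter);
}

int main(void)
{
	struct fpriv f;

	open_kms(&f);
	atomic_fetch_add(&vram_lost_counter, 1);	/* simulate a reset */
	printf("client stale: %s\n", vram_lost(&f) ? "yes" : "no");
	return 0;
}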
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index dbd10618ec20..43a9d3aec6c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -534,6 +534,9 @@ struct amdgpu_framebuffer {
534 ((em) == ATOM_ENCODER_MODE_DP_MST)) 534 ((em) == ATOM_ENCODER_MODE_DP_MST))
535 535
536/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ 536/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
537#define DRM_SCANOUTPOS_VALID (1 << 0)
538#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1)
539#define DRM_SCANOUTPOS_ACCURATE (1 << 2)
537#define USE_REAL_VBLANKSTART (1 << 30) 540#define USE_REAL_VBLANKSTART (1 << 30)
538#define GET_DISTANCE_TO_VBLANKSTART (1 << 31) 541#define GET_DISTANCE_TO_VBLANKSTART (1 << 31)
539 542
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 365883d7948d..8ee69652be8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -960,6 +960,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
960 return -EINVAL; 960 return -EINVAL;
961 961
962 /* hurrah the memory is not visible ! */ 962 /* hurrah the memory is not visible ! */
963 atomic64_inc(&adev->num_vram_cpu_page_faults);
963 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); 964 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
964 lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; 965 lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
965 for (i = 0; i < abo->placement.num_placement; i++) { 966 for (i = 0; i < abo->placement.num_placement; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index f5ae871aa11c..b7e1c026c0c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -72,6 +72,7 @@ static int amdgpu_pp_early_init(void *handle)
72 case CHIP_CARRIZO: 72 case CHIP_CARRIZO:
73 case CHIP_STONEY: 73 case CHIP_STONEY:
74 case CHIP_VEGA10: 74 case CHIP_VEGA10:
75 case CHIP_RAVEN:
75 adev->pp_enabled = true; 76 adev->pp_enabled = true;
76 if (amdgpu_create_pp_handle(adev)) 77 if (amdgpu_create_pp_handle(adev))
77 return -EINVAL; 78 return -EINVAL;
@@ -187,6 +188,9 @@ static int amdgpu_pp_hw_fini(void *handle)
187 int ret = 0; 188 int ret = 0;
188 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 189 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
189 190
191 if (adev->pp_enabled && adev->pm.dpm_enabled)
192 amdgpu_pm_sysfs_fini(adev);
193
190 if (adev->powerplay.ip_funcs->hw_fini) 194 if (adev->powerplay.ip_funcs->hw_fini)
191 ret = adev->powerplay.ip_funcs->hw_fini( 195 ret = adev->powerplay.ip_funcs->hw_fini(
192 adev->powerplay.pp_handle); 196 adev->powerplay.pp_handle);
@@ -205,10 +209,9 @@ static void amdgpu_pp_late_fini(void *handle)
205 adev->powerplay.ip_funcs->late_fini( 209 adev->powerplay.ip_funcs->late_fini(
206 adev->powerplay.pp_handle); 210 adev->powerplay.pp_handle);
207 211
208 if (adev->pp_enabled && adev->pm.dpm_enabled)
209 amdgpu_pm_sysfs_fini(adev);
210 212
211 amd_powerplay_destroy(adev->powerplay.pp_handle); 213 if (adev->pp_enabled)
214 amd_powerplay_destroy(adev->powerplay.pp_handle);
212} 215}
213 216
214static int amdgpu_pp_suspend(void *handle) 217static int amdgpu_pp_suspend(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index ac5e92e5d59d..4083be61b328 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -24,12 +24,13 @@
24 */ 24 */
25 25
26#include <linux/firmware.h> 26#include <linux/firmware.h>
27#include "drmP.h" 27#include <drm/drmP.h>
28#include "amdgpu.h" 28#include "amdgpu.h"
29#include "amdgpu_psp.h" 29#include "amdgpu_psp.h"
30#include "amdgpu_ucode.h" 30#include "amdgpu_ucode.h"
31#include "soc15_common.h" 31#include "soc15_common.h"
32#include "psp_v3_1.h" 32#include "psp_v3_1.h"
33#include "psp_v10_0.h"
33 34
34static void psp_set_funcs(struct amdgpu_device *adev); 35static void psp_set_funcs(struct amdgpu_device *adev);
35 36
@@ -61,6 +62,12 @@ static int psp_sw_init(void *handle)
61 psp->compare_sram_data = psp_v3_1_compare_sram_data; 62 psp->compare_sram_data = psp_v3_1_compare_sram_data;
62 psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; 63 psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
63 break; 64 break;
65 case CHIP_RAVEN:
66 psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
67 psp->ring_init = psp_v10_0_ring_init;
68 psp->cmd_submit = psp_v10_0_cmd_submit;
69 psp->compare_sram_data = psp_v10_0_compare_sram_data;
70 break;
64 default: 71 default:
65 return -EINVAL; 72 return -EINVAL;
66 } 73 }
@@ -145,8 +152,8 @@ static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
145 uint64_t tmr_mc, uint32_t size) 152 uint64_t tmr_mc, uint32_t size)
146{ 153{
147 cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; 154 cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
148 cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = (uint32_t)tmr_mc; 155 cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
149 cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = (uint32_t)(tmr_mc >> 32); 156 cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
150 cmd->cmd.cmd_setup_tmr.buf_size = size; 157 cmd->cmd.cmd_setup_tmr.buf_size = size;
151} 158}
152 159
@@ -230,6 +237,13 @@ static int psp_asd_load(struct psp_context *psp)
230 int ret; 237 int ret;
231 struct psp_gfx_cmd_resp *cmd; 238 struct psp_gfx_cmd_resp *cmd;
232 239
 240 /* If the PSP version doesn't match the ASD version, ASD loading will fail.
 241 * Add a workaround to bypass it for SR-IOV for now.
 242 * TODO: add a version check to make this common.
 243 */
244 if (amdgpu_sriov_vf(psp->adev))
245 return 0;
246
233 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); 247 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
234 if (!cmd) 248 if (!cmd)
235 return -ENOMEM; 249 return -ENOMEM;
@@ -319,14 +333,11 @@ static int psp_load_fw(struct amdgpu_device *adev)
319{ 333{
320 int ret; 334 int ret;
321 struct psp_context *psp = &adev->psp; 335 struct psp_context *psp = &adev->psp;
322 struct psp_gfx_cmd_resp *cmd;
323 336
324 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); 337 psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
325 if (!cmd) 338 if (!psp->cmd)
326 return -ENOMEM; 339 return -ENOMEM;
327 340
328 psp->cmd = cmd;
329
330 ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, 341 ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
331 AMDGPU_GEM_DOMAIN_GTT, 342 AMDGPU_GEM_DOMAIN_GTT,
332 &psp->fw_pri_bo, 343 &psp->fw_pri_bo,
@@ -365,8 +376,6 @@ static int psp_load_fw(struct amdgpu_device *adev)
365 if (ret) 376 if (ret)
366 goto failed_mem; 377 goto failed_mem;
367 378
368 kfree(cmd);
369
370 return 0; 379 return 0;
371 380
372failed_mem: 381failed_mem:
@@ -376,7 +385,8 @@ failed_mem1:
376 amdgpu_bo_free_kernel(&psp->fw_pri_bo, 385 amdgpu_bo_free_kernel(&psp->fw_pri_bo,
377 &psp->fw_pri_mc_addr, &psp->fw_pri_buf); 386 &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
378failed: 387failed:
379 kfree(cmd); 388 kfree(psp->cmd);
389 psp->cmd = NULL;
380 return ret; 390 return ret;
381} 391}
382 392
@@ -436,6 +446,9 @@ static int psp_hw_fini(void *handle)
436 amdgpu_bo_free_kernel(&psp->fence_buf_bo, 446 amdgpu_bo_free_kernel(&psp->fence_buf_bo,
437 &psp->fence_buf_mc_addr, &psp->fence_buf); 447 &psp->fence_buf_mc_addr, &psp->fence_buf);
438 448
449 kfree(psp->cmd);
450 psp->cmd = NULL;
451
439 return 0; 452 return 0;
440} 453}
441 454
@@ -542,3 +555,12 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block =
542 .rev = 0, 555 .rev = 0,
543 .funcs = &psp_ip_funcs, 556 .funcs = &psp_ip_funcs,
544}; 557};
558
559const struct amdgpu_ip_block_version psp_v10_0_ip_block =
560{
561 .type = AMD_IP_BLOCK_TYPE_PSP,
562 .major = 10,
563 .minor = 0,
564 .rev = 0,
565 .funcs = &psp_ip_funcs,
566};
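Editor's note: the psp_prep_tmr_cmd_buf() change above swaps open-coded casts for the kernel's lower_32_bits()/upper_32_bits() helpers when splitting the 64-bit TMR address across two 32-bit command fields. Userspace stand-ins for the helpers, with an assumed example address:

#include <inttypes.h>
#include <stdio.h>

/* stand-ins for the kernel helpers of the same names */
#define lower_32_bits(n) ((uint32_t)((n) & 0xffffffffULL))
#define upper_32_bits(n) ((uint32_t)((n) >> 32))

int main(void)
{
	uint64_t tmr_mc = 0x0000000123456000ULL;	/* assumed GPU address */

	/* prints lo 0x23456000, hi 0x00000001 */
	printf("buf_phy_addr_lo 0x%08" PRIx32 ", buf_phy_addr_hi 0x%08" PRIx32 "\n",
	       lower_32_bits(tmr_mc), upper_32_bits(tmr_mc));
	return 0;
}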
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 0301e4e0b297..1a1c8b469f93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -138,4 +138,6 @@ extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
138extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, 138extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
139 uint32_t field_val, uint32_t mask, bool check_changed); 139 uint32_t field_val, uint32_t mask, bool check_changed);
140 140
141extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
142
141#endif 143#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
new file mode 100644
index 000000000000..befc09b68543
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -0,0 +1,299 @@
1/*
2 * Copyright 2017 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Andres Rodriguez
23 */
24
25#include "amdgpu.h"
26#include "amdgpu_ring.h"
27
28static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
29 int hw_ip)
30{
31 if (!mapper)
32 return -EINVAL;
33
34	if (hw_ip >= AMDGPU_MAX_IP_NUM)
35 return -EINVAL;
36
37 mapper->hw_ip = hw_ip;
38 mutex_init(&mapper->lock);
39
40 memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
41
42 return 0;
43}
44
45static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
46 int ring)
47{
48 return mapper->queue_map[ring];
49}
50
51static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
52 int ring, struct amdgpu_ring *pring)
53{
54 if (WARN_ON(mapper->queue_map[ring])) {
55 DRM_ERROR("Un-expected ring re-map\n");
56 return -EINVAL;
57 }
58
59 mapper->queue_map[ring] = pring;
60
61 return 0;
62}
63
64static int amdgpu_identity_map(struct amdgpu_device *adev,
65 struct amdgpu_queue_mapper *mapper,
66 int ring,
67 struct amdgpu_ring **out_ring)
68{
69 switch (mapper->hw_ip) {
70 case AMDGPU_HW_IP_GFX:
71 *out_ring = &adev->gfx.gfx_ring[ring];
72 break;
73 case AMDGPU_HW_IP_COMPUTE:
74 *out_ring = &adev->gfx.compute_ring[ring];
75 break;
76 case AMDGPU_HW_IP_DMA:
77 *out_ring = &adev->sdma.instance[ring].ring;
78 break;
79 case AMDGPU_HW_IP_UVD:
80 *out_ring = &adev->uvd.ring;
81 break;
82 case AMDGPU_HW_IP_VCE:
83 *out_ring = &adev->vce.ring[ring];
84 break;
85 case AMDGPU_HW_IP_UVD_ENC:
86 *out_ring = &adev->uvd.ring_enc[ring];
87 break;
88 case AMDGPU_HW_IP_VCN_DEC:
89 *out_ring = &adev->vcn.ring_dec;
90 break;
91 case AMDGPU_HW_IP_VCN_ENC:
92 *out_ring = &adev->vcn.ring_enc[ring];
93 break;
94 default:
95 *out_ring = NULL;
96 DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
97 return -EINVAL;
98 }
99
100 return amdgpu_update_cached_map(mapper, ring, *out_ring);
101}
102
103static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
104{
105 switch (hw_ip) {
106 case AMDGPU_HW_IP_GFX:
107 return AMDGPU_RING_TYPE_GFX;
108 case AMDGPU_HW_IP_COMPUTE:
109 return AMDGPU_RING_TYPE_COMPUTE;
110 case AMDGPU_HW_IP_DMA:
111 return AMDGPU_RING_TYPE_SDMA;
112 case AMDGPU_HW_IP_UVD:
113 return AMDGPU_RING_TYPE_UVD;
114 case AMDGPU_HW_IP_VCE:
115 return AMDGPU_RING_TYPE_VCE;
116 default:
117 DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
118 return -1;
119 }
120}
121
122static int amdgpu_lru_map(struct amdgpu_device *adev,
123 struct amdgpu_queue_mapper *mapper,
124 int user_ring,
125 struct amdgpu_ring **out_ring)
126{
127 int r, i, j;
128 int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
129 int ring_blacklist[AMDGPU_MAX_RINGS];
130 struct amdgpu_ring *ring;
131
132 /* 0 is a valid ring index, so initialize to -1 */
133 memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
134
135 for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
136 ring = mapper->queue_map[i];
137 if (ring)
138 ring_blacklist[j++] = ring->idx;
139 }
140
141 r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
142 j, out_ring);
143 if (r)
144 return r;
145
146 return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
147}
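
The blacklist is seeded byte-wise with 0xff so that every int slot starts out as -1, since 0 is a valid ring index. A standalone sketch (plain userspace C, illustrative only) of why that memset works on the two's-complement targets the kernel supports:

#include <stdio.h>
#include <string.h>

int main(void)
{
	int blacklist[4];

	/* every byte 0xff -> every 32-bit slot reads back as -1 */
	memset(blacklist, 0xff, sizeof(blacklist));
	printf("%d %d\n", blacklist[0], blacklist[3]); /* prints: -1 -1 */
	return 0;
}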
148
149/**
150 * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
151 *
152 * @adev: amdgpu_device pointer
153 * @mgr: amdgpu_queue_mgr structure holding queue information
154 *
155 * Initialize the selected @mgr (all asics).
156 *
157 * Returns 0 on success, error on failure.
158 */
159int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
160 struct amdgpu_queue_mgr *mgr)
161{
162 int i, r;
163
164 if (!adev || !mgr)
165 return -EINVAL;
166
167 memset(mgr, 0, sizeof(*mgr));
168
169 for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
170 r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
171 if (r)
172 return r;
173 }
174
175 return 0;
176}
177
178/**
179 * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
180 *
181 * @adev: amdgpu_device pointer
182 * @mgr: amdgpu_queue_mgr structure holding queue information
183 *
184 * De-initialize the selected @mgr (all asics).
185 *
186 * Returns 0 on success, error on failure.
187 */
188int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
189 struct amdgpu_queue_mgr *mgr)
190{
191 return 0;
192}
193
194/**
195 * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
196 *
197 * @adev: amdgpu_device pointer
198 * @mgr: amdgpu_queue_mgr structure holding queue information
199 * @hw_ip: HW IP enum
200 * @instance: HW instance
201 * @ring: user ring id
202 * @out_ring: pointer to mapped amdgpu_ring
203 *
204 * Map a userspace ring id to an appropriate kernel ring. Different
205 * policies are configurable at a HW IP level.
206 *
207 * Returns 0 on success, error on failure.
208 */
209int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
210 struct amdgpu_queue_mgr *mgr,
211 int hw_ip, int instance, int ring,
212 struct amdgpu_ring **out_ring)
213{
214 int r, ip_num_rings;
215 struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
216
217 if (!adev || !mgr || !out_ring)
218 return -EINVAL;
219
220 if (hw_ip >= AMDGPU_MAX_IP_NUM)
221 return -EINVAL;
222
223 if (ring >= AMDGPU_MAX_RINGS)
224 return -EINVAL;
225
226 /* Right now all IPs have only one instance - multiple rings. */
227 if (instance != 0) {
228 DRM_ERROR("invalid ip instance: %d\n", instance);
229 return -EINVAL;
230 }
231
232 switch (hw_ip) {
233 case AMDGPU_HW_IP_GFX:
234 ip_num_rings = adev->gfx.num_gfx_rings;
235 break;
236 case AMDGPU_HW_IP_COMPUTE:
237 ip_num_rings = adev->gfx.num_compute_rings;
238 break;
239 case AMDGPU_HW_IP_DMA:
240 ip_num_rings = adev->sdma.num_instances;
241 break;
242 case AMDGPU_HW_IP_UVD:
243 ip_num_rings = 1;
244 break;
245 case AMDGPU_HW_IP_VCE:
246 ip_num_rings = adev->vce.num_rings;
247 break;
248 case AMDGPU_HW_IP_UVD_ENC:
249 ip_num_rings = adev->uvd.num_enc_rings;
250 break;
251 case AMDGPU_HW_IP_VCN_DEC:
252 ip_num_rings = 1;
253 break;
254 case AMDGPU_HW_IP_VCN_ENC:
255 ip_num_rings = adev->vcn.num_enc_rings;
256 break;
257 default:
258 DRM_ERROR("unknown ip type: %d\n", hw_ip);
259 return -EINVAL;
260 }
261
262 if (ring >= ip_num_rings) {
263 DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
264 ring, ip_num_rings, hw_ip);
265 return -EINVAL;
266 }
267
268 mutex_lock(&mapper->lock);
269
270 *out_ring = amdgpu_get_cached_map(mapper, ring);
271 if (*out_ring) {
272 /* cache hit */
273 r = 0;
274 goto out_unlock;
275 }
276
277 switch (mapper->hw_ip) {
278 case AMDGPU_HW_IP_GFX:
279 case AMDGPU_HW_IP_UVD:
280 case AMDGPU_HW_IP_VCE:
281 case AMDGPU_HW_IP_UVD_ENC:
282 case AMDGPU_HW_IP_VCN_DEC:
283 case AMDGPU_HW_IP_VCN_ENC:
284 r = amdgpu_identity_map(adev, mapper, ring, out_ring);
285 break;
286 case AMDGPU_HW_IP_DMA:
287 case AMDGPU_HW_IP_COMPUTE:
288 r = amdgpu_lru_map(adev, mapper, ring, out_ring);
289 break;
290 default:
291 *out_ring = NULL;
292 r = -EINVAL;
293 DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
294 }
295
296out_unlock:
297 mutex_unlock(&mapper->lock);
298 return r;
299}
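
For orientation, a hedged sketch of how a submission path would consume the mapper; this wrapper is an assumption for illustration and is not part of the patch:

static int example_resolve_user_ring(struct amdgpu_device *adev,
				     struct amdgpu_queue_mgr *mgr,
				     u32 hw_ip, u32 user_ring)
{
	struct amdgpu_ring *ring = NULL;
	int r;

	/* instance is always 0 for the IPs handled above */
	r = amdgpu_queue_mgr_map(adev, mgr, hw_ip, 0, user_ring, &ring);
	if (r)
		return r;

	/* GFX/UVD/VCE map 1:1; COMPUTE and DMA may land on any LRU ring */
	DRM_DEBUG("user ring %u mapped to kernel ring %u\n",
		  user_ring, ring->idx);
	return 0;
}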
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6a85db0c0bc3..75165e07b1cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -135,6 +135,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
135 135
136 if (ring->funcs->end_use) 136 if (ring->funcs->end_use)
137 ring->funcs->end_use(ring); 137 ring->funcs->end_use(ring);
138
139 amdgpu_ring_lru_touch(ring->adev, ring);
138} 140}
139 141
140/** 142/**
@@ -253,10 +255,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
253 } 255 }
254 256
255 ring->max_dw = max_dw; 257 ring->max_dw = max_dw;
258 INIT_LIST_HEAD(&ring->lru_list);
259 amdgpu_ring_lru_touch(adev, ring);
256 260
257 if (amdgpu_debugfs_ring_init(adev, ring)) { 261 if (amdgpu_debugfs_ring_init(adev, ring)) {
258 DRM_ERROR("Failed to register debugfs file for rings !\n"); 262 DRM_ERROR("Failed to register debugfs file for rings !\n");
259 } 263 }
264
260 return 0; 265 return 0;
261} 266}
262 267
@@ -294,6 +299,84 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
294 ring->adev->rings[ring->idx] = NULL; 299 ring->adev->rings[ring->idx] = NULL;
295} 300}
296 301
302static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
303 struct amdgpu_ring *ring)
304{
305 /* list_move_tail handles the case where ring isn't part of the list */
306 list_move_tail(&ring->lru_list, &adev->ring_lru_list);
307}
308
309static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
310 int *blacklist, int num_blacklist)
311{
312 int i;
313
314 for (i = 0; i < num_blacklist; i++) {
315 if (ring->idx == blacklist[i])
316 return true;
317 }
318
319 return false;
320}
321
322/**
323 * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
324 *
325 * @adev: amdgpu_device pointer
326 * @type: amdgpu_ring_type enum
327 * @blacklist: blacklisted ring ids array
328 * @num_blacklist: number of entries in @blacklist
329 * @ring: output ring
330 *
331 * Retrieve the amdgpu_ring structure for the least recently used ring of
332 * a specific IP block (all asics).
333 * Returns 0 on success, error on failure.
334 */
335int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
336 int num_blacklist, struct amdgpu_ring **ring)
337{
338 struct amdgpu_ring *entry;
339
340 /* List is sorted in LRU order, find first entry corresponding
341 * to the desired HW IP */
342 *ring = NULL;
343 spin_lock(&adev->ring_lru_list_lock);
344 list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
345 if (entry->funcs->type != type)
346 continue;
347
348 if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
349 continue;
350
351 *ring = entry;
352 amdgpu_ring_lru_touch_locked(adev, *ring);
353 break;
354 }
355 spin_unlock(&adev->ring_lru_list_lock);
356
357 if (!*ring) {
358 DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
359 return -EINVAL;
360 }
361
362 return 0;
363}
364
365/**
366 * amdgpu_ring_lru_touch - mark a ring as recently being used
367 *
368 * @adev: amdgpu_device pointer
369 * @ring: ring to touch
370 *
371 * Move @ring to the tail of the lru list
372 */
373void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
374{
375 spin_lock(&adev->ring_lru_list_lock);
376 amdgpu_ring_lru_touch_locked(adev, ring);
377 spin_unlock(&adev->ring_lru_list_lock);
378}
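
A hedged usage sketch of the LRU getter, blacklisting an already-claimed ring so two consecutive calls cannot return the same one (hypothetical helper, not part of the patch):

static int example_grab_two_compute_rings(struct amdgpu_device *adev,
					  struct amdgpu_ring **first,
					  struct amdgpu_ring **second)
{
	int blacklist[1];
	int r;

	r = amdgpu_ring_lru_get(adev, AMDGPU_RING_TYPE_COMPUTE,
				NULL, 0, first);
	if (r)
		return r;

	/* exclude the ring we just claimed from the second lookup */
	blacklist[0] = (*first)->idx;
	return amdgpu_ring_lru_get(adev, AMDGPU_RING_TYPE_COMPUTE,
				   blacklist, 1, second);
}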
379
297/* 380/*
298 * Debugfs info 381 * Debugfs info
299 */ 382 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 944443c5b90a..bc8dec992f73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -47,7 +47,9 @@ enum amdgpu_ring_type {
47 AMDGPU_RING_TYPE_UVD, 47 AMDGPU_RING_TYPE_UVD,
48 AMDGPU_RING_TYPE_VCE, 48 AMDGPU_RING_TYPE_VCE,
49 AMDGPU_RING_TYPE_KIQ, 49 AMDGPU_RING_TYPE_KIQ,
50 AMDGPU_RING_TYPE_UVD_ENC 50 AMDGPU_RING_TYPE_UVD_ENC,
51 AMDGPU_RING_TYPE_VCN_DEC,
52 AMDGPU_RING_TYPE_VCN_ENC
51}; 53};
52 54
53struct amdgpu_device; 55struct amdgpu_device;
@@ -76,6 +78,7 @@ struct amdgpu_fence_driver {
76int amdgpu_fence_driver_init(struct amdgpu_device *adev); 78int amdgpu_fence_driver_init(struct amdgpu_device *adev);
77void amdgpu_fence_driver_fini(struct amdgpu_device *adev); 79void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
78void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); 80void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
81void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring);
79 82
80int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, 83int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
81 unsigned num_hw_submission); 84 unsigned num_hw_submission);
@@ -130,6 +133,7 @@ struct amdgpu_ring_funcs {
130 int (*test_ib)(struct amdgpu_ring *ring, long timeout); 133 int (*test_ib)(struct amdgpu_ring *ring, long timeout);
131 /* insert NOP packets */ 134 /* insert NOP packets */
132 void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); 135 void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
136 void (*insert_start)(struct amdgpu_ring *ring);
133 void (*insert_end)(struct amdgpu_ring *ring); 137 void (*insert_end)(struct amdgpu_ring *ring);
134 /* pad the indirect buffer to the necessary number of dw */ 138 /* pad the indirect buffer to the necessary number of dw */
135 void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); 139 void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
@@ -142,6 +146,7 @@ struct amdgpu_ring_funcs {
142 void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); 146 void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
143 void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); 147 void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
144 void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); 148 void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
149 void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
145}; 150};
146 151
147struct amdgpu_ring { 152struct amdgpu_ring {
@@ -149,6 +154,7 @@ struct amdgpu_ring {
149 const struct amdgpu_ring_funcs *funcs; 154 const struct amdgpu_ring_funcs *funcs;
150 struct amdgpu_fence_driver fence_drv; 155 struct amdgpu_fence_driver fence_drv;
151 struct amd_gpu_scheduler sched; 156 struct amd_gpu_scheduler sched;
157 struct list_head lru_list;
152 158
153 struct amdgpu_bo *ring_obj; 159 struct amdgpu_bo *ring_obj;
154 volatile uint32_t *ring; 160 volatile uint32_t *ring;
@@ -180,6 +186,7 @@ struct amdgpu_ring {
180 u64 cond_exe_gpu_addr; 186 u64 cond_exe_gpu_addr;
181 volatile u32 *cond_exe_cpu_addr; 187 volatile u32 *cond_exe_cpu_addr;
182 unsigned vm_inv_eng; 188 unsigned vm_inv_eng;
189 bool has_compute_vm_bug;
183#if defined(CONFIG_DEBUG_FS) 190#if defined(CONFIG_DEBUG_FS)
184 struct dentry *ent; 191 struct dentry *ent;
185#endif 192#endif
@@ -194,6 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
194 unsigned ring_size, struct amdgpu_irq_src *irq_src, 201 unsigned ring_size, struct amdgpu_irq_src *irq_src,
195 unsigned irq_type); 202 unsigned irq_type);
196void amdgpu_ring_fini(struct amdgpu_ring *ring); 203void amdgpu_ring_fini(struct amdgpu_ring *ring);
204int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
205 int num_blacklist, struct amdgpu_ring **ring);
206void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
197static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) 207static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
198{ 208{
199 int i = 0; 209 int i = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index ed814e6d0207..a6899180b265 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -298,6 +298,25 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
298 return NULL; 298 return NULL;
299} 299}
300 300
301int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
302{
303 struct amdgpu_sync_entry *e;
304 struct hlist_node *tmp;
305 int i, r;
306
307 hash_for_each_safe(sync->fences, i, tmp, e, node) {
308 r = dma_fence_wait(e->fence, intr);
309 if (r)
310 return r;
311
312 hash_del(&e->node);
313 dma_fence_put(e->fence);
314 kmem_cache_free(amdgpu_sync_slab, e);
315 }
316
317 return 0;
318}
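
The new helper drains every fence the sync object tracks, freeing entries as they signal. A hedged sketch of a caller (hypothetical wrapper; assumes the sync was filled from a reservation object as elsewhere in the driver):

static int example_wait_resv_idle(struct amdgpu_device *adev,
				  struct amdgpu_bo *bo)
{
	struct amdgpu_sync sync;
	int r;

	amdgpu_sync_create(&sync);
	r = amdgpu_sync_resv(adev, &sync, bo->tbo.resv,
			     AMDGPU_FENCE_OWNER_UNDEFINED);
	if (!r)
		r = amdgpu_sync_wait(&sync, false); /* uninterruptible */
	amdgpu_sync_free(&sync);
	return r;
}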
319
301/** 320/**
302 * amdgpu_sync_free - free the sync object 321 * amdgpu_sync_free - free the sync object
303 * 322 *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 605be266e07f..dc7687993317 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
49struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, 49struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
50 struct amdgpu_ring *ring); 50 struct amdgpu_ring *ring);
51struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); 51struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
52int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
52void amdgpu_sync_free(struct amdgpu_sync *sync); 53void amdgpu_sync_free(struct amdgpu_sync *sync);
53int amdgpu_sync_init(void); 54int amdgpu_sync_init(void);
54void amdgpu_sync_fini(void); 55void amdgpu_sync_fini(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5db0230e45c6..c9b131b13ef7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -29,11 +29,11 @@
29 * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> 29 * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30 * Dave Airlie 30 * Dave Airlie
31 */ 31 */
32#include <ttm/ttm_bo_api.h> 32#include <drm/ttm/ttm_bo_api.h>
33#include <ttm/ttm_bo_driver.h> 33#include <drm/ttm/ttm_bo_driver.h>
34#include <ttm/ttm_placement.h> 34#include <drm/ttm/ttm_placement.h>
35#include <ttm/ttm_module.h> 35#include <drm/ttm/ttm_module.h>
36#include <ttm/ttm_page_alloc.h> 36#include <drm/ttm/ttm_page_alloc.h>
37#include <drm/drmP.h> 37#include <drm/drmP.h>
38#include <drm/amdgpu_drm.h> 38#include <drm/amdgpu_drm.h>
39#include <linux/seq_file.h> 39#include <linux/seq_file.h>
@@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
745 return r; 745 return r;
746 } 746 }
747 747
748 spin_lock(&gtt->adev->gtt_list_lock);
748 flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); 749 flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
749 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; 750 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
750 r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, 751 r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
@@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
753 if (r) { 754 if (r) {
754 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", 755 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
755 ttm->num_pages, gtt->offset); 756 ttm->num_pages, gtt->offset);
756 return r; 757 goto error_gart_bind;
757 } 758 }
758 spin_lock(&gtt->adev->gtt_list_lock); 759
759 list_add_tail(&gtt->list, &gtt->adev->gtt_list); 760 list_add_tail(&gtt->list, &gtt->adev->gtt_list);
761error_gart_bind:
760 spin_unlock(&gtt->adev->gtt_list_lock); 762 spin_unlock(&gtt->adev->gtt_list_lock);
761 return 0; 763 return r;
762} 764}
763 765
764int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) 766int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
@@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
789static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) 791static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
790{ 792{
791 struct amdgpu_ttm_tt *gtt = (void *)ttm; 793 struct amdgpu_ttm_tt *gtt = (void *)ttm;
794 int r;
792 795
793 if (gtt->userptr) 796 if (gtt->userptr)
794 amdgpu_ttm_tt_unpin_userptr(ttm); 797 amdgpu_ttm_tt_unpin_userptr(ttm);
@@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
797 return 0; 800 return 0;
798 801
799 /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ 802 /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
800 if (gtt->adev->gart.ready)
801 amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
802
803 spin_lock(&gtt->adev->gtt_list_lock); 803 spin_lock(&gtt->adev->gtt_list_lock);
804 r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
805 if (r) {
806 DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
807 gtt->ttm.ttm.num_pages, gtt->offset);
808 goto error_unbind;
809 }
804 list_del_init(&gtt->list); 810 list_del_init(&gtt->list);
811error_unbind:
805 spin_unlock(&gtt->adev->gtt_list_lock); 812 spin_unlock(&gtt->adev->gtt_list_lock);
806 813 return r;
807 return 0;
808} 814}
809 815
810static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) 816static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
@@ -1115,7 +1121,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1115 /* Change the size here instead of the init above so only lpfn is affected */ 1121 /* Change the size here instead of the init above so only lpfn is affected */
1116 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 1122 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
1117 1123
1118 r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true, 1124 r = amdgpu_bo_create(adev, adev->mc.stolen_size, PAGE_SIZE, true,
1119 AMDGPU_GEM_DOMAIN_VRAM, 1125 AMDGPU_GEM_DOMAIN_VRAM,
1120 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 1126 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1121 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, 1127 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
@@ -1462,6 +1468,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1462 if (size & 0x3 || *pos & 0x3) 1468 if (size & 0x3 || *pos & 0x3)
1463 return -EINVAL; 1469 return -EINVAL;
1464 1470
1471 if (*pos >= adev->mc.mc_vram_size)
1472 return -ENXIO;
1473
1465 while (size) { 1474 while (size) {
1466 unsigned long flags; 1475 unsigned long flags;
1467 uint32_t value; 1476 uint32_t value;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index dfd1c98efa7c..4f50eeb65855 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -197,6 +197,27 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
197 } 197 }
198} 198}
199 199
200void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr)
201{
202 uint16_t version_major = le16_to_cpu(hdr->header_version_major);
203 uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
204
205 DRM_DEBUG("GPU_INFO\n");
206 amdgpu_ucode_print_common_hdr(hdr);
207
208 if (version_major == 1) {
209 const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr =
210 container_of(hdr, struct gpu_info_firmware_header_v1_0, header);
211
212 DRM_DEBUG("version_major: %u\n",
213 le16_to_cpu(gpu_info_hdr->version_major));
214 DRM_DEBUG("version_minor: %u\n",
215 le16_to_cpu(gpu_info_hdr->version_minor));
216 } else {
217 DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor);
218 }
219}
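
The container_of() conversion works because the common header is embedded as the first member of the versioned header. A minimal sketch of the idiom, illustrative only:

static const struct gpu_info_firmware_header_v1_0 *
example_to_gpu_info_hdr(const struct common_firmware_header *hdr)
{
	/* recover the enclosing struct from a pointer to its 'header'
	 * member; container_of() does the offset arithmetic */
	return container_of(hdr, struct gpu_info_firmware_header_v1_0,
			    header);
}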
220
200int amdgpu_ucode_validate(const struct firmware *fw) 221int amdgpu_ucode_validate(const struct firmware *fw)
201{ 222{
202 const struct common_firmware_header *hdr = 223 const struct common_firmware_header *hdr =
@@ -253,6 +274,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
253 return AMDGPU_FW_LOAD_DIRECT; 274 return AMDGPU_FW_LOAD_DIRECT;
254 else 275 else
255 return AMDGPU_FW_LOAD_PSP; 276 return AMDGPU_FW_LOAD_PSP;
277 case CHIP_RAVEN:
278#if 0
279 if (!load_type)
280 return AMDGPU_FW_LOAD_DIRECT;
281 else
282 return AMDGPU_FW_LOAD_PSP;
283#else
284 return AMDGPU_FW_LOAD_DIRECT;
285#endif
256 default: 286 default:
257 DRM_ERROR("Unknow firmware load type\n"); 287 DRM_ERROR("Unknow firmware load type\n");
258 } 288 }
@@ -349,7 +379,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
349 379
350 err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, 380 err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
351 amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, 381 amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
352 0, NULL, NULL, bo); 382 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
383 NULL, NULL, bo);
353 if (err) { 384 if (err) {
354 dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); 385 dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
355 goto failed; 386 goto failed;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 758f03a1770d..30b5500dc152 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -113,6 +113,32 @@ struct sdma_firmware_header_v1_1 {
113 uint32_t digest_size; 113 uint32_t digest_size;
114}; 114};
115 115
116/* gpu info payload */
117struct gpu_info_firmware_v1_0 {
118 uint32_t gc_num_se;
119 uint32_t gc_num_cu_per_sh;
120 uint32_t gc_num_sh_per_se;
121 uint32_t gc_num_rb_per_se;
122 uint32_t gc_num_tccs;
123 uint32_t gc_num_gprs;
124 uint32_t gc_num_max_gs_thds;
125 uint32_t gc_gs_table_depth;
126 uint32_t gc_gsprim_buff_depth;
127 uint32_t gc_parameter_cache_depth;
128 uint32_t gc_double_offchip_lds_buffer;
129 uint32_t gc_wave_size;
130 uint32_t gc_max_waves_per_simd;
131 uint32_t gc_max_scratch_slots_per_cu;
132 uint32_t gc_lds_size;
133};
134
135/* version_major=1, version_minor=0 */
136struct gpu_info_firmware_header_v1_0 {
137 struct common_firmware_header header;
138 uint16_t version_major; /* version */
139 uint16_t version_minor; /* version */
140};
141
116/* header is fixed size */ 142/* header is fixed size */
117union amdgpu_firmware_header { 143union amdgpu_firmware_header {
118 struct common_firmware_header common; 144 struct common_firmware_header common;
@@ -124,6 +150,7 @@ union amdgpu_firmware_header {
124 struct rlc_firmware_header_v2_0 rlc_v2_0; 150 struct rlc_firmware_header_v2_0 rlc_v2_0;
125 struct sdma_firmware_header_v1_0 sdma; 151 struct sdma_firmware_header_v1_0 sdma;
126 struct sdma_firmware_header_v1_1 sdma_v1_1; 152 struct sdma_firmware_header_v1_1 sdma_v1_1;
153 struct gpu_info_firmware_header_v1_0 gpu_info;
127 uint8_t raw[0x100]; 154 uint8_t raw[0x100];
128}; 155};
129 156
@@ -184,6 +211,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
184void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); 211void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
185void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); 212void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
186void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); 213void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
214void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
187int amdgpu_ucode_validate(const struct firmware *fw); 215int amdgpu_ucode_validate(const struct firmware *fw);
188bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, 216bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
189 uint16_t hdr_major, uint16_t hdr_minor); 217 uint16_t hdr_major, uint16_t hdr_minor);
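
As with the driver's other firmware containers, the gpu_info payload is expected to sit ucode_array_offset_bytes past the start of the image. A hedged sketch of locating it (hypothetical helper; the real parsing lives in amdgpu_device.c):

static const struct gpu_info_firmware_v1_0 *
example_gpu_info_payload(const struct firmware *fw)
{
	const struct common_firmware_header *hdr =
		(const struct common_firmware_header *)fw->data;

	return (const struct gpu_info_firmware_v1_0 *)
		(fw->data + le32_to_cpu(hdr->ucode_array_offset_bytes));
}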
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 735c38d7db0d..b692ad402252 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -165,35 +165,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
165 adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | 165 adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
166 (binary_id << 8)); 166 (binary_id << 8));
167 167
168 /* allocate firmware, stack and heap BO */ 168 r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
169 169 AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
170 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, 170 &adev->vce.gpu_addr, &adev->vce.cpu_addr);
171 AMDGPU_GEM_DOMAIN_VRAM,
172 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
173 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
174 NULL, NULL, &adev->vce.vcpu_bo);
175 if (r) { 171 if (r) {
176 dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); 172 dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
177 return r; 173 return r;
178 } 174 }
179 175
180 r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
181 if (r) {
182 amdgpu_bo_unref(&adev->vce.vcpu_bo);
183 dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
184 return r;
185 }
186
187 r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM,
188 &adev->vce.gpu_addr);
189 amdgpu_bo_unreserve(adev->vce.vcpu_bo);
190 if (r) {
191 amdgpu_bo_unref(&adev->vce.vcpu_bo);
192 dev_err(adev->dev, "(%d) VCE bo pin failed\n", r);
193 return r;
194 }
195
196
197 ring = &adev->vce.ring[0]; 176 ring = &adev->vce.ring[0];
198 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; 177 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
199 r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, 178 r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
@@ -230,7 +209,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
230 209
231 amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); 210 amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
232 211
233 amdgpu_bo_unref(&adev->vce.vcpu_bo); 212 amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
213 (void **)&adev->vce.cpu_addr);
234 214
235 for (i = 0; i < adev->vce.num_rings; i++) 215 for (i = 0; i < adev->vce.num_rings; i++)
236 amdgpu_ring_fini(&adev->vce.ring[i]); 216 amdgpu_ring_fini(&adev->vce.ring[i]);
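
The hunk above folds the open-coded create/reserve/pin/unreserve sequence into amdgpu_bo_create_kernel(), which additionally returns a CPU mapping (hence the new cpu_addr argument). Roughly, the helper behaves like this simplified sketch; the real implementation lives in amdgpu_object.c:

static int example_bo_create_kernel(struct amdgpu_device *adev,
				    unsigned long size, u32 domain,
				    struct amdgpu_bo **bo_ptr,
				    u64 *gpu_addr, void **cpu_addr)
{
	int r;

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, domain,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
			     NULL, NULL, bo_ptr);
	if (r)
		return r;

	r = amdgpu_bo_reserve(*bo_ptr, false);
	if (r)
		goto err_unref;

	r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
	if (r)
		goto err_unreserve;

	r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);	/* the extra CPU mapping */
	if (r)
		goto err_unpin;

	amdgpu_bo_unreserve(*bo_ptr);
	return 0;

err_unpin:
	amdgpu_bo_unpin(*bo_ptr);
err_unreserve:
	amdgpu_bo_unreserve(*bo_ptr);
err_unref:
	amdgpu_bo_unref(bo_ptr);
	return r;
}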
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 0a7f18c461e4..5ce54cde472d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -33,6 +33,8 @@
33struct amdgpu_vce { 33struct amdgpu_vce {
34 struct amdgpu_bo *vcpu_bo; 34 struct amdgpu_bo *vcpu_bo;
35 uint64_t gpu_addr; 35 uint64_t gpu_addr;
36 void *cpu_addr;
37 void *saved_bo;
36 unsigned fw_version; 38 unsigned fw_version;
37 unsigned fb_version; 39 unsigned fb_version;
38 atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; 40 atomic_t handles[AMDGPU_MAX_VCE_HANDLES];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
new file mode 100644
index 000000000000..09190fadd228
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -0,0 +1,654 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26
27#include <linux/firmware.h>
28#include <linux/module.h>
29#include <drm/drmP.h>
30#include <drm/drm.h>
31
32#include "amdgpu.h"
33#include "amdgpu_pm.h"
34#include "amdgpu_vcn.h"
35#include "soc15d.h"
36#include "soc15_common.h"
37
38#include "vega10/soc15ip.h"
39#include "raven1/VCN/vcn_1_0_offset.h"
40
41/* 1 second timeout */
42#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
43
44/* Firmware Names */
45#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
46
47MODULE_FIRMWARE(FIRMWARE_RAVEN);
48
49static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
50
51int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
52{
53 struct amdgpu_ring *ring;
54 struct amd_sched_rq *rq;
55 unsigned long bo_size;
56 const char *fw_name;
57 const struct common_firmware_header *hdr;
58 unsigned version_major, version_minor, family_id;
59 int r;
60
61 INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
62
63 switch (adev->asic_type) {
64 case CHIP_RAVEN:
65 fw_name = FIRMWARE_RAVEN;
66 break;
67 default:
68 return -EINVAL;
69 }
70
71 r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
72 if (r) {
73 dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
74 fw_name);
75 return r;
76 }
77
78 r = amdgpu_ucode_validate(adev->vcn.fw);
79 if (r) {
80 dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
81 fw_name);
82 release_firmware(adev->vcn.fw);
83 adev->vcn.fw = NULL;
84 return r;
85 }
86
87 hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
88 family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
89 version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
90 version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
91 DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
92 version_major, version_minor, family_id);
93
94
95 bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
96 + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
97 + AMDGPU_VCN_SESSION_SIZE * 40;
98 r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
99 AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
100 &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
101 if (r) {
102 dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
103 return r;
104 }
105
106 ring = &adev->vcn.ring_dec;
107 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
108 r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
109 rq, amdgpu_sched_jobs);
110 if (r != 0) {
111 DRM_ERROR("Failed setting up VCN dec run queue.\n");
112 return r;
113 }
114
115 ring = &adev->vcn.ring_enc[0];
116 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
117 r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
118 rq, amdgpu_sched_jobs);
119 if (r != 0) {
120 DRM_ERROR("Failed setting up VCN enc run queue.\n");
121 return r;
122 }
123
124 return 0;
125}
126
127int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
128{
129 int i;
130
131 kfree(adev->vcn.saved_bo);
132
133 amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);
134
135 amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);
136
137 amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
138 &adev->vcn.gpu_addr,
139 (void **)&adev->vcn.cpu_addr);
140
141 amdgpu_ring_fini(&adev->vcn.ring_dec);
142
143 for (i = 0; i < adev->vcn.num_enc_rings; ++i)
144 amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
145
146 release_firmware(adev->vcn.fw);
147
148 return 0;
149}
150
151int amdgpu_vcn_suspend(struct amdgpu_device *adev)
152{
153 unsigned size;
154 void *ptr;
155
156 if (adev->vcn.vcpu_bo == NULL)
157 return 0;
158
159 cancel_delayed_work_sync(&adev->vcn.idle_work);
160
161 size = amdgpu_bo_size(adev->vcn.vcpu_bo);
162 ptr = adev->vcn.cpu_addr;
163
164 adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
165 if (!adev->vcn.saved_bo)
166 return -ENOMEM;
167
168 memcpy_fromio(adev->vcn.saved_bo, ptr, size);
169
170 return 0;
171}
172
173int amdgpu_vcn_resume(struct amdgpu_device *adev)
174{
175 unsigned size;
176 void *ptr;
177
178 if (adev->vcn.vcpu_bo == NULL)
179 return -EINVAL;
180
181 size = amdgpu_bo_size(adev->vcn.vcpu_bo);
182 ptr = adev->vcn.cpu_addr;
183
184 if (adev->vcn.saved_bo != NULL) {
185 memcpy_toio(ptr, adev->vcn.saved_bo, size);
186 kfree(adev->vcn.saved_bo);
187 adev->vcn.saved_bo = NULL;
188 } else {
189 const struct common_firmware_header *hdr;
190 unsigned offset;
191
192 hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
193 offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
194 memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
195 le32_to_cpu(hdr->ucode_size_bytes));
196 size -= le32_to_cpu(hdr->ucode_size_bytes);
197 ptr += le32_to_cpu(hdr->ucode_size_bytes);
198 memset_io(ptr, 0, size);
199 }
200
201 return 0;
202}
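
Suspend snapshots the whole VCPU BO to system memory; resume either restores that snapshot or re-uploads the raw firmware. A hedged sketch of the ordering an IP block is expected to follow around these helpers, mirroring the UVD/VCE pattern; example_vcn_hw_fini()/example_vcn_hw_init() stand in for the vcn_v1_0 hardware hooks and are assumptions of this sketch:

int example_vcn_hw_fini(struct amdgpu_device *adev);
int example_vcn_hw_init(struct amdgpu_device *adev);

static int example_vcn_ip_suspend(struct amdgpu_device *adev)
{
	int r = example_vcn_hw_fini(adev);	/* quiesce the engine first */

	return r ? r : amdgpu_vcn_suspend(adev); /* then snapshot the BO */
}

static int example_vcn_ip_resume(struct amdgpu_device *adev)
{
	int r = amdgpu_vcn_resume(adev);	/* restore the BO first */

	return r ? r : example_vcn_hw_init(adev); /* then restart engine */
}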
203
204static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
205{
206 struct amdgpu_device *adev =
207 container_of(work, struct amdgpu_device, vcn.idle_work.work);
208 unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
209
210 if (fences == 0) {
211 if (adev->pm.dpm_enabled) {
212 amdgpu_dpm_enable_uvd(adev, false);
213 } else {
214 amdgpu_asic_set_uvd_clocks(adev, 0, 0);
215 }
216 } else {
217 schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
218 }
219}
220
221void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
222{
223 struct amdgpu_device *adev = ring->adev;
224 bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
225
226 if (set_clocks) {
227 if (adev->pm.dpm_enabled) {
228 amdgpu_dpm_enable_uvd(adev, true);
229 } else {
230 amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
231 }
232 }
233}
234
235void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
236{
237 schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
238}
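
The idle handler only re-arms itself while fences are outstanding; rings opt into this power management by wiring the two helpers into their funcs table. A hedged sketch showing just the relevant members of a hypothetical VCN decode ring funcs table (the real table carries many more hooks):

static const struct amdgpu_ring_funcs example_vcn_dec_ring_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.begin_use = amdgpu_vcn_ring_begin_use, /* ungate clocks/DPM */
	.end_use = amdgpu_vcn_ring_end_use,	/* re-arm the idle timer */
};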
239
240int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
241{
242 struct amdgpu_device *adev = ring->adev;
243 uint32_t tmp = 0;
244 unsigned i;
245 int r;
246
247 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
248 r = amdgpu_ring_alloc(ring, 3);
249 if (r) {
250 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
251 ring->idx, r);
252 return r;
253 }
254 amdgpu_ring_write(ring,
255 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
256 amdgpu_ring_write(ring, 0xDEADBEEF);
257 amdgpu_ring_commit(ring);
258 for (i = 0; i < adev->usec_timeout; i++) {
259 tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
260 if (tmp == 0xDEADBEEF)
261 break;
262 DRM_UDELAY(1);
263 }
264
265 if (i < adev->usec_timeout) {
266 DRM_INFO("ring test on %d succeeded in %d usecs\n",
267 ring->idx, i);
268 } else {
269 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
270 ring->idx, tmp);
271 r = -EINVAL;
272 }
273 return r;
274}
275
276static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
277 bool direct, struct dma_fence **fence)
278{
279 struct ttm_validate_buffer tv;
280 struct ww_acquire_ctx ticket;
281 struct list_head head;
282 struct amdgpu_job *job;
283 struct amdgpu_ib *ib;
284 struct dma_fence *f = NULL;
285 struct amdgpu_device *adev = ring->adev;
286 uint64_t addr;
287 int i, r;
288
289 memset(&tv, 0, sizeof(tv));
290 tv.bo = &bo->tbo;
291
292 INIT_LIST_HEAD(&head);
293 list_add(&tv.head, &head);
294
295 r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
296 if (r)
297 return r;
298
299 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
300 if (r)
301 goto err;
302
303 r = amdgpu_job_alloc_with_ib(adev, 64, &job);
304 if (r)
305 goto err;
306
307 ib = &job->ibs[0];
308 addr = amdgpu_bo_gpu_offset(bo);
309 ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
310 ib->ptr[1] = addr;
311 ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
312 ib->ptr[3] = addr >> 32;
313 ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
314 ib->ptr[5] = 0;
315 for (i = 6; i < 16; i += 2) {
316 ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
317 ib->ptr[i+1] = 0;
318 }
319 ib->length_dw = 16;
320
321 if (direct) {
322 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
323 job->fence = dma_fence_get(f);
324 if (r)
325 goto err_free;
326
327 amdgpu_job_free(job);
328 } else {
329 r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
330 AMDGPU_FENCE_OWNER_UNDEFINED, &f);
331 if (r)
332 goto err_free;
333 }
334
335 ttm_eu_fence_buffer_objects(&ticket, &head, f);
336
337 if (fence)
338 *fence = dma_fence_get(f);
339 amdgpu_bo_unref(&bo);
340 dma_fence_put(f);
341
342 return 0;
343
344err_free:
345 amdgpu_job_free(job);
346
347err:
348 ttm_eu_backoff_reservation(&ticket, &head);
349 return r;
350}
351
352static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
353 struct dma_fence **fence)
354{
355 struct amdgpu_device *adev = ring->adev;
356 struct amdgpu_bo *bo;
357 uint32_t *msg;
358 int r, i;
359
360 r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
361 AMDGPU_GEM_DOMAIN_VRAM,
362 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
363 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
364 NULL, NULL, &bo);
365 if (r)
366 return r;
367
368 r = amdgpu_bo_reserve(bo, false);
369 if (r) {
370 amdgpu_bo_unref(&bo);
371 return r;
372 }
373
374 r = amdgpu_bo_kmap(bo, (void **)&msg);
375 if (r) {
376 amdgpu_bo_unreserve(bo);
377 amdgpu_bo_unref(&bo);
378 return r;
379 }
380
381 msg[0] = cpu_to_le32(0x00000028);
382 msg[1] = cpu_to_le32(0x00000038);
383 msg[2] = cpu_to_le32(0x00000001);
384 msg[3] = cpu_to_le32(0x00000000);
385 msg[4] = cpu_to_le32(handle);
386 msg[5] = cpu_to_le32(0x00000000);
387 msg[6] = cpu_to_le32(0x00000001);
388 msg[7] = cpu_to_le32(0x00000028);
389 msg[8] = cpu_to_le32(0x00000010);
390 msg[9] = cpu_to_le32(0x00000000);
391 msg[10] = cpu_to_le32(0x00000007);
392 msg[11] = cpu_to_le32(0x00000000);
393 msg[12] = cpu_to_le32(0x00000780);
394 msg[13] = cpu_to_le32(0x00000440);
395 for (i = 14; i < 1024; ++i)
396 msg[i] = cpu_to_le32(0x0);
397
398 amdgpu_bo_kunmap(bo);
399 amdgpu_bo_unreserve(bo);
400
401 return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
402}
403
404static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
405 bool direct, struct dma_fence **fence)
406{
407 struct amdgpu_device *adev = ring->adev;
408 struct amdgpu_bo *bo;
409 uint32_t *msg;
410 int r, i;
411
412 r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
413 AMDGPU_GEM_DOMAIN_VRAM,
414 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
415 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
416 NULL, NULL, &bo);
417 if (r)
418 return r;
419
420 r = amdgpu_bo_reserve(bo, false);
421 if (r) {
422 amdgpu_bo_unref(&bo);
423 return r;
424 }
425
426 r = amdgpu_bo_kmap(bo, (void **)&msg);
427 if (r) {
428 amdgpu_bo_unreserve(bo);
429 amdgpu_bo_unref(&bo);
430 return r;
431 }
432
433 msg[0] = cpu_to_le32(0x00000028);
434 msg[1] = cpu_to_le32(0x00000018);
435 msg[2] = cpu_to_le32(0x00000000);
436 msg[3] = cpu_to_le32(0x00000002);
437 msg[4] = cpu_to_le32(handle);
438 msg[5] = cpu_to_le32(0x00000000);
439 for (i = 6; i < 1024; ++i)
440 msg[i] = cpu_to_le32(0x0);
441
442 amdgpu_bo_kunmap(bo);
443 amdgpu_bo_unreserve(bo);
444
445 return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
446}
447
448int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
449{
450 struct dma_fence *fence;
451 long r;
452
453 r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
454 if (r) {
455 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
456 goto error;
457 }
458
459 r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
460 if (r) {
461 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
462 goto error;
463 }
464
465 r = dma_fence_wait_timeout(fence, false, timeout);
466 if (r == 0) {
467 DRM_ERROR("amdgpu: IB test timed out.\n");
468 r = -ETIMEDOUT;
469 } else if (r < 0) {
470 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
471 } else {
472 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
473 r = 0;
474 }
475
476 dma_fence_put(fence);
477
478error:
479 return r;
480}
481
482int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
483{
484 struct amdgpu_device *adev = ring->adev;
485 uint32_t rptr = amdgpu_ring_get_rptr(ring);
486 unsigned i;
487 int r;
488
489 r = amdgpu_ring_alloc(ring, 16);
490 if (r) {
491 DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
492 ring->idx, r);
493 return r;
494 }
495 amdgpu_ring_write(ring, VCN_ENC_CMD_END);
496 amdgpu_ring_commit(ring);
497
498 for (i = 0; i < adev->usec_timeout; i++) {
499 if (amdgpu_ring_get_rptr(ring) != rptr)
500 break;
501 DRM_UDELAY(1);
502 }
503
504 if (i < adev->usec_timeout) {
505 DRM_INFO("ring test on %d succeeded in %d usecs\n",
506 ring->idx, i);
507 } else {
508 DRM_ERROR("amdgpu: ring %d test failed\n",
509 ring->idx);
510 r = -ETIMEDOUT;
511 }
512
513 return r;
514}
515
516static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
517 struct dma_fence **fence)
518{
519 const unsigned ib_size_dw = 16;
520 struct amdgpu_job *job;
521 struct amdgpu_ib *ib;
522 struct dma_fence *f = NULL;
523 uint64_t dummy;
524 int i, r;
525
526 r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
527 if (r)
528 return r;
529
530 ib = &job->ibs[0];
531 dummy = ib->gpu_addr + 1024;
532
533 ib->length_dw = 0;
534 ib->ptr[ib->length_dw++] = 0x00000018;
535 ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
536 ib->ptr[ib->length_dw++] = handle;
537 ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
538 ib->ptr[ib->length_dw++] = dummy;
539 ib->ptr[ib->length_dw++] = 0x0000000b;
540
541 ib->ptr[ib->length_dw++] = 0x00000014;
542 ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
543 ib->ptr[ib->length_dw++] = 0x0000001c;
544 ib->ptr[ib->length_dw++] = 0x00000000;
545 ib->ptr[ib->length_dw++] = 0x00000000;
546
547 ib->ptr[ib->length_dw++] = 0x00000008;
548 ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
549
550 for (i = ib->length_dw; i < ib_size_dw; ++i)
551 ib->ptr[i] = 0x0;
552
553 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
554 job->fence = dma_fence_get(f);
555 if (r)
556 goto err;
557
558 amdgpu_job_free(job);
559 if (fence)
560 *fence = dma_fence_get(f);
561 dma_fence_put(f);
562
563 return 0;
564
565err:
566 amdgpu_job_free(job);
567 return r;
568}
569
570static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
571 struct dma_fence **fence)
572{
573 const unsigned ib_size_dw = 16;
574 struct amdgpu_job *job;
575 struct amdgpu_ib *ib;
576 struct dma_fence *f = NULL;
577 uint64_t dummy;
578 int i, r;
579
580 r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
581 if (r)
582 return r;
583
584 ib = &job->ibs[0];
585 dummy = ib->gpu_addr + 1024;
586
587 ib->length_dw = 0;
588 ib->ptr[ib->length_dw++] = 0x00000018;
589 ib->ptr[ib->length_dw++] = 0x00000001;
590 ib->ptr[ib->length_dw++] = handle;
591 ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
592 ib->ptr[ib->length_dw++] = dummy;
593 ib->ptr[ib->length_dw++] = 0x0000000b;
594
595 ib->ptr[ib->length_dw++] = 0x00000014;
596 ib->ptr[ib->length_dw++] = 0x00000002;
597 ib->ptr[ib->length_dw++] = 0x0000001c;
598 ib->ptr[ib->length_dw++] = 0x00000000;
599 ib->ptr[ib->length_dw++] = 0x00000000;
600
601 ib->ptr[ib->length_dw++] = 0x00000008;
602 ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
603
604 for (i = ib->length_dw; i < ib_size_dw; ++i)
605 ib->ptr[i] = 0x0;
606
607 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
608 job->fence = dma_fence_get(f);
609 if (r)
610 goto err;
611
612 amdgpu_job_free(job);
613 if (fence)
614 *fence = dma_fence_get(f);
615 dma_fence_put(f);
616
617 return 0;
618
619err:
620 amdgpu_job_free(job);
621 return r;
622}
623
624int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
625{
626 struct dma_fence *fence = NULL;
627 long r;
628
629 r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
630 if (r) {
631 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
632 goto error;
633 }
634
635 r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
636 if (r) {
637 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
638 goto error;
639 }
640
641 r = dma_fence_wait_timeout(fence, false, timeout);
642 if (r == 0) {
643 DRM_ERROR("amdgpu: IB test timed out.\n");
644 r = -ETIMEDOUT;
645 } else if (r < 0) {
646 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
647 } else {
648 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
649 r = 0;
650 }
651error:
652 dma_fence_put(fence);
653 return r;
654}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
new file mode 100644
index 000000000000..d50ba0657854
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -0,0 +1,77 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef __AMDGPU_VCN_H__
25#define __AMDGPU_VCN_H__
26
27#define AMDGPU_VCN_STACK_SIZE (200*1024)
28#define AMDGPU_VCN_HEAP_SIZE (256*1024)
29#define AMDGPU_VCN_SESSION_SIZE (50*1024)
30#define AMDGPU_VCN_FIRMWARE_OFFSET 256
31#define AMDGPU_VCN_MAX_ENC_RINGS 3
32
33#define VCN_DEC_CMD_FENCE 0x00000000
34#define VCN_DEC_CMD_TRAP 0x00000001
35#define VCN_DEC_CMD_WRITE_REG 0x00000004
36#define VCN_DEC_CMD_REG_READ_COND_WAIT 0x00000006
37#define VCN_DEC_CMD_PACKET_START 0x0000000a
38#define VCN_DEC_CMD_PACKET_END 0x0000000b
39
40#define VCN_ENC_CMD_NO_OP 0x00000000
41#define VCN_ENC_CMD_END 0x00000001
42#define VCN_ENC_CMD_IB 0x00000002
43#define VCN_ENC_CMD_FENCE 0x00000003
44#define VCN_ENC_CMD_TRAP 0x00000004
45#define VCN_ENC_CMD_REG_WRITE 0x0000000b
46#define VCN_ENC_CMD_REG_WAIT 0x0000000c
47
48struct amdgpu_vcn {
49 struct amdgpu_bo *vcpu_bo;
50 void *cpu_addr;
51 uint64_t gpu_addr;
52 unsigned fw_version;
53 void *saved_bo;
54 struct delayed_work idle_work;
55 const struct firmware *fw; /* VCN firmware */
56 struct amdgpu_ring ring_dec;
57 struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
58 struct amdgpu_irq_src irq;
59 struct amd_sched_entity entity_dec;
60 struct amd_sched_entity entity_enc;
61 unsigned num_enc_rings;
62};
63
64int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
65int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
66int amdgpu_vcn_suspend(struct amdgpu_device *adev);
67int amdgpu_vcn_resume(struct amdgpu_device *adev);
68void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
69void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
70
71int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
72int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
73
74int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
75int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
76
77#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 6bf5cea294f2..8a081e162d13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,6 +22,7 @@
22 */ 22 */
23 23
24#include "amdgpu.h" 24#include "amdgpu.h"
25#define MAX_KIQ_REG_WAIT 100000
25 26
26int amdgpu_allocate_static_csa(struct amdgpu_device *adev) 27int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
27{ 28{
@@ -105,8 +106,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
105 /* enable virtual display */ 106 /* enable virtual display */
106 adev->mode_info.num_crtc = 1; 107 adev->mode_info.num_crtc = 1;
107 adev->enable_virtual_display = true; 108 adev->enable_virtual_display = true;
109 adev->cg_flags = 0;
110 adev->pg_flags = 0;
108 111
109 mutex_init(&adev->virt.lock_kiq);
110 mutex_init(&adev->virt.lock_reset); 112 mutex_init(&adev->virt.lock_reset);
111} 113}
112 114
@@ -120,17 +122,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
120 122
121 BUG_ON(!ring->funcs->emit_rreg); 123 BUG_ON(!ring->funcs->emit_rreg);
122 124
123 mutex_lock(&adev->virt.lock_kiq); 125 mutex_lock(&kiq->ring_mutex);
124 amdgpu_ring_alloc(ring, 32); 126 amdgpu_ring_alloc(ring, 32);
125 amdgpu_ring_emit_rreg(ring, reg); 127 amdgpu_ring_emit_rreg(ring, reg);
126 amdgpu_fence_emit(ring, &f); 128 amdgpu_fence_emit(ring, &f);
127 amdgpu_ring_commit(ring); 129 amdgpu_ring_commit(ring);
128 mutex_unlock(&adev->virt.lock_kiq); 130 mutex_unlock(&kiq->ring_mutex);
129 131
130 r = dma_fence_wait(f, false); 132 r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
131 if (r)
132 DRM_ERROR("wait for kiq fence error: %ld.\n", r);
133 dma_fence_put(f); 133 dma_fence_put(f);
134 if (r < 1) {
135 DRM_ERROR("wait for kiq fence error: %ld.\n", r);
136 return ~0;
137 }
134 138
135 val = adev->wb.wb[adev->virt.reg_val_offs]; 139 val = adev->wb.wb[adev->virt.reg_val_offs];
136 140
@@ -146,15 +150,15 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
146 150
147 BUG_ON(!ring->funcs->emit_wreg); 151 BUG_ON(!ring->funcs->emit_wreg);
148 152
149 mutex_lock(&adev->virt.lock_kiq); 153 mutex_lock(&kiq->ring_mutex);
150 amdgpu_ring_alloc(ring, 32); 154 amdgpu_ring_alloc(ring, 32);
151 amdgpu_ring_emit_wreg(ring, reg, v); 155 amdgpu_ring_emit_wreg(ring, reg, v);
152 amdgpu_fence_emit(ring, &f); 156 amdgpu_fence_emit(ring, &f);
153 amdgpu_ring_commit(ring); 157 amdgpu_ring_commit(ring);
154 mutex_unlock(&adev->virt.lock_kiq); 158 mutex_unlock(&kiq->ring_mutex);
155 159
156 r = dma_fence_wait(f, false); 160 r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
157 if (r) 161 if (r < 1)
158 DRM_ERROR("wait for kiq fence error: %ld.\n", r); 162 DRM_ERROR("wait for kiq fence error: %ld.\n", r);
159 dma_fence_put(f); 163 dma_fence_put(f);
160} 164}
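
dma_fence_wait_timeout() returns the remaining jiffies (positive) on success, 0 on timeout and a negative errno on error, so the "r < 1" test folds timeout and error into one failure branch; on the read side the caller then sees ~0 as a poison value. A hedged sketch of checking for it (hypothetical wrapper; note a register that legitimately reads all ones is indistinguishable):

static u32 example_kiq_read_checked(struct amdgpu_device *adev, u32 reg)
{
	u32 val = amdgpu_virt_kiq_rreg(adev, reg);

	if (val == ~0u)
		DRM_ERROR("KIQ read of reg 0x%x may have timed out\n", reg);
	return val;
}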
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index a8ed162cc0bc..9e1062edb76e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -52,7 +52,6 @@ struct amdgpu_virt {
52 uint64_t csa_vmid0_addr; 52 uint64_t csa_vmid0_addr;
53 bool chained_ib_support; 53 bool chained_ib_support;
54 uint32_t reg_val_offs; 54 uint32_t reg_val_offs;
55 struct mutex lock_kiq;
56 struct mutex lock_reset; 55 struct mutex lock_reset;
57 struct amdgpu_irq_src ack_irq; 56 struct amdgpu_irq_src ack_irq;
58 struct amdgpu_irq_src rcv_irq; 57 struct amdgpu_irq_src rcv_irq;
@@ -97,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
97int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); 96int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
98int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); 97int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
99int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); 98int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
100int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); 99int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
101int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); 100int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
102void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); 101void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
103 102
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 07ff3b1514f1..5795f81369f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -79,6 +79,12 @@ struct amdgpu_pte_update_params {
79 uint64_t flags); 79 uint64_t flags);
80 /* indicate update pt or its shadow */ 80 /* indicate update pt or its shadow */
81 bool shadow; 81 bool shadow;
82 /* The next two are used during VM update by CPU:
83 * pages_addr - DMA addresses to use for mapping,
84 * kptr - kernel pointer of the PD/PT BO that needs to be updated
85 */
86 dma_addr_t *pages_addr;
87 void *kptr;
82}; 88};
83 89
84/* Helper to disable partial resident texture feature from a fence callback */ 90/* Helper to disable partial resident texture feature from a fence callback */
@@ -275,12 +281,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
275 adev->vm_manager.block_size; 281 adev->vm_manager.block_size;
276 unsigned pt_idx, from, to; 282 unsigned pt_idx, from, to;
277 int r; 283 int r;
284 u64 flags;
278 285
279 if (!parent->entries) { 286 if (!parent->entries) {
280 unsigned num_entries = amdgpu_vm_num_entries(adev, level); 287 unsigned num_entries = amdgpu_vm_num_entries(adev, level);
281 288
282 parent->entries = drm_calloc_large(num_entries, 289 parent->entries = kvmalloc_array(num_entries,
283 sizeof(struct amdgpu_vm_pt)); 290 sizeof(struct amdgpu_vm_pt),
291 GFP_KERNEL | __GFP_ZERO);
284 if (!parent->entries) 292 if (!parent->entries)
285 return -ENOMEM; 293 return -ENOMEM;
286 memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); 294 memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
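
drm_calloc_large() is being replaced tree-wide by kvmalloc_array(), which tries a kmalloc first and transparently falls back to vmalloc for large allocations; __GFP_ZERO supplies the calloc-style zeroing. It pairs with the kvfree() switch in the amdgpu_vm_free_levels() hunk further down. Worth noting: the context-line memset() above clears only sizeof(struct amdgpu_vm_pt) bytes, i.e. a single entry, so with __GFP_ZERO it is redundant rather than load-bearing. The general pattern:

	entries = kvmalloc_array(num_entries, sizeof(*entries),
				 GFP_KERNEL | __GFP_ZERO);
	if (!entries)
		return -ENOMEM;
	/* ... use entries ... */
	kvfree(entries);	/* correct for either backing store */
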
@@ -299,6 +307,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
299 saddr = saddr & ((1 << shift) - 1); 307 saddr = saddr & ((1 << shift) - 1);
300 eaddr = eaddr & ((1 << shift) - 1); 308 eaddr = eaddr & ((1 << shift) - 1);
301 309
310 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
311 AMDGPU_GEM_CREATE_VRAM_CLEARED;
312 if (vm->use_cpu_for_update)
313 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
314 else
315 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
316 AMDGPU_GEM_CREATE_SHADOW);
317
302 /* walk over the address space and allocate the page tables */ 318 /* walk over the address space and allocate the page tables */
303 for (pt_idx = from; pt_idx <= to; ++pt_idx) { 319 for (pt_idx = from; pt_idx <= to; ++pt_idx) {
304 struct reservation_object *resv = vm->root.bo->tbo.resv; 320 struct reservation_object *resv = vm->root.bo->tbo.resv;
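
Hoisting the flags into a variable lets the page-table placement depend on who writes it: CPU updates require a CPU-visible VRAM placement, while the SDMA path keeps the old no-CPU-access layout plus a shadow copy for recovery after GPU reset. The same selection reappears for the root PD in the amdgpu_vm_init() hunk below. Condensed:

	u64 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		    AMDGPU_GEM_CREATE_VRAM_CLEARED;

	if (vm->use_cpu_for_update)
		/* the CPU must be able to map the BO */
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		/* SDMA writes it; keep a shadow copy instead */
		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			 AMDGPU_GEM_CREATE_SHADOW;
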
@@ -310,10 +326,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
310 amdgpu_vm_bo_size(adev, level), 326 amdgpu_vm_bo_size(adev, level),
311 AMDGPU_GPU_PAGE_SIZE, true, 327 AMDGPU_GPU_PAGE_SIZE, true,
312 AMDGPU_GEM_DOMAIN_VRAM, 328 AMDGPU_GEM_DOMAIN_VRAM,
313 AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 329 flags,
314 AMDGPU_GEM_CREATE_SHADOW |
315 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
316 AMDGPU_GEM_CREATE_VRAM_CLEARED,
317 NULL, resv, &pt); 330 NULL, resv, &pt);
318 if (r) 331 if (r)
319 return r; 332 return r;
@@ -391,6 +404,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
391 atomic_read(&adev->gpu_reset_counter); 404 atomic_read(&adev->gpu_reset_counter);
392} 405}
393 406
407static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
408{
409 return !!vm->reserved_vmid[vmhub];
410}
411
412/* id_mgr->lock must be held */
413static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
414 struct amdgpu_ring *ring,
415 struct amdgpu_sync *sync,
416 struct dma_fence *fence,
417 struct amdgpu_job *job)
418{
419 struct amdgpu_device *adev = ring->adev;
420 unsigned vmhub = ring->funcs->vmhub;
421 uint64_t fence_context = adev->fence_context + ring->idx;
422 struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
423 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
424 struct dma_fence *updates = sync->last_vm_update;
425 int r = 0;
426 struct dma_fence *flushed, *tmp;
427 bool needs_flush = false;
428
429 flushed = id->flushed_updates;
430 if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
431 (atomic64_read(&id->owner) != vm->client_id) ||
432 (job->vm_pd_addr != id->pd_gpu_addr) ||
433 (updates && (!flushed || updates->context != flushed->context ||
434 dma_fence_is_later(updates, flushed))) ||
435 (!id->last_flush || (id->last_flush->context != fence_context &&
436 !dma_fence_is_signaled(id->last_flush)))) {
437 needs_flush = true;
 438 /* to prevent one context from being starved by another */
439 id->pd_gpu_addr = 0;
440 tmp = amdgpu_sync_peek_fence(&id->active, ring);
441 if (tmp) {
442 r = amdgpu_sync_fence(adev, sync, tmp);
443 return r;
444 }
445 }
446
 447 /* Good, we can use this VMID. Remember this submission as
 448 * a user of the VMID.
 449 */
450 r = amdgpu_sync_fence(ring->adev, &id->active, fence);
451 if (r)
452 goto out;
453
454 if (updates && (!flushed || updates->context != flushed->context ||
455 dma_fence_is_later(updates, flushed))) {
456 dma_fence_put(id->flushed_updates);
457 id->flushed_updates = dma_fence_get(updates);
458 }
459 id->pd_gpu_addr = job->vm_pd_addr;
460 atomic64_set(&id->owner, vm->client_id);
461 job->vm_needs_flush = needs_flush;
462 if (needs_flush) {
463 dma_fence_put(id->last_flush);
464 id->last_flush = NULL;
465 }
466 job->vm_id = id - id_mgr->ids;
467 trace_amdgpu_vm_grab_id(vm, ring, job);
468out:
469 return r;
470}
471
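
The long condition above decides whether reusing the reserved VMID forces a flush. An illustrative condensation (the hunk itself is authoritative):

	bool updates_newer_than_flushed =
		updates && (!flushed ||
			    updates->context != flushed->context ||
			    dma_fence_is_later(updates, flushed));
	bool last_flush_unusable =
		!id->last_flush ||
		(id->last_flush->context != fence_context &&
		 !dma_fence_is_signaled(id->last_flush));

	needs_flush = amdgpu_vm_had_gpu_reset(adev, id) ||	  /* reset since last use */
		      atomic64_read(&id->owner) != vm->client_id || /* other client used it */
		      job->vm_pd_addr != id->pd_gpu_addr ||	  /* page directory moved */
		      updates_newer_than_flushed ||		  /* PT updates not flushed */
		      last_flush_unusable;			  /* cross-context, unsignaled */

When a flush is needed, the function also zeroes id->pd_gpu_addr so a concurrent context cannot keep matching the cached address and starve this one, as the in-code comment notes.
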
394/** 472/**
395 * amdgpu_vm_grab_id - allocate the next free VMID 473 * amdgpu_vm_grab_id - allocate the next free VMID
396 * 474 *
@@ -415,12 +493,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
415 unsigned i; 493 unsigned i;
416 int r = 0; 494 int r = 0;
417 495
496 mutex_lock(&id_mgr->lock);
497 if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
498 r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
499 mutex_unlock(&id_mgr->lock);
500 return r;
501 }
418 fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); 502 fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
419 if (!fences) 503 if (!fences) {
504 mutex_unlock(&id_mgr->lock);
420 return -ENOMEM; 505 return -ENOMEM;
421 506 }
422 mutex_lock(&id_mgr->lock);
423
424 /* Check if we have an idle VMID */ 507 /* Check if we have an idle VMID */
425 i = 0; 508 i = 0;
426 list_for_each_entry(idle, &id_mgr->ids_lru, list) { 509 list_for_each_entry(idle, &id_mgr->ids_lru, list) {
@@ -521,7 +604,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
521 id->pd_gpu_addr = job->vm_pd_addr; 604 id->pd_gpu_addr = job->vm_pd_addr;
522 dma_fence_put(id->flushed_updates); 605 dma_fence_put(id->flushed_updates);
523 id->flushed_updates = dma_fence_get(updates); 606 id->flushed_updates = dma_fence_get(updates);
524 id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
525 atomic64_set(&id->owner, vm->client_id); 607 atomic64_set(&id->owner, vm->client_id);
526 608
527needs_flush: 609needs_flush:
@@ -540,40 +622,118 @@ error:
540 return r; 622 return r;
541} 623}
542 624
543static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) 625static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
626 struct amdgpu_vm *vm,
627 unsigned vmhub)
628{
629 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
630
631 mutex_lock(&id_mgr->lock);
632 if (vm->reserved_vmid[vmhub]) {
633 list_add(&vm->reserved_vmid[vmhub]->list,
634 &id_mgr->ids_lru);
635 vm->reserved_vmid[vmhub] = NULL;
636 atomic_dec(&id_mgr->reserved_vmid_num);
637 }
638 mutex_unlock(&id_mgr->lock);
639}
640
641static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
642 struct amdgpu_vm *vm,
643 unsigned vmhub)
644{
645 struct amdgpu_vm_id_manager *id_mgr;
646 struct amdgpu_vm_id *idle;
647 int r = 0;
648
649 id_mgr = &adev->vm_manager.id_mgr[vmhub];
650 mutex_lock(&id_mgr->lock);
651 if (vm->reserved_vmid[vmhub])
652 goto unlock;
653 if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
654 AMDGPU_VM_MAX_RESERVED_VMID) {
 655 DRM_ERROR("Reserved VMID limit exceeded\n");
656 atomic_dec(&id_mgr->reserved_vmid_num);
657 r = -EINVAL;
658 goto unlock;
659 }
 660 /* Grab the first idle VMID from the LRU list */
661 idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
662 list_del_init(&idle->list);
663 vm->reserved_vmid[vmhub] = idle;
664 mutex_unlock(&id_mgr->lock);
665
666 return 0;
667unlock:
668 mutex_unlock(&id_mgr->lock);
669 return r;
670}
671
672/**
673 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
674 *
675 * @adev: amdgpu_device pointer
676 */
677void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
544{ 678{
545 struct amdgpu_device *adev = ring->adev;
546 const struct amdgpu_ip_block *ip_block; 679 const struct amdgpu_ip_block *ip_block;
680 bool has_compute_vm_bug;
681 struct amdgpu_ring *ring;
682 int i;
547 683
548 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 684 has_compute_vm_bug = false;
549 /* only compute rings */
550 return false;
551 685
552 ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 686 ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
553 if (!ip_block) 687 if (ip_block) {
554 return false; 688 /* Compute has a VM bug for GFX versions <= 7.
 689 * Compute has a VM bug for GFX 8 when MEC firmware is < 673. */
690 if (ip_block->version->major <= 7)
691 has_compute_vm_bug = true;
692 else if (ip_block->version->major == 8)
693 if (adev->gfx.mec_fw_version < 673)
694 has_compute_vm_bug = true;
695 }
555 696
556 if (ip_block->version->major <= 7) { 697 for (i = 0; i < adev->num_rings; i++) {
557 /* gfx7 has no workaround */ 698 ring = adev->rings[i];
558 return true; 699 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
559 } else if (ip_block->version->major == 8) { 700 /* only compute rings */
560 if (adev->gfx.mec_fw_version >= 673) 701 ring->has_compute_vm_bug = has_compute_vm_bug;
561 /* gfx8 is fixed in MEC firmware 673 */
562 return false;
563 else 702 else
564 return true; 703 ring->has_compute_vm_bug = false;
565 } 704 }
566 return false;
567} 705}
568 706
569static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) 707bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
708 struct amdgpu_job *job)
570{ 709{
571 u64 addr = mc_addr; 710 struct amdgpu_device *adev = ring->adev;
711 unsigned vmhub = ring->funcs->vmhub;
712 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
713 struct amdgpu_vm_id *id;
714 bool gds_switch_needed;
715 bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
716
717 if (job->vm_id == 0)
718 return false;
719 id = &id_mgr->ids[job->vm_id];
720 gds_switch_needed = ring->funcs->emit_gds_switch && (
721 id->gds_base != job->gds_base ||
722 id->gds_size != job->gds_size ||
723 id->gws_base != job->gws_base ||
724 id->gws_size != job->gws_size ||
725 id->oa_base != job->oa_base ||
726 id->oa_size != job->oa_size);
572 727
573 if (adev->gart.gart_funcs->adjust_mc_addr) 728 if (amdgpu_vm_had_gpu_reset(adev, id))
574 addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr); 729 return true;
575 730
576 return addr; 731 return vm_flush_needed || gds_switch_needed;
732}
733
734static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
735{
736 return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
577} 737}
578 738
579/** 739/**
@@ -598,8 +758,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
598 id->gws_size != job->gws_size || 758 id->gws_size != job->gws_size ||
599 id->oa_base != job->oa_base || 759 id->oa_base != job->oa_base ||
600 id->oa_size != job->oa_size); 760 id->oa_size != job->oa_size);
601 bool vm_flush_needed = job->vm_needs_flush || 761 bool vm_flush_needed = job->vm_needs_flush;
602 amdgpu_vm_ring_has_compute_vm_bug(ring);
603 unsigned patch_offset = 0; 762 unsigned patch_offset = 0;
604 int r; 763 int r;
605 764
@@ -614,15 +773,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
614 if (ring->funcs->init_cond_exec) 773 if (ring->funcs->init_cond_exec)
615 patch_offset = amdgpu_ring_init_cond_exec(ring); 774 patch_offset = amdgpu_ring_init_cond_exec(ring);
616 775
617 if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)
618 amdgpu_ring_emit_pipeline_sync(ring);
619
620 if (ring->funcs->emit_vm_flush && vm_flush_needed) { 776 if (ring->funcs->emit_vm_flush && vm_flush_needed) {
621 u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
622 struct dma_fence *fence; 777 struct dma_fence *fence;
623 778
624 trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); 779 trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
625 amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); 780 amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
626 781
627 r = amdgpu_fence_emit(ring, &fence); 782 r = amdgpu_fence_emit(ring, &fence);
628 if (r) 783 if (r)
@@ -631,10 +786,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
631 mutex_lock(&id_mgr->lock); 786 mutex_lock(&id_mgr->lock);
632 dma_fence_put(id->last_flush); 787 dma_fence_put(id->last_flush);
633 id->last_flush = fence; 788 id->last_flush = fence;
789 id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
634 mutex_unlock(&id_mgr->lock); 790 mutex_unlock(&id_mgr->lock);
635 } 791 }
636 792
637 if (gds_switch_needed) { 793 if (ring->funcs->emit_gds_switch && gds_switch_needed) {
638 id->gds_base = job->gds_base; 794 id->gds_base = job->gds_base;
639 id->gds_size = job->gds_size; 795 id->gds_size = job->gds_size;
640 id->gws_base = job->gws_base; 796 id->gws_base = job->gws_base;
@@ -672,6 +828,7 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
672 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 828 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
673 struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; 829 struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
674 830
831 atomic64_set(&id->owner, 0);
675 id->gds_base = 0; 832 id->gds_base = 0;
676 id->gds_size = 0; 833 id->gds_size = 0;
677 id->gws_base = 0; 834 id->gws_base = 0;
@@ -681,6 +838,26 @@ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
681} 838}
682 839
683/** 840/**
 841 * amdgpu_vm_reset_all_ids - reset all VMIDs to zero
 842 *
 843 * @adev: amdgpu_device pointer
 844 *
 845 * Reset all VMIDs to force a flush on next use
846 */
847void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
848{
849 unsigned i, j;
850
851 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
852 struct amdgpu_vm_id_manager *id_mgr =
853 &adev->vm_manager.id_mgr[i];
854
855 for (j = 1; j < id_mgr->num_ids; ++j)
856 amdgpu_vm_reset_id(adev, i, j);
857 }
858}
859
860/**
684 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo 861 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
685 * 862 *
686 * @vm: requested vm 863 * @vm: requested vm
@@ -784,6 +961,53 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
784 return result; 961 return result;
785} 962}
786 963
964/**
965 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
966 *
967 * @params: see amdgpu_pte_update_params definition
968 * @pe: kmap addr of the page entry
969 * @addr: dst addr to write into pe
970 * @count: number of page entries to update
971 * @incr: increase next addr by incr bytes
972 * @flags: hw access flags
973 *
974 * Write count number of PT/PD entries directly.
975 */
976static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
977 uint64_t pe, uint64_t addr,
978 unsigned count, uint32_t incr,
979 uint64_t flags)
980{
981 unsigned int i;
982 uint64_t value;
983
984 for (i = 0; i < count; i++) {
985 value = params->pages_addr ?
986 amdgpu_vm_map_gart(params->pages_addr, addr) :
987 addr;
988 amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
989 i, value, flags);
990 addr += incr;
991 }
992
993 /* Flush HDP */
994 mb();
995 amdgpu_gart_flush_gpu_tlb(params->adev, 0);
996}
997
998static int amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
999{
1000 struct amdgpu_sync sync;
1001 int r;
1002
1003 amdgpu_sync_create(&sync);
1004 amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
1005 r = amdgpu_sync_wait(&sync, true);
1006 amdgpu_sync_free(&sync);
1007
1008 return r;
1009}
1010
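
amdgpu_vm_cpu_set_ptes() is the CPU-side twin of amdgpu_vm_do_set_ptes(): 'pe' is a kernel-mapped pointer into the PT BO, each entry is encoded through amdgpu_gart_set_pte_pde(), and a single memory barrier plus TLB flush publishes the batch. amdgpu_vm_bo_wait() exists because, unlike an SDMA IB, the CPU cannot queue behind in-flight GPU work; it has to drain the reservation object's fences before touching the pages. A usage sketch under those assumptions:

	/* pe: CPU address inside the kmapped PT BO, cast to u64 */
	r = amdgpu_vm_bo_wait(adev, pt_bo);	/* wait out all GPU users */
	if (!r)
		amdgpu_vm_cpu_set_ptes(&params, pe, addr, count,
				       AMDGPU_GPU_PAGE_SIZE,
				       AMDGPU_PTE_VALID);
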
787/* 1011/*
788 * amdgpu_vm_update_level - update a single level in the hierarchy 1012 * amdgpu_vm_update_level - update a single level in the hierarchy
789 * 1013 *
@@ -800,11 +1024,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
800 unsigned level) 1024 unsigned level)
801{ 1025{
802 struct amdgpu_bo *shadow; 1026 struct amdgpu_bo *shadow;
803 struct amdgpu_ring *ring; 1027 struct amdgpu_ring *ring = NULL;
804 uint64_t pd_addr, shadow_addr; 1028 uint64_t pd_addr, shadow_addr = 0;
805 uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); 1029 uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
806 uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; 1030 uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
807 unsigned count = 0, pt_idx, ndw; 1031 unsigned count = 0, pt_idx, ndw = 0;
808 struct amdgpu_job *job; 1032 struct amdgpu_job *job;
809 struct amdgpu_pte_update_params params; 1033 struct amdgpu_pte_update_params params;
810 struct dma_fence *fence = NULL; 1034 struct dma_fence *fence = NULL;
@@ -813,34 +1037,54 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
813 1037
814 if (!parent->entries) 1038 if (!parent->entries)
815 return 0; 1039 return 0;
816 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
817 1040
818 /* padding, etc. */ 1041 memset(&params, 0, sizeof(params));
819 ndw = 64; 1042 params.adev = adev;
1043 shadow = parent->bo->shadow;
1044
1045 WARN_ON(vm->use_cpu_for_update && shadow);
1046 if (vm->use_cpu_for_update && !shadow) {
1047 r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
1048 if (r)
1049 return r;
1050 r = amdgpu_vm_bo_wait(adev, parent->bo);
1051 if (unlikely(r)) {
1052 amdgpu_bo_kunmap(parent->bo);
1053 return r;
1054 }
1055 params.func = amdgpu_vm_cpu_set_ptes;
1056 } else {
1057 if (shadow) {
1058 r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
1059 if (r)
1060 return r;
1061 }
1062 ring = container_of(vm->entity.sched, struct amdgpu_ring,
1063 sched);
820 1064
821 /* assume the worst case */ 1065 /* padding, etc. */
822 ndw += parent->last_entry_used * 6; 1066 ndw = 64;
823 1067
824 pd_addr = amdgpu_bo_gpu_offset(parent->bo); 1068 /* assume the worst case */
1069 ndw += parent->last_entry_used * 6;
825 1070
826 shadow = parent->bo->shadow; 1071 pd_addr = amdgpu_bo_gpu_offset(parent->bo);
827 if (shadow) { 1072
828 r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); 1073 if (shadow) {
1074 shadow_addr = amdgpu_bo_gpu_offset(shadow);
1075 ndw *= 2;
1076 } else {
1077 shadow_addr = 0;
1078 }
1079
1080 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
829 if (r) 1081 if (r)
830 return r; 1082 return r;
831 shadow_addr = amdgpu_bo_gpu_offset(shadow);
832 ndw *= 2;
833 } else {
834 shadow_addr = 0;
835 }
836 1083
837 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); 1084 params.ib = &job->ibs[0];
838 if (r) 1085 params.func = amdgpu_vm_do_set_ptes;
839 return r; 1086 }
840 1087
841 memset(&params, 0, sizeof(params));
842 params.adev = adev;
843 params.ib = &job->ibs[0];
844 1088
845 /* walk over the address space and update the directory */ 1089 /* walk over the address space and update the directory */
846 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { 1090 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
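
With both backends sharing the (params, pe, addr, count, incr, flags) signature, params.func becomes the single dispatch point and the directory walk stays backend-agnostic. The selection made by this hunk, condensed:

	if (vm->use_cpu_for_update && !parent->bo->shadow) {
		/* CPU path: kmap the PD, wait for GPU users, write direct */
		params.func = amdgpu_vm_cpu_set_ptes;
	} else {
		/* SDMA path: allocate an IB sized for the worst case */
		params.ib = &job->ibs[0];
		params.func = amdgpu_vm_do_set_ptes;
	}
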
@@ -860,6 +1104,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
860 } 1104 }
861 1105
862 pt = amdgpu_bo_gpu_offset(bo); 1106 pt = amdgpu_bo_gpu_offset(bo);
1107 pt = amdgpu_gart_get_vm_pde(adev, pt);
863 if (parent->entries[pt_idx].addr == pt) 1108 if (parent->entries[pt_idx].addr == pt)
864 continue; 1109 continue;
865 1110
@@ -871,19 +1116,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
871 (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { 1116 (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
872 1117
873 if (count) { 1118 if (count) {
874 uint64_t pt_addr =
875 amdgpu_vm_adjust_mc_addr(adev, last_pt);
876
877 if (shadow) 1119 if (shadow)
878 amdgpu_vm_do_set_ptes(&params, 1120 params.func(&params,
879 last_shadow, 1121 last_shadow,
880 pt_addr, count, 1122 last_pt, count,
881 incr, 1123 incr,
882 AMDGPU_PTE_VALID); 1124 AMDGPU_PTE_VALID);
883 1125
884 amdgpu_vm_do_set_ptes(&params, last_pde, 1126 params.func(&params, last_pde,
885 pt_addr, count, incr, 1127 last_pt, count, incr,
886 AMDGPU_PTE_VALID); 1128 AMDGPU_PTE_VALID);
887 } 1129 }
888 1130
889 count = 1; 1131 count = 1;
@@ -896,17 +1138,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
896 } 1138 }
897 1139
898 if (count) { 1140 if (count) {
899 uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
900
901 if (vm->root.bo->shadow) 1141 if (vm->root.bo->shadow)
902 amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr, 1142 params.func(&params, last_shadow, last_pt,
903 count, incr, AMDGPU_PTE_VALID); 1143 count, incr, AMDGPU_PTE_VALID);
904 1144
905 amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr, 1145 params.func(&params, last_pde, last_pt,
906 count, incr, AMDGPU_PTE_VALID); 1146 count, incr, AMDGPU_PTE_VALID);
907 } 1147 }
908 1148
909 if (params.ib->length_dw == 0) { 1149 if (params.func == amdgpu_vm_cpu_set_ptes)
1150 amdgpu_bo_kunmap(parent->bo);
1151 else if (params.ib->length_dw == 0) {
910 amdgpu_job_free(job); 1152 amdgpu_job_free(job);
911 } else { 1153 } else {
912 amdgpu_ring_pad_ib(ring, params.ib); 1154 amdgpu_ring_pad_ib(ring, params.ib);
@@ -950,6 +1192,32 @@ error_free:
950} 1192}
951 1193
952/* 1194/*
1195 * amdgpu_vm_invalidate_level - mark all PD levels as invalid
1196 *
1197 * @parent: parent PD
1198 *
 1199 * Mark all PD levels as invalid after an error.
1200 */
1201static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
1202{
1203 unsigned pt_idx;
1204
1205 /*
1206 * Recurse into the subdirectories. This recursion is harmless because
1207 * we only have a maximum of 5 layers.
1208 */
1209 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
1210 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
1211
1212 if (!entry->bo)
1213 continue;
1214
1215 entry->addr = ~0ULL;
1216 amdgpu_vm_invalidate_level(entry);
1217 }
1218}
1219
1220/*
953 * amdgpu_vm_update_directories - make sure that all directories are valid 1221 * amdgpu_vm_update_directories - make sure that all directories are valid
954 * 1222 *
955 * @adev: amdgpu_device pointer 1223 * @adev: amdgpu_device pointer
@@ -961,7 +1229,13 @@ error_free:
961int amdgpu_vm_update_directories(struct amdgpu_device *adev, 1229int amdgpu_vm_update_directories(struct amdgpu_device *adev,
962 struct amdgpu_vm *vm) 1230 struct amdgpu_vm *vm)
963{ 1231{
964 return amdgpu_vm_update_level(adev, vm, &vm->root, 0); 1232 int r;
1233
1234 r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
1235 if (r)
1236 amdgpu_vm_invalidate_level(&vm->root);
1237
1238 return r;
965} 1239}
966 1240
967/** 1241/**
@@ -1001,58 +1275,37 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
1001 * @flags: mapping flags 1275 * @flags: mapping flags
1002 * 1276 *
1003 * Update the page tables in the range @start - @end. 1277 * Update the page tables in the range @start - @end.
1278 * Returns 0 for success, -EINVAL for failure.
1004 */ 1279 */
1005static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, 1280static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1006 uint64_t start, uint64_t end, 1281 uint64_t start, uint64_t end,
1007 uint64_t dst, uint64_t flags) 1282 uint64_t dst, uint64_t flags)
1008{ 1283{
1009 struct amdgpu_device *adev = params->adev; 1284 struct amdgpu_device *adev = params->adev;
1010 const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; 1285 const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
1011 1286
1012 uint64_t cur_pe_start, cur_nptes, cur_dst; 1287 uint64_t addr, pe_start;
1013 uint64_t addr; /* next GPU address to be updated */
1014 struct amdgpu_bo *pt; 1288 struct amdgpu_bo *pt;
1015 unsigned nptes; /* next number of ptes to be updated */ 1289 unsigned nptes;
1016 uint64_t next_pe_start; 1290 int r;
1017 1291 bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
1018 /* initialize the variables */
1019 addr = start;
1020 pt = amdgpu_vm_get_pt(params, addr);
1021 if (!pt) {
1022 pr_err("PT not found, aborting update_ptes\n");
1023 return;
1024 }
1025
1026 if (params->shadow) {
1027 if (!pt->shadow)
1028 return;
1029 pt = pt->shadow;
1030 }
1031 if ((addr & ~mask) == (end & ~mask))
1032 nptes = end - addr;
1033 else
1034 nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1035
1036 cur_pe_start = amdgpu_bo_gpu_offset(pt);
1037 cur_pe_start += (addr & mask) * 8;
1038 cur_nptes = nptes;
1039 cur_dst = dst;
1040 1292
1041 /* for next ptb*/
1042 addr += nptes;
1043 dst += nptes * AMDGPU_GPU_PAGE_SIZE;
1044 1293
1045 /* walk over the address space and update the page tables */ 1294 /* walk over the address space and update the page tables */
1046 while (addr < end) { 1295 for (addr = start; addr < end; addr += nptes) {
1047 pt = amdgpu_vm_get_pt(params, addr); 1296 pt = amdgpu_vm_get_pt(params, addr);
1048 if (!pt) { 1297 if (!pt) {
1049 pr_err("PT not found, aborting update_ptes\n"); 1298 pr_err("PT not found, aborting update_ptes\n");
1050 return; 1299 return -EINVAL;
1051 } 1300 }
1052 1301
1053 if (params->shadow) { 1302 if (params->shadow) {
1303 if (WARN_ONCE(use_cpu_update,
1304 "CPU VM update doesn't suuport shadow pages"))
1305 return 0;
1306
1054 if (!pt->shadow) 1307 if (!pt->shadow)
1055 return; 1308 return 0;
1056 pt = pt->shadow; 1309 pt = pt->shadow;
1057 } 1310 }
1058 1311
@@ -1061,32 +1314,25 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1061 else 1314 else
1062 nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); 1315 nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1063 1316
1064 next_pe_start = amdgpu_bo_gpu_offset(pt); 1317 if (use_cpu_update) {
1065 next_pe_start += (addr & mask) * 8; 1318 r = amdgpu_bo_kmap(pt, (void *)&pe_start);
1319 if (r)
1320 return r;
1321 } else
1322 pe_start = amdgpu_bo_gpu_offset(pt);
1066 1323
1067 if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && 1324 pe_start += (addr & mask) * 8;
1068 ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
1069 /* The next ptb is consecutive to current ptb.
1070 * Don't call the update function now.
1071 * Will update two ptbs together in future.
1072 */
1073 cur_nptes += nptes;
1074 } else {
1075 params->func(params, cur_pe_start, cur_dst, cur_nptes,
1076 AMDGPU_GPU_PAGE_SIZE, flags);
1077 1325
1078 cur_pe_start = next_pe_start; 1326 params->func(params, pe_start, dst, nptes,
1079 cur_nptes = nptes; 1327 AMDGPU_GPU_PAGE_SIZE, flags);
1080 cur_dst = dst;
1081 }
1082 1328
1083 /* for next ptb*/
1084 addr += nptes;
1085 dst += nptes * AMDGPU_GPU_PAGE_SIZE; 1329 dst += nptes * AMDGPU_GPU_PAGE_SIZE;
1330
1331 if (use_cpu_update)
1332 amdgpu_bo_kunmap(pt);
1086 } 1333 }
1087 1334
1088 params->func(params, cur_pe_start, cur_dst, cur_nptes, 1335 return 0;
1089 AMDGPU_GPU_PAGE_SIZE, flags);
1090} 1336}
1091 1337
1092/* 1338/*
@@ -1098,11 +1344,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1098 * @end: last PTE to handle 1344 * @end: last PTE to handle
1099 * @dst: addr those PTEs should point to 1345 * @dst: addr those PTEs should point to
1100 * @flags: hw mapping flags 1346 * @flags: hw mapping flags
1347 * Returns 0 for success, -EINVAL for failure.
1101 */ 1348 */
1102static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, 1349static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1103 uint64_t start, uint64_t end, 1350 uint64_t start, uint64_t end,
1104 uint64_t dst, uint64_t flags) 1351 uint64_t dst, uint64_t flags)
1105{ 1352{
1353 int r;
1354
1106 /** 1355 /**
1107 * The MC L1 TLB supports variable sized pages, based on a fragment 1356 * The MC L1 TLB supports variable sized pages, based on a fragment
1108 * field in the PTE. When this field is set to a non-zero value, page 1357 * field in the PTE. When this field is set to a non-zero value, page
@@ -1131,28 +1380,30 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1131 1380
1132 /* system pages are not contiguous */ 1381 /* system pages are not contiguous */
1133 if (params->src || !(flags & AMDGPU_PTE_VALID) || 1382 if (params->src || !(flags & AMDGPU_PTE_VALID) ||
1134 (frag_start >= frag_end)) { 1383 (frag_start >= frag_end))
1135 1384 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1136 amdgpu_vm_update_ptes(params, start, end, dst, flags);
1137 return;
1138 }
1139 1385
1140 /* handle the 4K area at the beginning */ 1386 /* handle the 4K area at the beginning */
1141 if (start != frag_start) { 1387 if (start != frag_start) {
1142 amdgpu_vm_update_ptes(params, start, frag_start, 1388 r = amdgpu_vm_update_ptes(params, start, frag_start,
1143 dst, flags); 1389 dst, flags);
1390 if (r)
1391 return r;
1144 dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; 1392 dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
1145 } 1393 }
1146 1394
1147 /* handle the area in the middle */ 1395 /* handle the area in the middle */
1148 amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, 1396 r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
1149 flags | frag_flags); 1397 flags | frag_flags);
1398 if (r)
1399 return r;
1150 1400
1151 /* handle the 4K area at the end */ 1401 /* handle the 4K area at the end */
1152 if (frag_end != end) { 1402 if (frag_end != end) {
1153 dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; 1403 dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
1154 amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); 1404 r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
1155 } 1405 }
1406 return r;
1156} 1407}
1157 1408
1158/** 1409/**
@@ -1194,6 +1445,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1194 params.vm = vm; 1445 params.vm = vm;
1195 params.src = src; 1446 params.src = src;
1196 1447
1448 if (vm->use_cpu_for_update) {
 1449 /* params.src is used as a flag to indicate system memory */
1450 if (pages_addr)
1451 params.src = ~0;
1452
 1453 /* Wait for the PT BOs to be idle. PTs share the same reservation
 1454 * object as the root PD BO
 1455 */
1456 r = amdgpu_vm_bo_wait(adev, vm->root.bo);
1457 if (unlikely(r))
1458 return r;
1459
1460 params.func = amdgpu_vm_cpu_set_ptes;
1461 params.pages_addr = pages_addr;
1462 params.shadow = false;
1463 return amdgpu_vm_frag_ptes(&params, start, last + 1,
1464 addr, flags);
1465 }
1466
1197 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); 1467 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
1198 1468
1199 /* sync to everything on unmapping */ 1469 /* sync to everything on unmapping */
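
On the CPU path params.src is overloaded as a sentinel: the SDMA path uses it as the GPU address of a staging buffer of PTEs (a reading of the surrounding code, not spelled out in this hunk), while here any non-zero value simply tells amdgpu_vm_frag_ptes() that the destination is system memory so fragment coalescing must be skipped; the real per-page addresses travel in params.pages_addr:

	if (vm->use_cpu_for_update) {
		if (pages_addr)
			params.src = ~0;	/* flag, not an address */
		params.pages_addr = pages_addr;
		params.func = amdgpu_vm_cpu_set_ptes;
	}
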
@@ -1273,9 +1543,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1273 goto error_free; 1543 goto error_free;
1274 1544
1275 params.shadow = true; 1545 params.shadow = true;
1276 amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); 1546 r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1547 if (r)
1548 goto error_free;
1277 params.shadow = false; 1549 params.shadow = false;
1278 amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); 1550 r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1551 if (r)
1552 goto error_free;
1279 1553
1280 amdgpu_ring_pad_ib(ring, params.ib); 1554 amdgpu_ring_pad_ib(ring, params.ib);
1281 WARN_ON(params.ib->length_dw > ndw); 1555 WARN_ON(params.ib->length_dw > ndw);
@@ -2116,20 +2390,25 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
2116 * 2390 *
2117 * @adev: amdgpu_device pointer 2391 * @adev: amdgpu_device pointer
2118 * @vm: requested vm 2392 * @vm: requested vm
 2393 * @vm_context: AMDGPU_VM_CONTEXT_GFX or AMDGPU_VM_CONTEXT_COMPUTE
2119 * 2394 *
2120 * Init @vm fields. 2395 * Init @vm fields.
2121 */ 2396 */
2122int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) 2397int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2398 int vm_context)
2123{ 2399{
2124 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, 2400 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2125 AMDGPU_VM_PTE_COUNT(adev) * 8); 2401 AMDGPU_VM_PTE_COUNT(adev) * 8);
2126 unsigned ring_instance; 2402 unsigned ring_instance;
2127 struct amdgpu_ring *ring; 2403 struct amdgpu_ring *ring;
2128 struct amd_sched_rq *rq; 2404 struct amd_sched_rq *rq;
2129 int r; 2405 int r, i;
2406 u64 flags;
2130 2407
2131 vm->va = RB_ROOT; 2408 vm->va = RB_ROOT;
2132 vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); 2409 vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
2410 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2411 vm->reserved_vmid[i] = NULL;
2133 spin_lock_init(&vm->status_lock); 2412 spin_lock_init(&vm->status_lock);
2134 INIT_LIST_HEAD(&vm->invalidated); 2413 INIT_LIST_HEAD(&vm->invalidated);
2135 INIT_LIST_HEAD(&vm->cleared); 2414 INIT_LIST_HEAD(&vm->cleared);
@@ -2146,14 +2425,29 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2146 if (r) 2425 if (r)
2147 return r; 2426 return r;
2148 2427
2428 if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
2429 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2430 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2431 else
2432 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2433 AMDGPU_VM_USE_CPU_FOR_GFX);
2434 DRM_DEBUG_DRIVER("VM update mode is %s\n",
2435 vm->use_cpu_for_update ? "CPU" : "SDMA");
 2436 WARN_ONCE((vm->use_cpu_for_update && !amdgpu_vm_is_large_bar(adev)),
 2437 "CPU update of VM recommended only for large BAR systems\n");
2149 vm->last_dir_update = NULL; 2438 vm->last_dir_update = NULL;
2150 2439
2440 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
2441 AMDGPU_GEM_CREATE_VRAM_CLEARED;
2442 if (vm->use_cpu_for_update)
2443 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2444 else
2445 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
2446 AMDGPU_GEM_CREATE_SHADOW);
2447
2151 r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, 2448 r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
2152 AMDGPU_GEM_DOMAIN_VRAM, 2449 AMDGPU_GEM_DOMAIN_VRAM,
2153 AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 2450 flags,
2154 AMDGPU_GEM_CREATE_SHADOW |
2155 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
2156 AMDGPU_GEM_CREATE_VRAM_CLEARED,
2157 NULL, NULL, &vm->root.bo); 2451 NULL, NULL, &vm->root.bo);
2158 if (r) 2452 if (r)
2159 goto error_free_sched_entity; 2453 goto error_free_sched_entity;
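
amdgpu_vm_init() growing a vm_context argument lets GFX and Compute VMs be steered independently through the vm_update_mode bitmask (see the amdgpu_vm.h hunk below). The selection boils down to:

	int mode = adev->vm_manager.vm_update_mode;

	vm->use_cpu_for_update = (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) ?
		!!(mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE) :
		!!(mode & AMDGPU_VM_USE_CPU_FOR_GFX);
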
@@ -2198,7 +2492,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
2198 for (i = 0; i <= level->last_entry_used; i++) 2492 for (i = 0; i <= level->last_entry_used; i++)
2199 amdgpu_vm_free_levels(&level->entries[i]); 2493 amdgpu_vm_free_levels(&level->entries[i]);
2200 2494
2201 drm_free_large(level->entries); 2495 kvfree(level->entries);
2202} 2496}
2203 2497
2204/** 2498/**
@@ -2214,6 +2508,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2214{ 2508{
2215 struct amdgpu_bo_va_mapping *mapping, *tmp; 2509 struct amdgpu_bo_va_mapping *mapping, *tmp;
2216 bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; 2510 bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
2511 int i;
2217 2512
2218 amd_sched_entity_fini(vm->entity.sched, &vm->entity); 2513 amd_sched_entity_fini(vm->entity.sched, &vm->entity);
2219 2514
@@ -2237,6 +2532,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2237 2532
2238 amdgpu_vm_free_levels(&vm->root); 2533 amdgpu_vm_free_levels(&vm->root);
2239 dma_fence_put(vm->last_dir_update); 2534 dma_fence_put(vm->last_dir_update);
2535 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2536 amdgpu_vm_free_reserved_vmid(adev, vm, i);
2240} 2537}
2241 2538
2242/** 2539/**
@@ -2256,6 +2553,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2256 2553
2257 mutex_init(&id_mgr->lock); 2554 mutex_init(&id_mgr->lock);
2258 INIT_LIST_HEAD(&id_mgr->ids_lru); 2555 INIT_LIST_HEAD(&id_mgr->ids_lru);
2556 atomic_set(&id_mgr->reserved_vmid_num, 0);
2259 2557
2260 /* skip over VMID 0, since it is the system VM */ 2558 /* skip over VMID 0, since it is the system VM */
2261 for (j = 1; j < id_mgr->num_ids; ++j) { 2559 for (j = 1; j < id_mgr->num_ids; ++j) {
@@ -2270,11 +2568,27 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2270 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 2568 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2271 adev->vm_manager.seqno[i] = 0; 2569 adev->vm_manager.seqno[i] = 0;
2272 2570
2273
2274 atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); 2571 atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
2275 atomic64_set(&adev->vm_manager.client_counter, 0); 2572 atomic64_set(&adev->vm_manager.client_counter, 0);
2276 spin_lock_init(&adev->vm_manager.prt_lock); 2573 spin_lock_init(&adev->vm_manager.prt_lock);
2277 atomic_set(&adev->vm_manager.num_prt_users, 0); 2574 atomic_set(&adev->vm_manager.num_prt_users, 0);
2575
 2576 /* Unless overridden by the user, Compute VM tables default to CPU
 2577 * updates only on large BAR systems
 2578 */
2579#ifdef CONFIG_X86_64
2580 if (amdgpu_vm_update_mode == -1) {
2581 if (amdgpu_vm_is_large_bar(adev))
2582 adev->vm_manager.vm_update_mode =
2583 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2584 else
2585 adev->vm_manager.vm_update_mode = 0;
2586 } else
2587 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2588#else
2589 adev->vm_manager.vm_update_mode = 0;
2590#endif
2591
2278} 2592}
2279 2593
2280/** 2594/**
@@ -2302,3 +2616,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2302 } 2616 }
2303 } 2617 }
2304} 2618}
2619
2620int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2621{
2622 union drm_amdgpu_vm *args = data;
2623 struct amdgpu_device *adev = dev->dev_private;
2624 struct amdgpu_fpriv *fpriv = filp->driver_priv;
2625 int r;
2626
2627 switch (args->in.op) {
2628 case AMDGPU_VM_OP_RESERVE_VMID:
 2629 /* currently we only need to reserve VMIDs from the GFX hub */
2630 r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
2631 AMDGPU_GFXHUB);
2632 if (r)
2633 return r;
2634 break;
2635 case AMDGPU_VM_OP_UNRESERVE_VMID:
2636 amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
2637 break;
2638 default:
2639 return -EINVAL;
2640 }
2641
2642 return 0;
2643}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d97e28b4bdc4..936f158bc5ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -84,6 +84,16 @@ struct amdgpu_bo_list_entry;
84 84
85/* hardcode that limit for now */ 85/* hardcode that limit for now */
86#define AMDGPU_VA_RESERVED_SIZE (8 << 20) 86#define AMDGPU_VA_RESERVED_SIZE (8 << 20)
87/* max vmids dedicated for process */
88#define AMDGPU_VM_MAX_RESERVED_VMID 1
89
90#define AMDGPU_VM_CONTEXT_GFX 0
91#define AMDGPU_VM_CONTEXT_COMPUTE 1
92
93/* See vm_update_mode */
94#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
95#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
96
87 97
88struct amdgpu_vm_pt { 98struct amdgpu_vm_pt {
89 struct amdgpu_bo *bo; 99 struct amdgpu_bo *bo;
@@ -123,8 +133,13 @@ struct amdgpu_vm {
123 133
124 /* client id */ 134 /* client id */
125 u64 client_id; 135 u64 client_id;
136 /* dedicated to vm */
137 struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
126 /* each VM will map on CSA */ 138 /* each VM will map on CSA */
127 struct amdgpu_bo_va *csa_bo_va; 139 struct amdgpu_bo_va *csa_bo_va;
140
141 /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
142 bool use_cpu_for_update;
128}; 143};
129 144
130struct amdgpu_vm_id { 145struct amdgpu_vm_id {
@@ -152,6 +167,7 @@ struct amdgpu_vm_id_manager {
152 unsigned num_ids; 167 unsigned num_ids;
153 struct list_head ids_lru; 168 struct list_head ids_lru;
154 struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; 169 struct amdgpu_vm_id ids[AMDGPU_NUM_VM];
170 atomic_t reserved_vmid_num;
155}; 171};
156 172
157struct amdgpu_vm_manager { 173struct amdgpu_vm_manager {
@@ -168,8 +184,6 @@ struct amdgpu_vm_manager {
168 uint32_t block_size; 184 uint32_t block_size;
169 /* vram base address for page table entry */ 185 /* vram base address for page table entry */
170 u64 vram_base_offset; 186 u64 vram_base_offset;
171 /* is vm enabled? */
172 bool enabled;
173 /* vm pte handling */ 187 /* vm pte handling */
174 const struct amdgpu_vm_pte_funcs *vm_pte_funcs; 188 const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
175 struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; 189 struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
@@ -181,11 +195,18 @@ struct amdgpu_vm_manager {
181 /* partial resident texture handling */ 195 /* partial resident texture handling */
182 spinlock_t prt_lock; 196 spinlock_t prt_lock;
183 atomic_t num_prt_users; 197 atomic_t num_prt_users;
198
 199 /* controls how VM page tables are updated for Graphics and Compute:
 200 * bit 0: 0 = Graphics updated by SDMA, 1 = by CPU
 201 * bit 1: 0 = Compute updated by SDMA, 1 = by CPU
 202 */
203 int vm_update_mode;
184}; 204};
185 205
186void amdgpu_vm_manager_init(struct amdgpu_device *adev); 206void amdgpu_vm_manager_init(struct amdgpu_device *adev);
187void amdgpu_vm_manager_fini(struct amdgpu_device *adev); 207void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
188int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); 208int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
209 int vm_context);
189void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); 210void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
190void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, 211void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
191 struct list_head *validated, 212 struct list_head *validated,
@@ -204,6 +225,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
204int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); 225int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
205void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, 226void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
206 unsigned vmid); 227 unsigned vmid);
228void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
207int amdgpu_vm_update_directories(struct amdgpu_device *adev, 229int amdgpu_vm_update_directories(struct amdgpu_device *adev,
208 struct amdgpu_vm *vm); 230 struct amdgpu_vm *vm);
209int amdgpu_vm_clear_freed(struct amdgpu_device *adev, 231int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@ -238,5 +260,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
238void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, 260void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
239 struct amdgpu_bo_va *bo_va); 261 struct amdgpu_bo_va *bo_va);
240void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); 262void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
263int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
264bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
265 struct amdgpu_job *job);
266void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
241 267
242#endif 268#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index a4831fe0223b..a2c59a08b2bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -220,9 +220,9 @@ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man,
220} 220}
221 221
222const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { 222const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = {
223 amdgpu_vram_mgr_init, 223 .init = amdgpu_vram_mgr_init,
224 amdgpu_vram_mgr_fini, 224 .takedown = amdgpu_vram_mgr_fini,
225 amdgpu_vram_mgr_new, 225 .get_node = amdgpu_vram_mgr_new,
226 amdgpu_vram_mgr_del, 226 .put_node = amdgpu_vram_mgr_del,
227 amdgpu_vram_mgr_debug 227 .debug = amdgpu_vram_mgr_debug
228}; 228};
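
Strictly a robustness fix: positional initialization of an ops table silently mis-binds callbacks if fields are ever reordered or inserted, whereas designated initializers bind by name and let unset members default to NULL. The pattern, as a general sketch:

	/* preferred style for any ops/function table: */
	static const struct ttm_mem_type_manager_func example_func = {
		.init     = amdgpu_vram_mgr_init,
		.takedown = amdgpu_vram_mgr_fini,
		.get_node = amdgpu_vram_mgr_new,
		.put_node = amdgpu_vram_mgr_del,
		.debug    = amdgpu_vram_mgr_debug,
	};
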
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 8c9bc75a9c2d..8a0818b23ea4 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
165 struct drm_device *dev = crtc->dev; 165 struct drm_device *dev = crtc->dev;
166 struct amdgpu_device *adev = dev->dev_private; 166 struct amdgpu_device *adev = dev->dev_private;
167 int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); 167 int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
168 ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; 168 ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
169 169
170 memset(&args, 0, sizeof(args)); 170 memset(&args, 0, sizeof(args));
171 171
@@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
178void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) 178void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
179{ 179{
180 int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating); 180 int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
181 ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args; 181 ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
182 182
183 memset(&args, 0, sizeof(args)); 183 memset(&args, 0, sizeof(args));
184 184
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 6dc1410b380f..cb508a211b2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -22,7 +22,7 @@
22 */ 22 */
23 23
24#include <linux/firmware.h> 24#include <linux/firmware.h>
25#include "drmP.h" 25#include <drm/drmP.h>
26#include "amdgpu.h" 26#include "amdgpu.h"
27#include "amdgpu_pm.h" 27#include "amdgpu_pm.h"
28#include "amdgpu_ucode.h" 28#include "amdgpu_ucode.h"
@@ -906,6 +906,12 @@ static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev)
906 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); 906 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
907 u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; 907 u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
908 908
909 /* disable mclk switching if the refresh is >120Hz, even if the
910 * blanking period would allow it
911 */
912 if (amdgpu_dpm_get_vrefresh(adev) > 120)
913 return true;
914
909 if (vblank_time < switch_limit) 915 if (vblank_time < switch_limit)
910 return true; 916 return true;
911 else 917 else
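
The new early return places a refresh-rate gate in front of the existing blanking-time check: above 120 Hz the blanking window is treated as too short for a memory-clock switch regardless of the measured vblank time (switch_limit being 450 for GDDR5 and 300 otherwise, in the same units amdgpu_dpm_get_vblank_time() reports). Resulting decision order:

	if (amdgpu_dpm_get_vrefresh(adev) > 120)
		return true;		/* refresh too high, always unsafe */
	if (vblank_time < switch_limit)
		return true;		/* blanking window too short */
	return false;			/* mclk switching is safe */
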
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_smc.c b/drivers/gpu/drm/amd/amdgpu/ci_smc.c
index 7eb9069db8e3..b8ba51e045b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_smc.c
@@ -23,7 +23,7 @@
23 */ 23 */
24 24
25#include <linux/firmware.h> 25#include <linux/firmware.h>
26#include "drmP.h" 26#include <drm/drmP.h>
27#include "amdgpu.h" 27#include "amdgpu.h"
28#include "cikd.h" 28#include "cikd.h"
29#include "ppsmc.h" 29#include "ppsmc.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 9d33e5641419..37a499ab30eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -24,7 +24,7 @@
24#include <linux/firmware.h> 24#include <linux/firmware.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/module.h> 26#include <linux/module.h>
27#include "drmP.h" 27#include <drm/drmP.h>
28#include "amdgpu.h" 28#include "amdgpu.h"
29#include "amdgpu_atombios.h" 29#include "amdgpu_atombios.h"
30#include "amdgpu_ih.h" 30#include "amdgpu_ih.h"
@@ -964,62 +964,62 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
964} 964}
965 965
966static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { 966static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
967 {mmGRBM_STATUS, false}, 967 {mmGRBM_STATUS},
968 {mmGB_ADDR_CONFIG, false}, 968 {mmGB_ADDR_CONFIG},
969 {mmMC_ARB_RAMCFG, false}, 969 {mmMC_ARB_RAMCFG},
970 {mmGB_TILE_MODE0, false}, 970 {mmGB_TILE_MODE0},
971 {mmGB_TILE_MODE1, false}, 971 {mmGB_TILE_MODE1},
972 {mmGB_TILE_MODE2, false}, 972 {mmGB_TILE_MODE2},
973 {mmGB_TILE_MODE3, false}, 973 {mmGB_TILE_MODE3},
974 {mmGB_TILE_MODE4, false}, 974 {mmGB_TILE_MODE4},
975 {mmGB_TILE_MODE5, false}, 975 {mmGB_TILE_MODE5},
976 {mmGB_TILE_MODE6, false}, 976 {mmGB_TILE_MODE6},
977 {mmGB_TILE_MODE7, false}, 977 {mmGB_TILE_MODE7},
978 {mmGB_TILE_MODE8, false}, 978 {mmGB_TILE_MODE8},
979 {mmGB_TILE_MODE9, false}, 979 {mmGB_TILE_MODE9},
980 {mmGB_TILE_MODE10, false}, 980 {mmGB_TILE_MODE10},
981 {mmGB_TILE_MODE11, false}, 981 {mmGB_TILE_MODE11},
982 {mmGB_TILE_MODE12, false}, 982 {mmGB_TILE_MODE12},
983 {mmGB_TILE_MODE13, false}, 983 {mmGB_TILE_MODE13},
984 {mmGB_TILE_MODE14, false}, 984 {mmGB_TILE_MODE14},
985 {mmGB_TILE_MODE15, false}, 985 {mmGB_TILE_MODE15},
986 {mmGB_TILE_MODE16, false}, 986 {mmGB_TILE_MODE16},
987 {mmGB_TILE_MODE17, false}, 987 {mmGB_TILE_MODE17},
988 {mmGB_TILE_MODE18, false}, 988 {mmGB_TILE_MODE18},
989 {mmGB_TILE_MODE19, false}, 989 {mmGB_TILE_MODE19},
990 {mmGB_TILE_MODE20, false}, 990 {mmGB_TILE_MODE20},
991 {mmGB_TILE_MODE21, false}, 991 {mmGB_TILE_MODE21},
992 {mmGB_TILE_MODE22, false}, 992 {mmGB_TILE_MODE22},
993 {mmGB_TILE_MODE23, false}, 993 {mmGB_TILE_MODE23},
994 {mmGB_TILE_MODE24, false}, 994 {mmGB_TILE_MODE24},
995 {mmGB_TILE_MODE25, false}, 995 {mmGB_TILE_MODE25},
996 {mmGB_TILE_MODE26, false}, 996 {mmGB_TILE_MODE26},
997 {mmGB_TILE_MODE27, false}, 997 {mmGB_TILE_MODE27},
998 {mmGB_TILE_MODE28, false}, 998 {mmGB_TILE_MODE28},
999 {mmGB_TILE_MODE29, false}, 999 {mmGB_TILE_MODE29},
1000 {mmGB_TILE_MODE30, false}, 1000 {mmGB_TILE_MODE30},
1001 {mmGB_TILE_MODE31, false}, 1001 {mmGB_TILE_MODE31},
1002 {mmGB_MACROTILE_MODE0, false}, 1002 {mmGB_MACROTILE_MODE0},
1003 {mmGB_MACROTILE_MODE1, false}, 1003 {mmGB_MACROTILE_MODE1},
1004 {mmGB_MACROTILE_MODE2, false}, 1004 {mmGB_MACROTILE_MODE2},
1005 {mmGB_MACROTILE_MODE3, false}, 1005 {mmGB_MACROTILE_MODE3},
1006 {mmGB_MACROTILE_MODE4, false}, 1006 {mmGB_MACROTILE_MODE4},
1007 {mmGB_MACROTILE_MODE5, false}, 1007 {mmGB_MACROTILE_MODE5},
1008 {mmGB_MACROTILE_MODE6, false}, 1008 {mmGB_MACROTILE_MODE6},
1009 {mmGB_MACROTILE_MODE7, false}, 1009 {mmGB_MACROTILE_MODE7},
1010 {mmGB_MACROTILE_MODE8, false}, 1010 {mmGB_MACROTILE_MODE8},
1011 {mmGB_MACROTILE_MODE9, false}, 1011 {mmGB_MACROTILE_MODE9},
1012 {mmGB_MACROTILE_MODE10, false}, 1012 {mmGB_MACROTILE_MODE10},
1013 {mmGB_MACROTILE_MODE11, false}, 1013 {mmGB_MACROTILE_MODE11},
1014 {mmGB_MACROTILE_MODE12, false}, 1014 {mmGB_MACROTILE_MODE12},
1015 {mmGB_MACROTILE_MODE13, false}, 1015 {mmGB_MACROTILE_MODE13},
1016 {mmGB_MACROTILE_MODE14, false}, 1016 {mmGB_MACROTILE_MODE14},
1017 {mmGB_MACROTILE_MODE15, false}, 1017 {mmGB_MACROTILE_MODE15},
1018 {mmCC_RB_BACKEND_DISABLE, false, true}, 1018 {mmCC_RB_BACKEND_DISABLE, true},
1019 {mmGC_USER_RB_BACKEND_DISABLE, false, true}, 1019 {mmGC_USER_RB_BACKEND_DISABLE, true},
1020 {mmGB_BACKEND_MAP, false, false}, 1020 {mmGB_BACKEND_MAP, false},
1021 {mmPA_SC_RASTER_CONFIG, false, true}, 1021 {mmPA_SC_RASTER_CONFIG, true},
1022 {mmPA_SC_RASTER_CONFIG_1, false, true}, 1022 {mmPA_SC_RASTER_CONFIG_1, true},
1023}; 1023};
1024 1024
1025static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, 1025static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
@@ -1050,11 +1050,10 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num,
1050 if (reg_offset != cik_allowed_read_registers[i].reg_offset) 1050 if (reg_offset != cik_allowed_read_registers[i].reg_offset)
1051 continue; 1051 continue;
1052 1052
1053 if (!cik_allowed_read_registers[i].untouched) 1053 *value = cik_allowed_read_registers[i].grbm_indexed ?
1054 *value = cik_allowed_read_registers[i].grbm_indexed ? 1054 cik_read_indexed_register(adev, se_num,
1055 cik_read_indexed_register(adev, se_num, 1055 sh_num, reg_offset) :
1056 sh_num, reg_offset) : 1056 RREG32(reg_offset);
1057 RREG32(reg_offset);
1058 return 0; 1057 return 0;
1059 } 1058 }
1060 return -EINVAL; 1059 return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index c57c3f18af01..b8918432c572 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -20,7 +20,7 @@
20 * OTHER DEALINGS IN THE SOFTWARE. 20 * OTHER DEALINGS IN THE SOFTWARE.
21 * 21 *
22 */ 22 */
23#include "drmP.h" 23#include <drm/drmP.h>
24#include "amdgpu.h" 24#include "amdgpu.h"
25#include "amdgpu_ih.h" 25#include "amdgpu_ih.h"
26#include "cikd.h" 26#include "cikd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index a5f294ebff5c..0c1209cdd1cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -20,7 +20,7 @@
20 * OTHER DEALINGS IN THE SOFTWARE. 20 * OTHER DEALINGS IN THE SOFTWARE.
21 * 21 *
22 */ 22 */
23#include "drmP.h" 23#include <drm/drmP.h>
24#include "amdgpu.h" 24#include "amdgpu.h"
25#include "amdgpu_ih.h" 25#include "amdgpu_ih.h"
26#include "vid.h" 26#include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 0cdeb6a2e4a0..9f78c03a2e31 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -20,7 +20,7 @@
20 * OTHER DEALINGS IN THE SOFTWARE. 20 * OTHER DEALINGS IN THE SOFTWARE.
21 * 21 *
22 */ 22 */
23#include "drmP.h" 23#include <drm/drmP.h>
24#include "amdgpu.h" 24#include "amdgpu.h"
25#include "amdgpu_pm.h" 25#include "amdgpu_pm.h"
26#include "amdgpu_i2c.h" 26#include "amdgpu_i2c.h"
@@ -1207,8 +1207,11 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
1207 u32 tmp, wm_mask, lb_vblank_lead_lines = 0; 1207 u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
1208 1208
1209 if (amdgpu_crtc->base.enabled && num_heads && mode) { 1209 if (amdgpu_crtc->base.enabled && num_heads && mode) {
1210 active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; 1210 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
1211 line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); 1211 (u32)mode->clock);
1212 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
1213 (u32)mode->clock);
1214 line_time = min(line_time, (u32)65535);
1212 1215
1213 /* watermark for high clocks */ 1216 /* watermark for high clocks */
1214 if (adev->pm.dpm_enabled) { 1217 if (adev->pm.dpm_enabled) {
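
The watermark fix (repeated for dce_v11_0 and dce_v6_0 below) is about overflow: on 32-bit kernels 1000000UL is a 32-bit constant, so 1000000UL * mode->crtc_htotal wraps for htotal above ~4294 (well within reach of 4K modes), and a 64-by-32 division would not be portable there either. Promoting the multiply to u64 and dividing with div_u64() keeps the arithmetic exact everywhere; with mode->clock in kHz the results are scanline times in nanoseconds:

	active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
				    (u32)mode->clock);
	line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
				  (u32)mode->clock);
	line_time = min(line_time, (u32)65535);
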
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 773654a19749..4bcf01dc567a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
@@ -1176,8 +1176,11 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 1f3552967ba3..fd134a4629d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
@@ -118,14 +118,27 @@ static const struct {
 static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev,
 				     u32 block_offset, u32 reg)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_endpt_rreg ----no impl!!!!\n");
-	return 0;
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+	WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+	r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+	spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+	return r;
 }
 
 static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
 				      u32 block_offset, u32 reg, u32 v)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_endpt_wreg ----no impl!!!!\n");
+	unsigned long flags;
+
+	spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+	WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset,
+		reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK);
+	WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+	spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
 }
 
 static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
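The rewritten accessors above replace logging stubs with the classic index/data register-pair idiom: write the endpoint register number to the AZALIA INDEX register, then access the DATA register, holding adev->audio_endpt_idx_lock across the pair so concurrent callers cannot interleave the two MMIO operations (writes additionally OR in the REG_WRITE_EN bit). A minimal, compilable sketch of the same pattern; the fake device struct and pthread mutex are stand-ins for real MMIO and spin_lock_irqsave():

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Fake device: index/data model the AZALIA ENDPOINT_INDEX and
 * ENDPOINT_DATA registers; the mutex models the driver's spinlock. */
struct fake_dev {
	pthread_mutex_t idx_lock;
	uint32_t index;
	uint32_t data[256];
};

static uint32_t endpt_rreg(struct fake_dev *d, uint32_t reg)
{
	uint32_t r;

	pthread_mutex_lock(&d->idx_lock);
	d->index = reg;            /* WREG32(...ENDPOINT_INDEX..., reg) */
	r = d->data[d->index];     /* RREG32(...ENDPOINT_DATA...)      */
	pthread_mutex_unlock(&d->idx_lock);
	return r;
}

static void endpt_wreg(struct fake_dev *d, uint32_t reg, uint32_t v)
{
	pthread_mutex_lock(&d->idx_lock);
	d->index = reg;            /* the real code also ORs in WRITE_EN */
	d->data[d->index] = v;     /* WREG32(...ENDPOINT_DATA..., v)     */
	pthread_mutex_unlock(&d->idx_lock);
}

int main(void)
{
	static struct fake_dev d = { .idx_lock = PTHREAD_MUTEX_INITIALIZER };

	endpt_wreg(&d, 0x37, 0xdeadbeef);
	printf("reg 0x37 = 0x%08x\n", endpt_rreg(&d, 0x37));
	return 0;
}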
@@ -501,21 +514,16 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
 
 static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
 {
-	int num_crtc = 0;
-
 	switch (adev->asic_type) {
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
 	case CHIP_VERDE:
-		num_crtc = 6;
-		break;
+		return 6;
 	case CHIP_OLAND:
-		num_crtc = 2;
-		break;
+		return 2;
 	default:
-		num_crtc = 0;
+		return 0;
 	}
-	return num_crtc;
 }
 
 void dce_v6_0_disable_dce(struct amdgpu_device *adev)
@@ -983,8 +991,11 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 	fixed20_12 a, b, c;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 
@@ -1222,17 +1233,17 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev)
 		dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads);
 	}
 }
-/*
+
 static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev)
 {
 	int i;
-	u32 offset, tmp;
+	u32 tmp;
 
 	for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
-		offset = adev->mode_info.audio.pin[i].offset;
-		tmp = RREG32_AUDIO_ENDPT(offset,
-			AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
-		if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1)
+		tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset,
+				ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+		if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT,
+				PORT_CONNECTIVITY))
 			adev->mode_info.audio.pin[i].connected = false;
 		else
 			adev->mode_info.audio.pin[i].connected = true;
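From this hunk on, open-coded mask/shift arithmetic (the old PORT_CONNECTIVITY_MASK / PORT_CONNECTIVITY_SHIFT pair) gives way to the REG_GET_FIELD()/REG_SET_FIELD() helpers, which token-paste the register and field names to find the generated _MASK and __SHIFT constants. A standalone sketch of the bit manipulation they expand to; the shift and mask values below are invented for illustration, not the real AZALIA field layout:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical field placement; the real __SHIFT/_MASK constants come
 * from the generated register headers. */
#define PORT_CONNECTIVITY__SHIFT 30
#define PORT_CONNECTIVITY_MASK   0xc0000000u

/* What REG_GET_FIELD() boils down to: mask, then shift down. */
static uint32_t get_field(uint32_t reg)
{
	return (reg & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY__SHIFT;
}

/* What REG_SET_FIELD() boils down to: clear the field, OR in the value. */
static uint32_t set_field(uint32_t reg, uint32_t val)
{
	return (reg & ~PORT_CONNECTIVITY_MASK) |
	       ((val << PORT_CONNECTIVITY__SHIFT) & PORT_CONNECTIVITY_MASK);
}

int main(void)
{
	uint32_t tmp = set_field(0, 1);

	printf("reg 0x%08x, field %u\n", tmp, get_field(tmp));
	return 0;
}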
@@ -1254,45 +1265,206 @@ static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *ade
 	return NULL;
 }
 
-static void dce_v6_0_afmt_audio_select_pin(struct drm_encoder *encoder)
+static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder)
 {
 	struct amdgpu_device *adev = encoder->dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
-	u32 offset;
 
 	if (!dig || !dig->afmt || !dig->afmt->pin)
 		return;
 
-	offset = dig->afmt->offset;
-
-	WREG32(AFMT_AUDIO_SRC_CONTROL + offset,
-	       AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id));
-
+	WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset,
+	       REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT,
+			     dig->afmt->pin->id));
 }
 
 static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
 						struct drm_display_mode *mode)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_write_latency_fields---no imp!!!!!\n");
+	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct amdgpu_connector *amdgpu_connector = NULL;
+	int interlace = 0;
+	u32 tmp;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			amdgpu_connector = to_amdgpu_connector(connector);
+			break;
+		}
+	}
+
+	if (!amdgpu_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		interlace = 1;
+
+	if (connector->latency_present[interlace]) {
+		tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+				VIDEO_LIPSYNC, connector->video_latency[interlace]);
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+				AUDIO_LIPSYNC, connector->audio_latency[interlace]);
+	} else {
+		tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+				VIDEO_LIPSYNC, 0);
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+				AUDIO_LIPSYNC, 0);
+	}
+	WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+			ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
 }
 
 static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_write_speaker_allocation---no imp!!!!!\n");
+	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct amdgpu_connector *amdgpu_connector = NULL;
+	u8 *sadb = NULL;
+	int sad_count;
+	u32 tmp;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			amdgpu_connector = to_amdgpu_connector(connector);
+			break;
+		}
+	}
+
+	if (!amdgpu_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		sad_count = 0;
+	}
+
+	/* program the speaker allocation */
+	tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+			ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+	tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+			HDMI_CONNECTION, 0);
+	tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+			DP_CONNECTION, 0);
+
+	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+				DP_CONNECTION, 1);
+	else
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+				HDMI_CONNECTION, 1);
+
+	if (sad_count)
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+				SPEAKER_ALLOCATION, sadb[0]);
+	else
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+				SPEAKER_ALLOCATION, 5); /* stereo */
+
+	WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+			ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+	kfree(sadb);
 }
 
 static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_write_sad_regs---no imp!!!!!\n");
+	struct amdgpu_device *adev = encoder->dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct amdgpu_connector *amdgpu_connector = NULL;
+	struct cea_sad *sads;
+	int i, sad_count;
+
+	static const u16 eld_reg_to_type[][2] = {
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+		{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+	};
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			amdgpu_connector = to_amdgpu_connector(connector);
+			break;
+		}
+	}
+
+	if (!amdgpu_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads);
+	if (sad_count <= 0) {
+		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+		u32 tmp = 0;
+		u8 stereo_freqs = 0;
+		int max_channels = -1;
+		int j;
+
+		for (j = 0; j < sad_count; j++) {
+			struct cea_sad *sad = &sads[j];
+
+			if (sad->format == eld_reg_to_type[i][1]) {
+				if (sad->channels > max_channels) {
+					tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+							MAX_CHANNELS, sad->channels);
+					tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+							DESCRIPTOR_BYTE_2, sad->byte2);
+					tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+							SUPPORTED_FREQUENCIES, sad->freq);
+					max_channels = sad->channels;
+				}
+
+				if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+					stereo_freqs |= sad->freq;
+				else
+					break;
+			}
+		}
+
+		tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+				SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
+		WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+	}
 
 }
-*/
+
 static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
 				  struct amdgpu_audio_pin *pin,
 				  bool enable)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_enable---no imp!!!!!\n");
+	if (!pin)
+		return;
+
+	WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+			enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
 }
 
 static const u32 pin_offsets[7] =
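The new dce_v6_0_audio_write_sad_regs() above walks the EDID Short Audio Descriptors once per coding type: it keeps the descriptor with the highest channel count for that type and, for PCM, ORs the sample-rate bits into a separate stereo-frequency mask. A standalone sketch of that scan; the struct and the two sample SADs are simplified stand-ins for the kernel's struct cea_sad and real EDID data:

#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for the kernel's struct cea_sad */
struct sad { uint8_t format, channels, freq, byte2; };

#define CODING_TYPE_PCM 1   /* the value of HDMI_AUDIO_CODING_TYPE_PCM */

int main(void)
{
	/* made-up PCM descriptors: 2ch at three rates, 8ch at all rates */
	const struct sad sads[] = {
		{ CODING_TYPE_PCM, 2, 0x07, 0 },
		{ CODING_TYPE_PCM, 8, 0x7f, 0 },
	};
	int max_channels = -1;
	uint8_t best_freq = 0, best_byte2 = 0, stereo_freqs = 0;

	for (unsigned j = 0; j < sizeof(sads) / sizeof(sads[0]); j++) {
		if (sads[j].format != CODING_TYPE_PCM)
			continue;
		if (sads[j].channels > max_channels) { /* keep richest SAD */
			max_channels = sads[j].channels;
			best_freq = sads[j].freq;
			best_byte2 = sads[j].byte2;
		}
		stereo_freqs |= sads[j].freq; /* PCM rates feed stereo mask */
	}
	printf("max ch %d, freqs 0x%02x, byte2 0x%02x, stereo 0x%02x\n",
	       max_channels, (unsigned)best_freq, (unsigned)best_byte2,
	       (unsigned)stereo_freqs);
	return 0;
}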
@@ -1308,42 +1480,372 @@ static const u32 pin_offsets[7] =
 
 static int dce_v6_0_audio_init(struct amdgpu_device *adev)
 {
+	int i;
+
+	if (!amdgpu_audio)
+		return 0;
+
+	adev->mode_info.audio.enabled = true;
+
+	switch (adev->asic_type) {
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_VERDE:
+	default:
+		adev->mode_info.audio.num_pins = 6;
+		break;
+	case CHIP_OLAND:
+		adev->mode_info.audio.num_pins = 2;
+		break;
+	}
+
+	for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+		adev->mode_info.audio.pin[i].channels = -1;
+		adev->mode_info.audio.pin[i].rate = -1;
+		adev->mode_info.audio.pin[i].bits_per_sample = -1;
+		adev->mode_info.audio.pin[i].status_bits = 0;
+		adev->mode_info.audio.pin[i].category_code = 0;
+		adev->mode_info.audio.pin[i].connected = false;
+		adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+		adev->mode_info.audio.pin[i].id = i;
+		dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+	}
+
 	return 0;
 }
 
 static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
 {
+	int i;
 
+	if (!amdgpu_audio)
+		return;
+
+	if (!adev->mode_info.audio.enabled)
+		return;
+
+	for (i = 0; i < adev->mode_info.audio.num_pins; i++)
+		dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+
+	adev->mode_info.audio.enabled = false;
 }
 
-/*
-static void dce_v6_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
+static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder)
 {
-	DRM_INFO("xxxx: dce_v6_0_afmt_update_ACR---no imp!!!!!\n");
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
+	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1);
+	tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1);
+	WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
 }
-*/
-/*
- * build a HDMI Video Info Frame
- */
-/*
-static void dce_v6_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
-		void *buffer, size_t size)
+
+static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder,
+				   uint32_t clock, int bpc)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE,
+			bpc > 8 ? 0 : 1);
+	WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
+	WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
+	tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
+	WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
+	WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
+	tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
+	WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
+	WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
+	tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
+	WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder,
+					     struct drm_display_mode *mode)
 {
-	DRM_INFO("xxxx: dce_v6_0_afmt_update_avi_infoframe---no imp!!!!!\n");
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct hdmi_avi_infoframe frame;
+	u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+	uint8_t *payload = buffer + 3;
+	uint8_t *header = buffer;
+	ssize_t err;
+	u32 tmp;
+
+	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
+	if (err < 0) {
+		DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+		return;
+	}
+
+	err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+	if (err < 0) {
+		DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+		return;
+	}
+
+	WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
+	       payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24));
+	WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
+	       payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24));
+	WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
+	       payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24));
+	WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
+	       payload[0xC] | (payload[0xD] << 8) | (header[1] << 24));
+
+	tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+	/* anything other than 0 */
+	tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1,
+			HDMI_AUDIO_INFO_LINE, 2);
+	WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
 }
 
 static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
 {
-	DRM_INFO("xxxx: dce_v6_0_audio_set_dto---no imp!!!!!\n");
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+	u32 tmp;
+
+	/*
+	 * Two dtos: generally use dto0 for hdmi, dto1 for dp.
+	 * Express [24MHz / target pixel clock] as an exact rational
+	 * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
+	 * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
+	 */
+	tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
+	tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+			DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id);
+	if (em == ATOM_ENCODER_MODE_HDMI) {
+		tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+				DCCG_AUDIO_DTO_SEL, 0);
+	} else if (ENCODER_MODE_IS_DP(em)) {
+		tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+				DCCG_AUDIO_DTO_SEL, 1);
+	}
+	WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
+	if (em == ATOM_ENCODER_MODE_HDMI) {
+		WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000);
+		WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock);
+	} else if (ENCODER_MODE_IS_DP(em)) {
+		WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000);
+		WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock);
+	}
 }
-*/
-/*
- * update the info frames with the data from the current display mode
- */
+
+static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+	WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
+	WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+	WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
+	tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+	WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff);
+	WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
+	tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
+	WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+	tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1);
+	tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+	WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	tmp = RREG32(mmHDMI_GC + dig->afmt->offset);
+	tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 1 : 0);
+	WREG32(mmHDMI_GC + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	if (enable) {
+		tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
+		WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
+		WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+		WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+	} else {
+		tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0);
+		tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0);
+		WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0);
+		WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+	}
+}
+
+static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable)
+{
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	u32 tmp;
+
+	if (enable) {
+		tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+		WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1);
+		WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp);
+
+		tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1);
+		tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+		WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp);
+	} else {
+		WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0);
+	}
+}
+
 static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
 				  struct drm_display_mode *mode)
 {
-	DRM_INFO("xxxx: dce_v6_0_afmt_setmode ----no impl !!!!!!!!\n");
+	struct drm_device *dev = encoder->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct amdgpu_connector *amdgpu_connector = NULL;
+	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+	int bpc = 8;
+
+	if (!dig || !dig->afmt)
+		return;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			amdgpu_connector = to_amdgpu_connector(connector);
+			break;
+		}
+	}
+
+	if (!amdgpu_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	if (!dig->afmt->enabled)
+		return;
+
+	dig->afmt->pin = dce_v6_0_audio_get_pin(adev);
+	if (!dig->afmt->pin)
+		return;
+
+	if (encoder->crtc) {
+		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+		bpc = amdgpu_crtc->bpc;
+	}
+
+	/* disable audio before setting up hw */
+	dce_v6_0_audio_enable(adev, dig->afmt->pin, false);
+
+	dce_v6_0_audio_set_mute(encoder, true);
+	dce_v6_0_audio_write_speaker_allocation(encoder);
+	dce_v6_0_audio_write_sad_regs(encoder);
+	dce_v6_0_audio_write_latency_fields(encoder, mode);
+	if (em == ATOM_ENCODER_MODE_HDMI) {
+		dce_v6_0_audio_set_dto(encoder, mode->clock);
+		dce_v6_0_audio_set_vbi_packet(encoder);
+		dce_v6_0_audio_set_acr(encoder, mode->clock, bpc);
+	} else if (ENCODER_MODE_IS_DP(em)) {
+		dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10);
+	}
+	dce_v6_0_audio_set_packet(encoder);
+	dce_v6_0_audio_select_pin(encoder);
+	dce_v6_0_audio_set_avi_infoframe(encoder, mode);
+	dce_v6_0_audio_set_mute(encoder, false);
+	if (em == ATOM_ENCODER_MODE_HDMI) {
+		dce_v6_0_audio_hdmi_enable(encoder, 1);
+	} else if (ENCODER_MODE_IS_DP(em)) {
+		dce_v6_0_audio_dp_enable(encoder, 1);
+	}
+
+	/* enable audio after setting up hw */
+	dce_v6_0_audio_enable(adev, dig->afmt->pin, true);
 }
 
 static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable)
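The comment in dce_v6_0_audio_set_dto() above carries the whole trick: the DTO scales the 24 MHz audio reference by PHASE/MODULE, and because mode->clock is already in kHz, PHASE = 24000 with MODULE = pixel clock represents 24 MHz / pixel-clock with no rounding at all. A tiny worked example; 148500 kHz is just a plausible 1080p60 pixel clock, not a value taken from this patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t clock_khz = 148500;  /* hypothetical 1080p60 pixel clock */
	uint32_t phase = 24000;       /* written to DCCG_AUDIO_DTO0_PHASE  */
	uint32_t module = clock_khz;  /* written to DCCG_AUDIO_DTO0_MODULE */

	/* the hardware multiplies its 24 MHz reference by phase/module */
	printf("24 MHz / pixel clock = %u / %u = %f\n",
	       phase, module, (double)phase / module);
	return 0;
}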
@@ -1359,6 +1861,7 @@ static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable)
 	/* Silent, r600_hdmi_enable will raise WARN for us */
 	if (enable && dig->afmt->enabled)
 		return;
+
 	if (!enable && !dig->afmt->enabled)
 		return;
 
@@ -2753,6 +3256,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
 {
 
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
 
 	amdgpu_encoder->pixel_clock = adjusted_mode->clock;
 
@@ -2762,7 +3266,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
 	/* set scaler clears this on some chips */
 	dce_v6_0_set_interleave(encoder->crtc, mode);
 
-	if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
+	if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) {
 		dce_v6_0_afmt_enable(encoder, true);
 		dce_v6_0_afmt_setmode(encoder, adjusted_mode);
 	}
@@ -2824,11 +3328,12 @@ static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
 
 	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
 	struct amdgpu_encoder_atom_dig *dig;
+	int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
 
 	amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
 
 	if (amdgpu_atombios_encoder_is_digital(encoder)) {
-		if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
+		if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em))
 			dce_v6_0_afmt_enable(encoder, false);
 		dig = amdgpu_encoder->enc_priv;
 		dig->dig_encoder = -1;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 3c558c170e5e..a9e869554627 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
@@ -1091,8 +1091,11 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+					    (u32)mode->clock);
+		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+					  (u32)mode->clock);
+		line_time = min(line_time, (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index f1b479b6ac98..90bb08309a53 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index a125f9d44577..5173ca1fd159 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -393,8 +393,11 @@ out:
 
 static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
 {
-	const u32 num_tile_mode_states = 32;
-	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	u32 reg_offset, split_equal_to_row_size, *tilemode;
+
+	memset(adev->gfx.config.tile_mode_array, 0, sizeof(adev->gfx.config.tile_mode_array));
+	tilemode = adev->gfx.config.tile_mode_array;
 
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
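The tiling rework in the huge hunk below replaces a 32-arm switch executed once per register with a flat tilemode[] table that is filled in straight-line code and then written out in one loop; each GB_TILE_MODEn word is nothing more than OR-ed bitfields. A standalone sketch of that packing; the shift positions here are invented for illustration, not the real SI register layout:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical shift positions, illustration only. */
#define MICRO_TILE_MODE(x)  ((uint32_t)(x) << 0)
#define ARRAY_MODE(x)       ((uint32_t)(x) << 2)
#define PIPE_CONFIG(x)      ((uint32_t)(x) << 6)
#define TILE_SPLIT(x)       ((uint32_t)(x) << 11)
#define NUM_BANKS(x)        ((uint32_t)(x) << 14)

int main(void)
{
	uint32_t tilemode[2];

	/* build each table entry by OR-ing its fields, as the patch does */
	tilemode[0] = MICRO_TILE_MODE(1) | ARRAY_MODE(4) |
		      PIPE_CONFIG(2) | TILE_SPLIT(0) | NUM_BANKS(3);
	tilemode[1] = MICRO_TILE_MODE(1) | ARRAY_MODE(4) |
		      PIPE_CONFIG(2) | TILE_SPLIT(1) | NUM_BANKS(3);

	/* then write the whole table in one loop (WREG32 in the driver) */
	for (unsigned i = 0; i < 2; i++)
		printf("GB_TILE_MODE%u = 0x%08x\n", i, tilemode[i]);
	return 0;
}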
@@ -410,887 +413,680 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
 	}
 
 	if (adev->asic_type == CHIP_VERDE) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 4:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-				break;
-			case 5:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 7:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED));
-				break;
-			case 9:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-				break;
-			case 10:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-				break;
-			case 14:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 15:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 16:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 17:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 18:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-				break;
-			case 19:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 20:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 21:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 22:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 23:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 24:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 25:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 26:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 27:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 28:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 29:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 30:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			default:
-				continue;
-			}
-			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
-		}
-	} else if (adev->asic_type == CHIP_OLAND ||
-		   adev->asic_type == CHIP_HAINAN) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+		tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+			      NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+			      NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+			      NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			      NUM_BANKS(ADDR_SURF_8_BANK) |
+			      TILE_SPLIT(split_equal_to_row_size);
+		tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16);
+		tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			      NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			      NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			      NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+		tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P4_8x16);
+		tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16);
+		tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK) |
+			       TILE_SPLIT(split_equal_to_row_size);
+		tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16);
+		tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK) |
+			       TILE_SPLIT(split_equal_to_row_size);
+		tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK) |
+			       TILE_SPLIT(split_equal_to_row_size);
+		tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_2_BANK);
+		tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			       NUM_BANKS(ADDR_SURF_2_BANK);
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+	} else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) {
+		tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+			      NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+			      NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+			      NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			      NUM_BANKS(ADDR_SURF_8_BANK) |
+			      TILE_SPLIT(split_equal_to_row_size);
+		tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2);
+		tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			      NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			      NUM_BANKS(ADDR_SURF_8_BANK);
+		tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2) |
+			      TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			      BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			      BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			      MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			      NUM_BANKS(ADDR_SURF_4_BANK);
+		tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+		tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			      ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			      PIPE_CONFIG(ADDR_SURF_P2);
+		tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P2) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P2) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
+		tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			       ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			       PIPE_CONFIG(ADDR_SURF_P2) |
+			       TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			       BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			       BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			       MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			       NUM_BANKS(ADDR_SURF_16_BANK);
724 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 727 tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
725 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 728 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
726 NUM_BANKS(ADDR_SURF_16_BANK)); 729 PIPE_CONFIG(ADDR_SURF_P2);
727 break; 730 tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
728 case 2: 731 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
729 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 732 PIPE_CONFIG(ADDR_SURF_P2) |
730 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 733 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
731 PIPE_CONFIG(ADDR_SURF_P2) | 734 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
732 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 735 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
733 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 736 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 737 NUM_BANKS(ADDR_SURF_16_BANK);
735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 738 tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
736 NUM_BANKS(ADDR_SURF_16_BANK)); 739 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
737 break; 740 PIPE_CONFIG(ADDR_SURF_P2) |
738 case 3: 741 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
739 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 742 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
740 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
741 PIPE_CONFIG(ADDR_SURF_P2) | 744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
742 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 745 NUM_BANKS(ADDR_SURF_16_BANK);
743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 746 tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 747 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
745 NUM_BANKS(ADDR_SURF_8_BANK) | 748 PIPE_CONFIG(ADDR_SURF_P2) |
746 TILE_SPLIT(split_equal_to_row_size)); 749 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
747 break; 750 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
748 case 4: 751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
749 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
750 ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 753 NUM_BANKS(ADDR_SURF_16_BANK);
751 PIPE_CONFIG(ADDR_SURF_P2)); 754 tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
752 break; 755 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
753 case 5: 756 PIPE_CONFIG(ADDR_SURF_P2) |
754 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 757 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
755 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 758 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
756 PIPE_CONFIG(ADDR_SURF_P2) | 759 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
757 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 760 NUM_BANKS(ADDR_SURF_16_BANK) |
758 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 761 TILE_SPLIT(split_equal_to_row_size);
759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 762 tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 763 ARRAY_MODE(ARRAY_1D_TILED_THICK) |
761 NUM_BANKS(ADDR_SURF_8_BANK)); 764 PIPE_CONFIG(ADDR_SURF_P2);
762 break; 765 tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
763 case 6: 766 ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
764 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 767 PIPE_CONFIG(ADDR_SURF_P2) |
765 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 768 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
766 PIPE_CONFIG(ADDR_SURF_P2) | 769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
767 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
768 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 771 NUM_BANKS(ADDR_SURF_16_BANK) |
769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 772 TILE_SPLIT(split_equal_to_row_size);
770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 773 tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
771 NUM_BANKS(ADDR_SURF_8_BANK)); 774 ARRAY_MODE(ARRAY_2D_TILED_THICK) |
772 break; 775 PIPE_CONFIG(ADDR_SURF_P2) |
773 case 7: 776 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
774 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
775 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
776 PIPE_CONFIG(ADDR_SURF_P2) | 779 NUM_BANKS(ADDR_SURF_16_BANK) |
777 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | 780 TILE_SPLIT(split_equal_to_row_size);
778 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 781 tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 782 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 783 PIPE_CONFIG(ADDR_SURF_P2) |
781 NUM_BANKS(ADDR_SURF_4_BANK)); 784 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
782 break; 785 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
783 case 8: 786 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
784 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); 787 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
785 break; 788 NUM_BANKS(ADDR_SURF_8_BANK);
786 case 9: 789 tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
787 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 790 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
788 ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 791 PIPE_CONFIG(ADDR_SURF_P2) |
789 PIPE_CONFIG(ADDR_SURF_P2)); 792 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
790 break; 793 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
791 case 10: 794 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
792 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 795 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
793 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 796 NUM_BANKS(ADDR_SURF_8_BANK);
794 PIPE_CONFIG(ADDR_SURF_P2) | 797 tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
795 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 798 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
796 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 799 PIPE_CONFIG(ADDR_SURF_P2) |
797 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 800 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
798 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 801 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
799 NUM_BANKS(ADDR_SURF_16_BANK)); 802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
800 break; 803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
801 case 11: 804 NUM_BANKS(ADDR_SURF_8_BANK);
802 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 805 tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
803 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 806 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
804 PIPE_CONFIG(ADDR_SURF_P2) | 807 PIPE_CONFIG(ADDR_SURF_P2) |
805 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 808 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
806 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 809 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
807 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 810 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
808 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 811 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
809 NUM_BANKS(ADDR_SURF_16_BANK)); 812 NUM_BANKS(ADDR_SURF_8_BANK);
810 break; 813 tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
811 case 12: 814 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
812 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 815 PIPE_CONFIG(ADDR_SURF_P2) |
813 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 816 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
814 PIPE_CONFIG(ADDR_SURF_P2) | 817 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 818 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
816 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 819 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
817 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 820 NUM_BANKS(ADDR_SURF_4_BANK);
818 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 821 tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
819 NUM_BANKS(ADDR_SURF_16_BANK)); 822 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
820 break; 823 PIPE_CONFIG(ADDR_SURF_P2) |
821 case 13: 824 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
822 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 825 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
823 ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
824 PIPE_CONFIG(ADDR_SURF_P2)); 827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
825 break; 828 NUM_BANKS(ADDR_SURF_4_BANK);
826 case 14: 829 tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
827 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 830 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
828 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 831 PIPE_CONFIG(ADDR_SURF_P2) |
829 PIPE_CONFIG(ADDR_SURF_P2) | 832 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
830 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 833 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
831 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 834 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 835 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 836 NUM_BANKS(ADDR_SURF_4_BANK);
834 NUM_BANKS(ADDR_SURF_16_BANK)); 837 tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
835 break; 838 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
836 case 15: 839 PIPE_CONFIG(ADDR_SURF_P2) |
837 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 840 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
838 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 841 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
839 PIPE_CONFIG(ADDR_SURF_P2) | 842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
840 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
841 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 844 NUM_BANKS(ADDR_SURF_4_BANK);
842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 845 tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 846 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
844 NUM_BANKS(ADDR_SURF_16_BANK)); 847 PIPE_CONFIG(ADDR_SURF_P2) |
845 break; 848 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
846 case 16: 849 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
847 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
848 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
849 PIPE_CONFIG(ADDR_SURF_P2) | 852 NUM_BANKS(ADDR_SURF_4_BANK);
850 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 853 tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
851 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 854 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
852 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 855 PIPE_CONFIG(ADDR_SURF_P2) |
853 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
854 NUM_BANKS(ADDR_SURF_16_BANK)); 857 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
855 break; 858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
856 case 17: 859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
857 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 860 NUM_BANKS(ADDR_SURF_4_BANK);
858 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 861 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
859 PIPE_CONFIG(ADDR_SURF_P2) | 862 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
860 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
861 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
862 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
863 NUM_BANKS(ADDR_SURF_16_BANK) |
864 TILE_SPLIT(split_equal_to_row_size));
865 break;
866 case 18:
867 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
868 ARRAY_MODE(ARRAY_1D_TILED_THICK) |
869 PIPE_CONFIG(ADDR_SURF_P2));
870 break;
871 case 19:
872 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
873 ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
874 PIPE_CONFIG(ADDR_SURF_P2) |
875 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
876 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
877 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
878 NUM_BANKS(ADDR_SURF_16_BANK) |
879 TILE_SPLIT(split_equal_to_row_size));
880 break;
881 case 20:
882 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
883 ARRAY_MODE(ARRAY_2D_TILED_THICK) |
884 PIPE_CONFIG(ADDR_SURF_P2) |
885 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
888 NUM_BANKS(ADDR_SURF_16_BANK) |
889 TILE_SPLIT(split_equal_to_row_size));
890 break;
891 case 21:
892 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
893 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
894 PIPE_CONFIG(ADDR_SURF_P2) |
895 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
896 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
897 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
898 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
899 NUM_BANKS(ADDR_SURF_8_BANK));
900 break;
901 case 22:
902 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
903 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
904 PIPE_CONFIG(ADDR_SURF_P2) |
905 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
906 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
907 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
908 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
909 NUM_BANKS(ADDR_SURF_8_BANK));
910 break;
911 case 23:
912 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
913 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
914 PIPE_CONFIG(ADDR_SURF_P2) |
915 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
916 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
919 NUM_BANKS(ADDR_SURF_8_BANK));
920 break;
921 case 24:
922 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
923 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
924 PIPE_CONFIG(ADDR_SURF_P2) |
925 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
926 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
929 NUM_BANKS(ADDR_SURF_8_BANK));
930 break;
931 case 25:
932 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
933 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
934 PIPE_CONFIG(ADDR_SURF_P2) |
935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
936 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
937 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
938 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
939 NUM_BANKS(ADDR_SURF_4_BANK));
940 break;
941 case 26:
942 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
943 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
944 PIPE_CONFIG(ADDR_SURF_P2) |
945 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
946 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
949 NUM_BANKS(ADDR_SURF_4_BANK));
950 break;
951 case 27:
952 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
953 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
954 PIPE_CONFIG(ADDR_SURF_P2) |
955 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
956 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
959 NUM_BANKS(ADDR_SURF_4_BANK));
960 break;
961 case 28:
962 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
963 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
964 PIPE_CONFIG(ADDR_SURF_P2) |
965 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
966 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
967 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
968 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
969 NUM_BANKS(ADDR_SURF_4_BANK));
970 break;
971 case 29:
972 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
973 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
974 PIPE_CONFIG(ADDR_SURF_P2) |
975 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
976 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
977 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
978 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
979 NUM_BANKS(ADDR_SURF_4_BANK));
980 break;
981 case 30:
982 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
983 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
984 PIPE_CONFIG(ADDR_SURF_P2) |
985 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
986 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
987 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
988 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
989 NUM_BANKS(ADDR_SURF_4_BANK));
990 break;
991 default:
992 continue;
993 }
994 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
995 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
996 }
997 } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) { 863 } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) {
998 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 864 tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
999 switch (reg_offset) { 865 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1000 case 0: 866 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1001 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 867 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1002 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 868 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1003 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 869 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1004 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 870 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1005 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 871 NUM_BANKS(ADDR_SURF_16_BANK);
1006 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 872 tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1007 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 873 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1008 NUM_BANKS(ADDR_SURF_16_BANK)); 874 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1009 break; 875 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1010 case 1: 876 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1011 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 877 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1012 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 878 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1013 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 879 NUM_BANKS(ADDR_SURF_16_BANK);
1014 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 880 tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1015 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 881 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1016 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 882 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1017 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 883 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1018 NUM_BANKS(ADDR_SURF_16_BANK)); 884 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1019 break; 885 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1020 case 2: 886 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1021 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 887 NUM_BANKS(ADDR_SURF_16_BANK);
1022 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 888 tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1023 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 889 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1024 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 890 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1025 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 891 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1026 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1027 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1028 NUM_BANKS(ADDR_SURF_16_BANK)); 894 NUM_BANKS(ADDR_SURF_4_BANK) |
1029 break; 895 TILE_SPLIT(split_equal_to_row_size);
1030 case 3: 896 tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1031 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 897 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1032 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 898 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
1033 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 899 tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1034 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 900 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1035 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 901 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1036 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 902 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1037 NUM_BANKS(ADDR_SURF_4_BANK) | 903 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1038 TILE_SPLIT(split_equal_to_row_size)); 904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1039 break; 905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1040 case 4: 906 NUM_BANKS(ADDR_SURF_2_BANK);
1041 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 907 tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1042 ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 908 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1043 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); 909 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1044 break; 910 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1045 case 5: 911 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1046 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 912 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1047 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 913 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1048 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 914 NUM_BANKS(ADDR_SURF_2_BANK);
1049 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 915 tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1050 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 916 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 917 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 918 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1053 NUM_BANKS(ADDR_SURF_2_BANK)); 919 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1054 break; 920 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1055 case 6: 921 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1056 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 922 NUM_BANKS(ADDR_SURF_2_BANK);
1057 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 923 tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1058 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 924 tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 925 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1060 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 926 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
1061 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 927 tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1062 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 928 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1063 NUM_BANKS(ADDR_SURF_2_BANK)); 929 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1064 break; 930 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1065 case 7: 931 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1066 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1067 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1068 PIPE_CONFIG(ADDR_SURF_P4_8x16) | 934 NUM_BANKS(ADDR_SURF_16_BANK);
1069 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | 935 tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1070 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 936 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1071 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 937 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1072 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 938 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1073 NUM_BANKS(ADDR_SURF_2_BANK)); 939 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1074 break; 940 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1075 case 8: 941 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1076 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); 942 NUM_BANKS(ADDR_SURF_16_BANK);
1077 break; 943 tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1078 case 9: 944 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1079 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 945 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1080 ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 946 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1081 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); 947 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082 break; 948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1083 case 10: 949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1084 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 950 NUM_BANKS(ADDR_SURF_16_BANK);
1085 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 951 tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1086 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 952 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1087 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 953 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
1088 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 954 tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 955 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 956 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1091 NUM_BANKS(ADDR_SURF_16_BANK)); 957 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1092 break; 958 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1093 case 11: 959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1094 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1095 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 961 NUM_BANKS(ADDR_SURF_16_BANK);
1096 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 962 tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1097 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 963 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1098 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 964 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 965 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 966 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1101 NUM_BANKS(ADDR_SURF_16_BANK)); 967 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1102 break; 968 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1103 case 12: 969 NUM_BANKS(ADDR_SURF_16_BANK);
1104 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 970 tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1105 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 971 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1106 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 972 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1107 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 973 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1108 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 974 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1109 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1110 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1111 NUM_BANKS(ADDR_SURF_16_BANK)); 977 NUM_BANKS(ADDR_SURF_16_BANK);
1112 break; 978 tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1113 case 13: 979 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1114 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 980 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1115 ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 981 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1116 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); 982 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1117 break; 983 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1118 case 14: 984 NUM_BANKS(ADDR_SURF_16_BANK) |
1119 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 985 TILE_SPLIT(split_equal_to_row_size);
1120 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 986 tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1121 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 987 ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1122 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 988 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16);
1123 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 989 tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 990 ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 991 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1126 NUM_BANKS(ADDR_SURF_16_BANK)); 992 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1127 break; 993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1128 case 15: 994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1129 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 995 NUM_BANKS(ADDR_SURF_16_BANK) |
1130 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 996 TILE_SPLIT(split_equal_to_row_size);
1131 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 997 tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1132 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 998 ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1133 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 999 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 1000 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1136 NUM_BANKS(ADDR_SURF_16_BANK)); 1002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1137 break; 1003 NUM_BANKS(ADDR_SURF_16_BANK) |
1138 case 16: 1004 TILE_SPLIT(split_equal_to_row_size);
1139 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1005 tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1140 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1006 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1141 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1007 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1142 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 1008 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1143 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1009 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1010 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1011 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1146 NUM_BANKS(ADDR_SURF_16_BANK)); 1012 NUM_BANKS(ADDR_SURF_4_BANK);
1147 break; 1013 tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1148 case 17: 1014 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1149 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1015 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1150 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1016 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1151 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1017 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1152 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1018 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1019 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1020 NUM_BANKS(ADDR_SURF_4_BANK);
1155 NUM_BANKS(ADDR_SURF_16_BANK) | 1021 tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1156 TILE_SPLIT(split_equal_to_row_size)); 1022 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1157 break; 1023 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1158 case 18: 1024 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1159 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1025 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1160 ARRAY_MODE(ARRAY_1D_TILED_THICK) | 1026 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1161 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); 1027 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1162 break; 1028 NUM_BANKS(ADDR_SURF_2_BANK);
1163 case 19: 1029 tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1164 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1030 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1165 ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 1031 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1166 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1032 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1167 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1033 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1034 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1035 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1170 NUM_BANKS(ADDR_SURF_16_BANK) | 1036 NUM_BANKS(ADDR_SURF_2_BANK);
1171 TILE_SPLIT(split_equal_to_row_size)); 1037 tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1172 break; 1038 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1173 case 20: 1039 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1174 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1040 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1175 ARRAY_MODE(ARRAY_2D_TILED_THICK) | 1041 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1176 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1042 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1177 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1043 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1178 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 1044 NUM_BANKS(ADDR_SURF_2_BANK);
1179 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1045 tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1180 NUM_BANKS(ADDR_SURF_16_BANK) | 1046 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1181 TILE_SPLIT(split_equal_to_row_size)); 1047 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1182 break; 1048 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1183 case 21: 1049 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1184 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1050 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1185 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1051 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1186 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1052 NUM_BANKS(ADDR_SURF_2_BANK);
1187 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 1053 tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1188 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1054 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 1055 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1056 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1191 NUM_BANKS(ADDR_SURF_4_BANK)); 1057 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1192 break; 1058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1193 case 22: 1059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1194 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1060 NUM_BANKS(ADDR_SURF_2_BANK);
1195 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1061 tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1196 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1062 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1197 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 1063 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1198 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1064 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1065 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1066 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1201 NUM_BANKS(ADDR_SURF_4_BANK)); 1067 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1202 break; 1068 NUM_BANKS(ADDR_SURF_2_BANK);
1203 case 23: 1069 tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1204 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1070 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1205 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1071 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1206 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1072 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1207 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 1073 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1208 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 1075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1076 NUM_BANKS(ADDR_SURF_2_BANK);
1211 NUM_BANKS(ADDR_SURF_2_BANK)); 1077 tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1212 break; 1078 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1213 case 24: 1079 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1214 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 1080 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1215 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 1081 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1216 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 1082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 1083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1218 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 1084 NUM_BANKS(ADDR_SURF_2_BANK);
1219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 1085 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 1086 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
1221 NUM_BANKS(ADDR_SURF_2_BANK)); 1087 } else {
1222 break;
1223 case 25:
1224 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1225 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1226 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1228 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1231 NUM_BANKS(ADDR_SURF_2_BANK));
1232 break;
1233 case 26:
1234 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1235 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1236 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1237 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1238 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1241 NUM_BANKS(ADDR_SURF_2_BANK));
1242 break;
1243 case 27:
1244 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1245 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1246 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1247 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1248 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1251 NUM_BANKS(ADDR_SURF_2_BANK));
1252 break;
1253 case 28:
1254 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1255 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1256 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1257 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1258 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1261 NUM_BANKS(ADDR_SURF_2_BANK));
1262 break;
1263 case 29:
1264 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1265 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1266 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1268 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1271 NUM_BANKS(ADDR_SURF_2_BANK));
1272 break;
1273 case 30:
1274 gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1275 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1276 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1278 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1281 NUM_BANKS(ADDR_SURF_2_BANK));
1282 break;
1283 default:
1284 continue;
1285 }
1286 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1287 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1288 }
1289 } else{
1290
1291 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 1088 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
1292 } 1089 }
1293
1294} 1090}
1295 1091
1296static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1092static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
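[Note on the tile-mode rework above: the old per-register switch — one case per GB_TILE_MODE slot, each composing gb_tile_moden inline and writing it immediately — is replaced by a tilemode[] table filled out per ASIC family and flushed in one pass; the switch's default/continue escape, which silently skipped unhandled offsets, disappears with it. A minimal sketch of the resulting write path, assuming tilemode points at (or is later copied into) adev->gfx.config.tile_mode_array so the values cached for userspace stay in sync:

	unsigned reg_offset;

	/* program GB_TILE_MODE0..GB_TILE_MODE30 from the precomputed table */
	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
]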
@@ -1318,11 +1114,6 @@ static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1318 WREG32(mmGRBM_GFX_INDEX, data); 1114 WREG32(mmGRBM_GFX_INDEX, data);
1319} 1115}
1320 1116
1321static u32 gfx_v6_0_create_bitmask(u32 bit_width)
1322{
1323 return (u32)(((u64)1 << bit_width) - 1);
1324}
1325
1326static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1117static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1327{ 1118{
1328 u32 data, mask; 1119 u32 data, mask;
@@ -1332,8 +1123,8 @@ static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1332 1123
1333 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 1124 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
1334 1125
1335 mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_backends_per_se/ 1126 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se/
1336 adev->gfx.config.max_sh_per_se); 1127 adev->gfx.config.max_sh_per_se);
1337 1128
1338 return ~data & mask; 1129 return ~data & mask;
1339} 1130}
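[Note: both the gfx6 path here and the gfx7 path further down drop their private create_bitmask() helpers in favor of a shared amdgpu_gfx_create_bitmask(). The two removed bodies were the same one-liner modulo the spelling of the shift; for reference (the shared helper is presumably equivalent, but its body is not shown in this diff):

	/* a mask with the low bit_width bits set, e.g. bit_width = 4 -> 0xf */
	static u32 create_bitmask(u32 bit_width)
	{
		return (u32)((1ULL << bit_width) - 1);
	}

Here it is fed max_backends_per_se / max_sh_per_se, so the result masks exactly the RB bits that can belong to one shader array.]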
@@ -1399,11 +1190,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1399 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 1190 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
1400 raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK; 1191 raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK;
1401 1192
1402 if (!se_mask[idx]) { 1193 if (!se_mask[idx])
1403 raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; 1194 raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT;
1404 } else { 1195 else
1405 raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; 1196 raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT;
1406 }
1407 } 1197 }
1408 1198
1409 pkr0_mask &= rb_mask; 1199 pkr0_mask &= rb_mask;
@@ -1411,11 +1201,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1411 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 1201 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
1412 raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK; 1202 raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK;
1413 1203
1414 if (!pkr0_mask) { 1204 if (!pkr0_mask)
1415 raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; 1205 raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT;
1416 } else { 1206 else
1417 raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; 1207 raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT;
1418 }
1419 } 1208 }
1420 1209
1421 if (rb_per_se >= 2) { 1210 if (rb_per_se >= 2) {
@@ -1427,13 +1216,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1427 if (!rb0_mask || !rb1_mask) { 1216 if (!rb0_mask || !rb1_mask) {
1428 raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK; 1217 raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK;
1429 1218
1430 if (!rb0_mask) { 1219 if (!rb0_mask)
1431 raster_config_se |= 1220 raster_config_se |=
1432 RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; 1221 RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT;
1433 } else { 1222 else
1434 raster_config_se |= 1223 raster_config_se |=
1435 RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; 1224 RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT;
1436 }
1437 } 1225 }
1438 1226
1439 if (rb_per_se > 2) { 1227 if (rb_per_se > 2) {
@@ -1444,13 +1232,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
1444 if (!rb0_mask || !rb1_mask) { 1232 if (!rb0_mask || !rb1_mask) {
1445 raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK; 1233 raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK;
1446 1234
1447 if (!rb0_mask) { 1235 if (!rb0_mask)
1448 raster_config_se |= 1236 raster_config_se |=
1449 RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; 1237 RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT;
1450 } else { 1238 else
1451 raster_config_se |= 1239 raster_config_se |=
1452 RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; 1240 RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT;
1453 }
1454 } 1241 }
1455 } 1242 }
1456 } 1243 }
@@ -1479,8 +1266,9 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
1479 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1266 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1480 gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); 1267 gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
1481 data = gfx_v6_0_get_rb_active_bitmap(adev); 1268 data = gfx_v6_0_get_rb_active_bitmap(adev);
1482 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1269 active_rbs |= data <<
1483 rb_bitmap_width_per_sh); 1270 ((i * adev->gfx.config.max_sh_per_se + j) *
1271 rb_bitmap_width_per_sh);
1484 } 1272 }
1485 } 1273 }
1486 gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1274 gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
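[Note: only the line wrapping changes above, but the shift is worth unpacking: active_rbs packs each SH's RB mask SE-major, one slot of rb_bitmap_width_per_sh bits per SH. With hypothetical values of 2 SHs per SE and 2 RB bits per SH, SE i = 1, SH j = 1 lands at bit offset (1 * 2 + 1) * 2 = 6:

	/* slot index = se * max_sh_per_se + sh; each slot holds
	 * rb_bitmap_width_per_sh bits of that SH's active-RB mask */
	active_rbs |= data <<
		((i * adev->gfx.config.max_sh_per_se + j) *
		 rb_bitmap_width_per_sh);
]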
@@ -1494,13 +1282,12 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
1494 gfx_v6_0_raster_config(adev, &raster_config); 1282 gfx_v6_0_raster_config(adev, &raster_config);
1495 1283
1496 if (!adev->gfx.config.backend_enable_mask || 1284 if (!adev->gfx.config.backend_enable_mask ||
1497 adev->gfx.config.num_rbs >= num_rb_pipes) { 1285 adev->gfx.config.num_rbs >= num_rb_pipes)
1498 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 1286 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
1499 } else { 1287 else
1500 gfx_v6_0_write_harvested_raster_configs(adev, raster_config, 1288 gfx_v6_0_write_harvested_raster_configs(adev, raster_config,
1501 adev->gfx.config.backend_enable_mask, 1289 adev->gfx.config.backend_enable_mask,
1502 num_rb_pipes); 1290 num_rb_pipes);
1503 }
1504 1291
1505 /* cache the values for userspace */ 1292 /* cache the values for userspace */
1506 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1293 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
@@ -1517,11 +1304,6 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
1517 gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1304 gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1518 mutex_unlock(&adev->grbm_idx_mutex); 1305 mutex_unlock(&adev->grbm_idx_mutex);
1519} 1306}
1520/*
1521static void gmc_v6_0_init_compute_vmid(struct amdgpu_device *adev)
1522{
1523}
1524*/
1525 1307
1526static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 1308static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
1527 u32 bitmap) 1309 u32 bitmap)
@@ -1544,7 +1326,7 @@ static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev)
1544 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 1326 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
1545 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 1327 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
1546 1328
1547 mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_cu_per_sh); 1329 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
1548 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 1330 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
1549} 1331}
1550 1332
@@ -1688,7 +1470,8 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
1688 WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); 1470 WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
1689 1471
1690 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1472 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1691 mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1473 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1474 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1692 1475
1693 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1476 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1694 adev->gfx.config.mem_max_burst_length_bytes = 256; 1477 adev->gfx.config.mem_max_burst_length_bytes = 256;
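[Note: the RAMCFG read above is now cached — MC_ARB_RAMCFG is read once into adev->gfx.config.mc_arb_ramcfg and the local mc_arb_ramcfg becomes a copy of that field, presumably so later consumers of the gfx config can reuse the value without another register read:

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
]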
@@ -3719,6 +3502,12 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
3719 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 3502 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
3720 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 3503 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
3721 unsigned disable_masks[4 * 2]; 3504 unsigned disable_masks[4 * 2];
3505 u32 ao_cu_num;
3506
3507 if (adev->flags & AMD_IS_APU)
3508 ao_cu_num = 2;
3509 else
3510 ao_cu_num = adev->gfx.config.max_cu_per_sh;
3722 3511
3723 memset(cu_info, 0, sizeof(*cu_info)); 3512 memset(cu_info, 0, sizeof(*cu_info));
3724 3513
@@ -3737,16 +3526,18 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
3737 bitmap = gfx_v6_0_get_cu_enabled(adev); 3526 bitmap = gfx_v6_0_get_cu_enabled(adev);
3738 cu_info->bitmap[i][j] = bitmap; 3527 cu_info->bitmap[i][j] = bitmap;
3739 3528
3740 for (k = 0; k < 16; k++) { 3529 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
3741 if (bitmap & mask) { 3530 if (bitmap & mask) {
3742 if (counter < 2) 3531 if (counter < ao_cu_num)
3743 ao_bitmap |= mask; 3532 ao_bitmap |= mask;
3744 counter ++; 3533 counter ++;
3745 } 3534 }
3746 mask <<= 1; 3535 mask <<= 1;
3747 } 3536 }
3748 active_cu_number += counter; 3537 active_cu_number += counter;
3749 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 3538 if (i < 2 && j < 2)
3539 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
3540 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
3750 } 3541 }
3751 } 3542 }
3752 3543
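[Note: three behavioural changes land in gfx_v6_0_get_cu_info() here: the always-on CU budget becomes ao_cu_num (2 on APUs, all of max_cu_per_sh on dGPUs) rather than a hard-coded 2; the scan covers max_cu_per_sh CUs instead of a fixed 16; and ao_cu_mask is only accumulated for the first two SEs/SHs while the full per-SH result is cached in cu_info->ao_cu_bitmap[i][j]. The selection itself stays a first-come pass over the active bitmap:

	/* the first ao_cu_num active CUs in each SH count as always-on */
	for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
		if (bitmap & mask) {
			if (counter < ao_cu_num)
				ao_bitmap |= mask;
			counter++;
		}
		mask <<= 1;
	}
]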
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index ee2f2139e2eb..37b45e4403d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -21,12 +21,13 @@
21 * 21 *
22 */ 22 */
23#include <linux/firmware.h> 23#include <linux/firmware.h>
24#include "drmP.h" 24#include <drm/drmP.h>
25#include "amdgpu.h" 25#include "amdgpu.h"
26#include "amdgpu_ih.h" 26#include "amdgpu_ih.h"
27#include "amdgpu_gfx.h" 27#include "amdgpu_gfx.h"
28#include "cikd.h" 28#include "cikd.h"
29#include "cik.h" 29#include "cik.h"
30#include "cik_structs.h"
30#include "atom.h" 31#include "atom.h"
31#include "amdgpu_ucode.h" 32#include "amdgpu_ucode.h"
32#include "clearstate_ci.h" 33#include "clearstate_ci.h"
@@ -48,7 +49,7 @@
48#include "oss/oss_2_0_sh_mask.h" 49#include "oss/oss_2_0_sh_mask.h"
49 50
50#define GFX7_NUM_GFX_RINGS 1 51#define GFX7_NUM_GFX_RINGS 1
51#define GFX7_NUM_COMPUTE_RINGS 8 52#define GFX7_MEC_HPD_SIZE 2048
52 53
53static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); 54static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
54static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); 55static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1607,19 +1608,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
1607} 1608}
1608 1609
1609/** 1610/**
1610 * gfx_v7_0_create_bitmask - create a bitmask
1611 *
1612 * @bit_width: length of the mask
1613 *
1614 * create a variable length bit mask (CIK).
1615 * Returns the bitmask.
1616 */
1617static u32 gfx_v7_0_create_bitmask(u32 bit_width)
1618{
1619 return (u32)((1ULL << bit_width) - 1);
1620}
1621
1622/**
1623 * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs 1611 * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
1624 * 1612 *
1625 * @adev: amdgpu_device pointer 1613 * @adev: amdgpu_device pointer
@@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1637 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1625 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1638 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1626 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1639 1627
1640 mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / 1628 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1641 adev->gfx.config.max_sh_per_se); 1629 adev->gfx.config.max_sh_per_se);
1642 1630
1643 return (~data) & mask; 1631 return (~data) & mask;
1644} 1632}
@@ -1837,7 +1825,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
1837/** 1825/**
1838 * gmc_v7_0_init_compute_vmid - gart enable 1826 * gmc_v7_0_init_compute_vmid - gart enable
1839 * 1827 *
1840 * @rdev: amdgpu_device pointer 1828 * @adev: amdgpu_device pointer
1841 * 1829 *
1842 * Initialize compute vmid sh_mem registers 1830 * Initialize compute vmid sh_mem registers
1843 * 1831 *
@@ -2821,26 +2809,23 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
2821 } 2809 }
2822} 2810}
2823 2811
2824#define MEC_HPD_SIZE 2048
2825
2826static int gfx_v7_0_mec_init(struct amdgpu_device *adev) 2812static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
2827{ 2813{
2828 int r; 2814 int r;
2829 u32 *hpd; 2815 u32 *hpd;
2816 size_t mec_hpd_size;
2830 2817
2831 /* 2818 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2832 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total 2819
2833 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total 2820 /* take ownership of the relevant compute queues */
2834 * Nonetheless, we assign only 1 pipe because all other pipes will 2821 amdgpu_gfx_compute_queue_acquire(adev);
2835 * be handled by KFD
2836 */
2837 adev->gfx.mec.num_mec = 1;
2838 adev->gfx.mec.num_pipe = 1;
2839 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
2840 2822
2823 /* allocate space for ALL pipes (even the ones we don't own) */
2824 mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
2825 * GFX7_MEC_HPD_SIZE * 2;
2841 if (adev->gfx.mec.hpd_eop_obj == NULL) { 2826 if (adev->gfx.mec.hpd_eop_obj == NULL) {
2842 r = amdgpu_bo_create(adev, 2827 r = amdgpu_bo_create(adev,
2843 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, 2828 mec_hpd_size,
2844 PAGE_SIZE, true, 2829 PAGE_SIZE, true,
2845 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, 2830 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
2846 &adev->gfx.mec.hpd_eop_obj); 2831 &adev->gfx.mec.hpd_eop_obj);
@@ -2870,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
2870 } 2855 }
2871 2856
2872 /* clear memory. Not sure if this is required or not */ 2857 /* clear memory. Not sure if this is required or not */
2873 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); 2858 memset(hpd, 0, mec_hpd_size);
2874 2859
2875 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 2860 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2876 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 2861 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -2917,275 +2902,296 @@ struct hqd_registers
 	u32 cp_mqd_control;
 };
 
-struct bonaire_mqd
-{
-	u32 header;
-	u32 dispatch_initiator;
-	u32 dimensions[3];
-	u32 start_idx[3];
-	u32 num_threads[3];
-	u32 pipeline_stat_enable;
-	u32 perf_counter_enable;
-	u32 pgm[2];
-	u32 tba[2];
-	u32 tma[2];
-	u32 pgm_rsrc[2];
-	u32 vmid;
-	u32 resource_limits;
-	u32 static_thread_mgmt01[2];
-	u32 tmp_ring_size;
-	u32 static_thread_mgmt23[2];
-	u32 restart[3];
-	u32 thread_trace_enable;
-	u32 reserved1;
-	u32 user_data[16];
-	u32 vgtcs_invoke_count[2];
-	struct hqd_registers queue_state;
-	u32 dequeue_cntr;
-	u32 interrupt_queue[64];
-};
-
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
-	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct bonaire_mqd *mqd;
-	struct amdgpu_ring *ring;
-
-	/* fix up chicken bits */
-	tmp = RREG32(mmCP_CPF_DEBUG);
-	tmp |= (1 << 23);
-	WREG32(mmCP_CPF_DEBUG, tmp);
-
-	/* init the pipes */
-	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
-		int me = (i < 4) ? 1 : 2;
-		int pipe = (i < 4) ? i : (i - 4);
-
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
-
-		cik_srbm_select(adev, me, pipe, 0, 0);
-
-		/* write the EOP addr */
-		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
-
-		/* set the VMID assigned */
-		WREG32(mmCP_HPD_EOP_VMID, 0);
-
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
-		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
-		tmp |= order_base_2(MEC_HPD_SIZE / 8);
-		WREG32(mmCP_HPD_EOP_CONTROL, tmp);
-	}
-	cik_srbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-
-	/* init the queues.  Just two for now. */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct bonaire_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
-					     &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
-		}
-
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct bonaire_mqd));
-
-		mqd = (struct bonaire_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->static_thread_mgmt01[0] = 0xffffffff;
-		mqd->static_thread_mgmt01[1] = 0xffffffff;
-		mqd->static_thread_mgmt23[0] = 0xffffffff;
-		mqd->static_thread_mgmt23[1] = 0xffffffff;
-
-		mutex_lock(&adev->srbm_mutex);
-		cik_srbm_select(adev, ring->me,
-				ring->pipe,
-				ring->queue, 0);
-
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
-
-		/* enable doorbell? */
-		mqd->queue_state.cp_hqd_pq_doorbell_control =
-			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell)
-			mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		else
-			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
-
-		/* disable the queue if it's active */
-		mqd->queue_state.cp_hqd_dequeue_request = 0;
-		mqd->queue_state.cp_hqd_pq_rptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr= 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		}
-
-		/* set the pointer to the MQD */
-		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
-		/* set MQD vmid to 0 */
-		mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-		mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
-		WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
-		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
-			  CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
-
-		mqd->queue_state.cp_hqd_pq_control |=
-			order_base_2(ring->ring_size / 8);
-		mqd->queue_state.cp_hqd_pq_control |=
-			(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
-#ifdef __BIG_ENDIAN
-		mqd->queue_state.cp_hqd_pq_control |=
-			2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
-#endif
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
-			  CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
-			  CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
-		mqd->queue_state.cp_hqd_pq_control |=
-			CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
-			CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
-		WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-
-		/* set the wb address wether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
-
-		/* enable the doorbell if requested */
-		if (use_doorbell) {
-			mqd->queue_state.cp_hqd_pq_doorbell_control =
-				RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				(ring->doorbell_index <<
-				 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
-				  CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
-
-		} else {
-			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
-		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
-
-		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-		ring->wptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
-
-		/* set the vmid for the queue */
-		mqd->queue_state.cp_hqd_vmid = 0;
-		WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
-
-		/* activate the queue */
-		mqd->queue_state.cp_hqd_active = 1;
-		WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
-
-		cik_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
-
-		amdgpu_bo_kunmap(ring->mqd_obj);
-		amdgpu_bo_unreserve(ring->mqd_obj);
-
-		ring->ready = true;
-	}
-
-	gfx_v7_0_cp_compute_enable(adev, true);
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
+				       int mec, int pipe)
+{
+	u64 eop_gpu_addr;
+	u32 tmp;
+	size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
+			    * GFX7_MEC_HPD_SIZE * 2;
+
+	mutex_lock(&adev->srbm_mutex);
+	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
+
+	cik_srbm_select(adev, mec + 1, pipe, 0, 0);
+
+	/* write the EOP addr */
+	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+
+	/* set the VMID assigned */
+	WREG32(mmCP_HPD_EOP_VMID, 0);
+
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
+	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
+	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
+	WREG32(mmCP_HPD_EOP_CONTROL, tmp);
+
+	cik_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
+{
+	int i;
+
+	/* disable the queue if it's active */
+	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+
+		if (i == adev->usec_timeout)
+			return -ETIMEDOUT;
+
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+		WREG32(mmCP_HQD_PQ_RPTR, 0);
+		WREG32(mmCP_HQD_PQ_WPTR, 0);
+	}
+
+	return 0;
+}
+
+static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
+			      struct cik_mqd *mqd,
+			      uint64_t mqd_gpu_addr,
+			      struct amdgpu_ring *ring)
+{
+	u64 hqd_gpu_addr;
+	u64 wb_gpu_addr;
+
+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct cik_mqd));
+
+	mqd->header = 0xC0310800;
+	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+
+	/* enable doorbell? */
+	mqd->cp_hqd_pq_doorbell_control =
+		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+	if (ring->use_doorbell)
+		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+	else
+		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+
+	/* set the pointer to the MQD */
+	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+
+	/* set MQD vmid to 0 */
+	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+
+	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+	hqd_gpu_addr = ring->gpu_addr >> 8;
+	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up the HQD, this is similar to CP_RB0_CNTL */
+	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+	mqd->cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
+		  CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
+
+	mqd->cp_hqd_pq_control |=
+		order_base_2(ring->ring_size / 8);
+	mqd->cp_hqd_pq_control |=
+		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
+#ifdef __BIG_ENDIAN
+	mqd->cp_hqd_pq_control |=
+		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
+#endif
+	mqd->cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
+		  CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
+		  CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
+	mqd->cp_hqd_pq_control |=
+		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
+		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
+
+	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set the wb address wether it's enabled or not */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_rptr_report_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* enable the doorbell if requested */
+	if (ring->use_doorbell) {
+		mqd->cp_hqd_pq_doorbell_control =
+			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+		mqd->cp_hqd_pq_doorbell_control &=
+			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
+		mqd->cp_hqd_pq_doorbell_control |=
+			(ring->doorbell_index <<
+			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
+		mqd->cp_hqd_pq_doorbell_control |=
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->cp_hqd_pq_doorbell_control &=
+			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
+			  CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+
+	} else {
+		mqd->cp_hqd_pq_doorbell_control = 0;
+	}
+
+	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	ring->wptr = 0;
+	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+	/* set the vmid for the queue */
+	mqd->cp_hqd_vmid = 0;
+
+	/* defaults */
+	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
+	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
+	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
+	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
+	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
+	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
+	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
+	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
+	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
+	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
+	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
+
+	/* activate the queue */
+	mqd->cp_hqd_active = 1;
+}
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
+{
+	uint32_t tmp;
+	uint32_t mqd_reg;
+	uint32_t *mqd_data;
+
+	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+	mqd_data = &mqd->cp_mqd_base_addr_lo;
+
+	/* disable wptr polling */
+	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+
+	/* program all HQD registers */
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	/* activate the HQD */
+	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	return 0;
+}
+
+static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
+{
+	int r;
+	u64 mqd_gpu_addr;
+	struct cik_mqd *mqd;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	if (ring->mqd_obj == NULL) {
+		r = amdgpu_bo_create(adev,
+				     sizeof(struct cik_mqd),
+				     PAGE_SIZE, true,
+				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				     &ring->mqd_obj);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
+			return r;
+		}
+	}
+
+	r = amdgpu_bo_reserve(ring->mqd_obj, false);
+	if (unlikely(r != 0))
+		goto out;
+
+	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
+			  &mqd_gpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
+		goto out_unreserve;
+	}
+	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+	if (r) {
+		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
+		goto out_unreserve;
+	}
+
+	mutex_lock(&adev->srbm_mutex);
+	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
+	gfx_v7_0_mqd_deactivate(adev);
+	gfx_v7_0_mqd_commit(adev, mqd);
+
+	cik_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	amdgpu_bo_kunmap(ring->mqd_obj);
+out_unreserve:
+	amdgpu_bo_unreserve(ring->mqd_obj);
+out:
+	return 0;
+}
+
+/**
+ * gfx_v7_0_cp_compute_resume - setup the compute queue registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+	int r, i, j;
+	u32 tmp;
+	struct amdgpu_ring *ring;
+
+	/* fix up chicken bits */
+	tmp = RREG32(mmCP_CPF_DEBUG);
+	tmp |= (1 << 23);
+	WREG32(mmCP_CPF_DEBUG, tmp);
+
+	/* init all pipes (even the ones we don't own) */
+	for (i = 0; i < adev->gfx.mec.num_mec; i++)
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
+			gfx_v7_0_compute_pipe_init(adev, i, j);
+
+	/* init the queues */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		r = gfx_v7_0_compute_queue_init(adev, i);
+		if (r) {
+			gfx_v7_0_cp_compute_fini(adev);
+			return r;
+		}
+	}
+
+	gfx_v7_0_cp_compute_enable(adev, true);
+
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		ring = &adev->gfx.compute_ring[i];
+		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 			ring->ready = false;
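
The new gfx_v7_0_mqd_commit() works because struct cik_mqd lays out its cp_* fields in the same order as the hardware's MQD/HQD register block, so `mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]` indexes the struct as if it were the register file, and the second loop guarantees CP_HQD_ACTIVE is written last. A compilable sketch of that indexing trick, with invented register offsets (only the scheme matches the driver):

    #include <stdio.h>
    #include <stdint.h>

    enum {
        REG_MQD_BASE_ADDR = 0x200,  /* invented offsets, register order only */
        REG_HQD_ACTIVE    = 0x202,
        REG_HQD_VMID      = 0x203,
        REG_MQD_CONTROL   = 0x208,
    };

    static uint32_t reg_file[0x210];                     /* stand-in for WREG32() */
    static void wreg32(uint32_t reg, uint32_t v) { reg_file[reg] = v; }

    int main(void)
    {
        /* one u32 per register, in register order, like cik_mqd's fields */
        uint32_t mqd_data[REG_MQD_CONTROL - REG_MQD_BASE_ADDR + 1] = {
            [REG_HQD_ACTIVE - REG_MQD_BASE_ADDR] = 1,    /* cp_hqd_active */
        };
        uint32_t reg;

        /* program the non-activating registers first... */
        for (reg = REG_HQD_VMID; reg <= REG_MQD_CONTROL; reg++)
            wreg32(reg, mqd_data[reg - REG_MQD_BASE_ADDR]);
        /* ...then the BASE..ACTIVE window, so ACTIVE lands last */
        for (reg = REG_MQD_BASE_ADDR; reg <= REG_HQD_ACTIVE; reg++)
            wreg32(reg, mqd_data[reg - REG_MQD_BASE_ADDR]);

        printf("HQD_ACTIVE = %u\n", reg_file[REG_HQD_ACTIVE]);
        return 0;
    }
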
@@ -3797,6 +3803,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		gfx_v7_0_update_rlc(adev, tmp);
 
 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+		if (orig != data)
+			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
 	} else {
 		gfx_v7_0_enable_gui_idle_interrupt(adev, false);
 
@@ -3806,11 +3815,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		RREG32(mmCB_CGTT_SCLK_CTRL);
 
 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
-	}
-
-	if (orig != data)
-		WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+		if (orig != data)
+			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 
+		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+	}
 }
 
 static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
@@ -4089,7 +4098,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
 
-	mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
 	return (~data) & mask;
 }
@@ -4470,7 +4479,7 @@ static int gfx_v7_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
 	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
@@ -4662,11 +4671,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				      int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			     &adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r;
+	int i, j, k, r, ring_id;
+
+	switch (adev->asic_type) {
+	case CHIP_KAVERI:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4716,29 +4771,23 @@ static int gfx_v7_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		unsigned irq_type;
-
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
-	}
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v7_0_compute_ring_init(adev,
+							       ring_id,
+							       i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
+		}
+	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
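
The triple loop above is what "allocate horizontally across pipes" means: the queue index advances in the middle loop and the pipe index in the innermost one, so consecutive rings land on different hardware pipes instead of filling pipe 0 first. A standalone sketch of the resulting order (the 8-ring cap stands in for the per-ASIC ring limit):

    #include <stdio.h>

    int main(void)
    {
        int num_mec = 1, num_queue_per_pipe = 8, num_pipe_per_mec = 4;
        int mec, queue, pipe, ring_id = 0;

        for (mec = 0; mec < num_mec; ++mec)
            for (queue = 0; queue < num_queue_per_pipe; queue++)
                for (pipe = 0; pipe < num_pipe_per_mec; pipe++) {
                    if (ring_id >= 8)   /* stand-in for the ring limit */
                        goto done;
                    /* prints queue 0 on pipes 0..3, then queue 1 ... */
                    printf("ring %d -> mec %d pipe %d queue %d\n",
                           ring_id++, mec, pipe, queue);
                }
    done:
        return 0;
    }
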
@@ -4969,8 +5018,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 	u32 mec_int_cntl, mec_int_cntl_reg;
 
 	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
+	 * amdgpu controls only the first MEC. That's why this function only
+	 * handles the setting of interrupts for this specific MEC. All other
 	 * pipes' interrupts are set by amdkfd.
 	 */
 
@@ -4979,6 +5028,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 	case 0:
 		mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
 		break;
+	case 1:
+		mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
+		break;
+	case 2:
+		mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
+		break;
+	case 3:
+		mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
+		break;
 	default:
 		DRM_DEBUG("invalid pipe %d\n", pipe);
 		return;
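
Each MEC1 pipe has its own interrupt-control register, which is why the switch grows one case per pipe once amdgpu drives all four. An equivalent table form, with invented register values, just to show the shape of the mapping (the driver keeps the explicit switch, which also tolerates non-contiguous register offsets):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* stand-ins for mmCP_ME1_PIPE0..3_INT_CNTL */
        static const uint32_t me1_pipe_int_cntl[4] = {
            0x3040, 0x3041, 0x3042, 0x3043,
        };
        int pipe;

        for (pipe = 0; pipe < 4; pipe++)
            printf("pipe %d -> INT_CNTL reg 0x%04x\n",
                   pipe, me1_pipe_int_cntl[pipe]);
        return 0;
    }
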
@@ -5336,6 +5394,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
 	unsigned disable_masks[4 * 2];
+	u32 ao_cu_num;
+
+	if (adev->flags & AMD_IS_APU)
+		ao_cu_num = 2;
+	else
+		ao_cu_num = adev->gfx.config.max_cu_per_sh;
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
@@ -5354,16 +5418,18 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
-			for (k = 0; k < 16; k ++) {
+			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (bitmap & mask) {
-					if (counter < 2)
+					if (counter < ao_cu_num)
 						ao_bitmap |= mask;
 					counter ++;
 				}
 				mask <<= 1;
 			}
 			active_cu_number += counter;
-			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			if (i < 2 && j < 2)
+				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 		}
 	}
 	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
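
A worked example of the always-on CU accounting in the loop above, with a made-up active-CU bitmap: the first ao_cu_num active CUs are folded into the AO mask and every active CU is counted.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t bitmap = 0x3b;     /* CUs 0,1,3,4,5 active; CU 2 fused off */
        unsigned int ao_cu_num = 2; /* APU case in the hunk above */
        unsigned int max_cu_per_sh = 8;
        uint32_t mask = 1, ao_bitmap = 0;
        unsigned int k, counter = 0;

        for (k = 0; k < max_cu_per_sh; k++) {
            if (bitmap & mask) {
                if (counter < ao_cu_num)
                    ao_bitmap |= mask;   /* first two active CUs */
                counter++;
            }
            mask <<= 1;
        }
        /* prints: active CUs: 5, AO bitmap: 0x3 */
        printf("active CUs: %u, AO bitmap: 0x%x\n", counter, ao_bitmap);
        return 0;
    }
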
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
index 2f5164cc0e53..6fb9c1524691 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
@@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;
 
+struct amdgpu_device;
+struct cik_mqd;
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 758d636a6f52..aa5a50f5eac8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -21,7 +21,7 @@
  *
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "vi.h"
@@ -40,7 +40,6 @@
 
 #include "bif/bif_5_0_d.h"
 #include "bif/bif_5_0_sh_mask.h"
-
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
@@ -52,7 +51,7 @@
 #include "smu/smu_7_1_3_d.h"
 
 #define GFX8_NUM_GFX_RINGS     1
-#define GFX8_NUM_COMPUTE_RINGS 8
+#define GFX8_MEC_HPD_SIZE 2048
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -657,10 +656,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
-static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
-static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
-static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
-static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);
+static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
+static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 
 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
@@ -859,7 +856,8 @@ err1:
 }
 
 
-static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
+static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
+{
 	release_firmware(adev->gfx.pfp_fw);
 	adev->gfx.pfp_fw = NULL;
 	release_firmware(adev->gfx.me_fw);
@@ -941,12 +939,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 
-	/* chain ib ucode isn't formal released, just disable it by far
-	 * TODO: when ucod ready we should use ucode version to judge if
-	 * chain-ib support or not.
-	 */
-	adev->virt.chained_ib_support = false;
-
 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
@@ -960,6 +952,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
+	/*
+	 * Support for MCBP/Virtualization in combination with chained IBs is
+	 * formal released on feature version #46
+	 */
+	if (adev->gfx.ce_feature_version >= 46 &&
+	    adev->gfx.pfp_feature_version >= 46) {
+		adev->virt.chained_ib_support = true;
+		DRM_INFO("Chained IB support enabled!\n");
+	} else
+		adev->virt.chained_ib_support = false;
+
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 	if (err)
@@ -1373,64 +1376,22 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
 	}
 }
 
-static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring,
-				  struct amdgpu_irq_src *irq)
-{
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-	int r = 0;
-
-	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
-	if (r)
-		return r;
-
-	ring->adev = NULL;
-	ring->ring_obj = NULL;
-	ring->use_doorbell = true;
-	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
-	if (adev->gfx.mec2_fw) {
-		ring->me = 2;
-		ring->pipe = 0;
-	} else {
-		ring->me = 1;
-		ring->pipe = 1;
-	}
-
-	ring->queue = 0;
-	ring->eop_gpu_addr = kiq->eop_gpu_addr;
-	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
-	r = amdgpu_ring_init(adev, ring, 1024,
-			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
-	if (r)
-		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
-
-	return r;
-}
-static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
-				   struct amdgpu_irq_src *irq)
-{
-	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
-	amdgpu_ring_fini(ring);
-}
-
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
+	size_t mec_hpd_size;
 
-	/*
-	 * we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
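
queue_bitmap is the ownership contract between amdgpu and amdkfd: bits set by amdgpu_gfx_compute_queue_acquire() are queues the graphics driver programs, everything else is left to KFD. A sketch of one plausible policy (first two queues of each pipe on MEC0 go to amdgpu; this is an assumption for illustration, the real policy lives in amdgpu_gfx.c):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long queue_bitmap = 0;   /* 64 queues max */
        int num_mec = 2, num_pipe_per_mec = 4, num_queue_per_pipe = 8;
        int mec, pipe, queue, bit;

        for (mec = 0; mec < num_mec; mec++)
            for (pipe = 0; pipe < num_pipe_per_mec; pipe++)
                for (queue = 0; queue < num_queue_per_pipe; queue++) {
                    bit = mec * num_pipe_per_mec * num_queue_per_pipe
                        + pipe * num_queue_per_pipe + queue;
                    if (mec == 0 && queue < 2)   /* assumed policy */
                        queue_bitmap |= 1ULL << bit;
                }
        /* bits 0,1, 8,9, 16,17, 24,25 -> 0x0000000003030303 */
        printf("amdgpu-owned queue mask: 0x%016llx\n", queue_bitmap);
        return 0;
    }
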
@@ -1459,7 +1420,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
+	memset(hpd, 0, mec_hpd_size);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -1467,38 +1428,6 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
-}
-
-static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
-{
-	int r;
-	u32 *hpd;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
-				    &kiq->eop_gpu_addr, (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
-		return r;
-	}
-
-	memset(hpd, 0, MEC_HPD_SIZE);
-
-	r = amdgpu_bo_reserve(kiq->eop_obj, true);
-	if (unlikely(r != 0))
-		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
-	amdgpu_bo_kunmap(kiq->eop_obj);
-	amdgpu_bo_unreserve(kiq->eop_obj);
-
-	return 0;
-}
-
 static const u32 vgpr_init_compute_shader[] =
 {
 	0x7e000209, 0x7e020208,
@@ -1907,46 +1836,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.max_tile_pipes = 2;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_backends_per_se = 2;
-
-		switch (adev->pdev->revision) {
-		case 0xc4:
-		case 0x84:
-		case 0xc8:
-		case 0xcc:
-		case 0xe1:
-		case 0xe3:
-			/* B10 */
-			adev->gfx.config.max_cu_per_sh = 8;
-			break;
-		case 0xc5:
-		case 0x81:
-		case 0x85:
-		case 0xc9:
-		case 0xcd:
-		case 0xe2:
-		case 0xe4:
-			/* B8 */
-			adev->gfx.config.max_cu_per_sh = 6;
-			break;
-		case 0xc6:
-		case 0xca:
-		case 0xce:
-		case 0x88:
-		case 0xe6:
-			/* B6 */
-			adev->gfx.config.max_cu_per_sh = 6;
-			break;
-		case 0xc7:
-		case 0x87:
-		case 0xcb:
-		case 0xe5:
-		case 0x89:
-		default:
-			/* B4 */
-			adev->gfx.config.max_cu_per_sh = 4;
-			break;
-		}
-
+		adev->gfx.config.max_cu_per_sh = 8;
 		adev->gfx.config.max_texture_channel_caches = 2;
 		adev->gfx.config.max_gprs = 256;
 		adev->gfx.config.max_gs_threads = 32;
@@ -1963,35 +1853,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.max_tile_pipes = 2;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_backends_per_se = 1;
-
-		switch (adev->pdev->revision) {
-		case 0x80:
-		case 0x81:
-		case 0xc0:
-		case 0xc1:
-		case 0xc2:
-		case 0xc4:
-		case 0xc8:
-		case 0xc9:
-		case 0xd6:
-		case 0xda:
-		case 0xe9:
-		case 0xea:
-			adev->gfx.config.max_cu_per_sh = 3;
-			break;
-		case 0x83:
-		case 0xd0:
-		case 0xd1:
-		case 0xd2:
-		case 0xd4:
-		case 0xdb:
-		case 0xe1:
-		case 0xe2:
-		default:
-			adev->gfx.config.max_cu_per_sh = 2;
-			break;
-		}
-
+		adev->gfx.config.max_cu_per_sh = 3;
 		adev->gfx.config.max_texture_channel_caches = 2;
 		adev->gfx.config.max_gprs = 256;
 		adev->gfx.config.max_gs_threads = 16;
@@ -2083,13 +1945,67 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				      int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+				+ (ring_id * GFX8_MEC_HPD_SIZE);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			     &adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v8_0_sw_init(void *handle)
 {
-	int i, r;
+	int i, j, k, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	switch (adev->asic_type) {
+	case CHIP_FIJI:
+	case CHIP_TONGA:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_POLARIS10:
+	case CHIP_CARRIZO:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_TOPAZ:
+	case CHIP_STONEY:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
+
 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
 	if (r)
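
On GFX8 both the doorbell index and the EOP slot are linear in ring_id, which keeps the per-ring bookkeeping trivial. A sketch with an assumed doorbell base and buffer address (both invented for the illustration):

    #include <stdio.h>
    #include <stdint.h>

    #define GFX8_MEC_HPD_SIZE  2048
    #define DOORBELL_MEC_RING0 0x10      /* assumed for the sketch */

    int main(void)
    {
        uint64_t hpd_eop_gpu_addr = 0x100000;   /* assumed BO base */
        int ring_id;

        for (ring_id = 0; ring_id < 4; ring_id++)
            printf("ring %d: doorbell 0x%02x, eop addr 0x%llx\n",
                   ring_id, DOORBELL_MEC_RING0 + ring_id,
                   (unsigned long long)(hpd_eop_gpu_addr
                                        + ring_id * GFX8_MEC_HPD_SIZE));
        return 0;
    }
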
@@ -2151,49 +2067,41 @@ static int gfx_v8_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		unsigned irq_type;
-
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
-				     irq_type);
-		if (r)
-			return r;
-	}
-
-	if (amdgpu_sriov_vf(adev)) {
-		r = gfx_v8_0_kiq_init(adev);
-		if (r) {
-			DRM_ERROR("Failed to init KIQ BOs!\n");
-			return r;
-		}
-
-		kiq = &adev->gfx.kiq;
-		r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
-		if (r)
-			return r;
-
-		/* create MQD for all compute queues as wel as KIQ for SRIOV case */
-		r = gfx_v8_0_compute_mqd_sw_init(adev);
-		if (r)
-			return r;
-	}
+
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v8_0_compute_ring_init(adev,
+							       ring_id,
+							       i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
+		}
+	}
+
+	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
+	if (r) {
+		DRM_ERROR("Failed to init KIQ BOs!\n");
+		return r;
+	}
+
+	kiq = &adev->gfx.kiq;
+	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+	if (r)
+		return r;
+
+	/* create MQD for all compute queues as well as KIQ for SRIOV case */
+	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
+	if (r)
+		return r;
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -2237,11 +2145,9 @@ static int gfx_v8_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-	if (amdgpu_sriov_vf(adev)) {
-		gfx_v8_0_compute_mqd_sw_fini(adev);
-		gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
-		gfx_v8_0_kiq_fini(adev);
-	}
+	amdgpu_gfx_compute_mqd_sw_fini(adev);
+	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+	amdgpu_gfx_kiq_fini(adev);
 
 	gfx_v8_0_mec_fini(adev);
 	gfx_v8_0_rlc_fini(adev);
@@ -3594,11 +3500,6 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
 	WREG32(mmGRBM_GFX_INDEX, data);
 }
 
-static u32 gfx_v8_0_create_bitmask(u32 bit_width)
-{
-	return (u32)((1ULL << bit_width) - 1);
-}
-
 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -3608,8 +3509,8 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 
 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
 
-	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
-				       adev->gfx.config.max_sh_per_se);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+					 adev->gfx.config.max_sh_per_se);
 
 	return (~data) & mask;
 }
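
The per-IP helper removed above reduces to ((1ULL << width) - 1); the 64-bit constant matters, since a 32-bit shift by 32 would be undefined behavior. A quick demonstration:

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t create_bitmask(uint32_t bit_width)
    {
        return (uint32_t)((1ULL << bit_width) - 1);
    }

    int main(void)
    {
        printf("width 2  -> 0x%x\n", create_bitmask(2));   /* 0x3 */
        printf("width 4  -> 0x%x\n", create_bitmask(4));   /* 0xf */
        /* 0xffffffff; well-defined only because the shift is done in 64 bits */
        printf("width 32 -> 0x%x\n", create_bitmask(32));
        return 0;
    }
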
@@ -3823,7 +3724,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 /**
  * gfx_v8_0_init_compute_vmid - gart enable
  *
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
  *
  * Initialize compute vmid sh_mem registers
 *
@@ -4481,6 +4382,39 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
 
 	return 0;
 }
+static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	u32 tmp;
+	/* no gfx doorbells on iceland */
+	if (adev->asic_type == CHIP_TOPAZ)
+		return;
+
+	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
+
+	if (ring->use_doorbell) {
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_OFFSET, ring->doorbell_index);
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_HIT, 0);
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+				    DOORBELL_EN, 1);
+	} else {
+		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
+	}
+
+	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
+
+	if (adev->flags & AMD_IS_APU)
+		return;
+
+	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+			    DOORBELL_RANGE_LOWER,
+			    AMDGPU_DOORBELL_GFX_RING0);
+	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
+	       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
 
 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 {
@@ -4528,34 +4462,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 	WREG32(mmCP_RB0_BASE, rb_addr);
 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
 
-	/* no gfx doorbells on iceland */
-	if (adev->asic_type != CHIP_TOPAZ) {
-		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
-		if (ring->use_doorbell) {
-			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-					    DOORBELL_OFFSET, ring->doorbell_index);
-			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-					    DOORBELL_HIT, 0);
-			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-					    DOORBELL_EN, 1);
-		} else {
-			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-					    DOORBELL_EN, 0);
-		}
-		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
-
-		if (adev->asic_type == CHIP_TONGA) {
-			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
-					    DOORBELL_RANGE_LOWER,
-					    AMDGPU_DOORBELL_GFX_RING0);
-			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
-
-			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
-			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
-		}
-
-	}
-
+	gfx_v8_0_set_cpg_door_bell(adev, ring);
 	/* start the ring */
 	amdgpu_ring_clear_ring(ring);
 	gfx_v8_0_cp_gfx_start(adev);
@@ -4628,29 +4535,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
-{
-	int i, r;
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, false);
-			if (unlikely(r != 0))
-				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
-
-			amdgpu_bo_unpin(ring->mqd_obj);
-			amdgpu_bo_unreserve(ring->mqd_obj);
-
-			amdgpu_bo_unref(&ring->mqd_obj);
-			ring->mqd_obj = NULL;
-			ring->mqd_ptr = NULL;
-			ring->mqd_gpu_addr = 0;
-		}
-	}
-}
-
 /* KIQ functions */
 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
 {
@@ -4666,45 +4550,111 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
 }
 
-static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_alloc(ring, 8);
-	/* set resources */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
-	amdgpu_ring_write(ring, 0);	/* queue mask hi */
-	amdgpu_ring_write(ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(ring, 0);	/* gws mask hi */
-	amdgpu_ring_write(ring, 0);	/* oac mask */
-	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
-	amdgpu_ring_commit(ring);
-	udelay(50);
+static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	uint32_t scratch, tmp = 0;
+	uint64_t queue_mask = 0;
+	int r, i;
+
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of queue_mask needs updating */
+		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+			break;
+		}
+
+		queue_mask |= (1ull << i);
+	}
+
+	r = amdgpu_gfx_scratch_get(adev, &scratch);
+	if (r) {
+		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+
+	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		amdgpu_gfx_scratch_free(adev, scratch);
+		return r;
+	}
+	/* set resources */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
+	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+
+		/* map queues */
+		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+		amdgpu_ring_write(kiq_ring,
+				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+		amdgpu_ring_write(kiq_ring,
+				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
+				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
+		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+	}
+	/* write to scratch for completion */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
+	amdgpu_ring_commit(kiq_ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i >= adev->usec_timeout) {
+		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	amdgpu_gfx_scratch_free(adev, scratch);
+
+	return r;
 }
 
-static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
-				      struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = kiq_ring->adev;
-	uint64_t mqd_addr, wptr_addr;
-
-	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	amdgpu_ring_alloc(kiq_ring, 8);
-
-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-	amdgpu_ring_write(kiq_ring, 0x21010000);
-	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
-			(ring->queue << 26) |
-			(ring->pipe << 29) |
-			((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
-	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
-	amdgpu_ring_commit(kiq_ring);
-	udelay(50);
+static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
+{
+	int i, r = 0;
+
+	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
+		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
+				break;
+			udelay(1);
+		}
+		if (i == adev->usec_timeout)
+			r = -ETIMEDOUT;
+	}
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+	WREG32(mmCP_HQD_PQ_RPTR, 0);
+	WREG32(mmCP_HQD_PQ_WPTR, 0);
+
+	return r;
 }
 
 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
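
The dword budget the new KCQ-enable path reserves, (8 * num_compute_rings) + 11, is exact: SET_RESOURCES costs 8 dwords (header plus 7 payload), each MAP_QUEUES 8, and the closing SET_UCONFIG_REG scratch write 3. Checking the arithmetic:

    #include <stdio.h>

    int main(void)
    {
        int num_compute_rings = 8;   /* AMDGPU_MAX_COMPUTE_RINGS */
        int set_resources = 8;       /* 1 header + 7 payload dwords */
        int map_queues = 8;          /* 1 header + 7 payload dwords, per ring */
        int set_uconfig = 3;         /* 1 header + 2 payload dwords */
        int used = set_resources + map_queues * num_compute_rings + set_uconfig;

        /* both print 75 */
        printf("reserved: %d, used: %d\n", 8 * num_compute_rings + 11, used);
        return 0;
    }
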
@@ -4721,7 +4671,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
-
+	if (!(adev->flags & AMD_IS_APU)) {
+		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
+					     + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
+					     + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
+	}
 	eop_base_addr = ring->eop_gpu_addr >> 8;
 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -4729,7 +4684,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			(order_base_2(MEC_HPD_SIZE / 4) - 1));
+			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
@@ -4741,11 +4696,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
-	/* disable the queue if it's active */
-	mqd->cp_hqd_dequeue_request = 0;
-	mqd->cp_hqd_pq_rptr = 0;
-	mqd->cp_hqd_pq_wptr = 0;
-
 	/* set the pointer to the MQD */
 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
@@ -4815,149 +4765,160 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
 	mqd->cp_hqd_persistent_state = tmp;
 
+	/* set MTYPE */
+	tmp = RREG32(mmCP_HQD_IB_CONTROL);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
+	mqd->cp_hqd_ib_control = tmp;
+
+	tmp = RREG32(mmCP_HQD_IQ_TIMER);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
+	mqd->cp_hqd_iq_timer = tmp;
+
+	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
+	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
+	mqd->cp_hqd_ctx_save_control = tmp;
+
+	/* defaults */
+	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
+	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
+	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
+	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
+	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
+	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
+	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
+	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
+	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
+	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
+	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
 	return 0;
 }
 
-static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
+			struct vi_mqd *mqd)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct vi_mqd *mqd = ring->mqd_ptr;
-	int j;
+	uint32_t mqd_reg;
+	uint32_t *mqd_data;
+
+	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
+	mqd_data = &mqd->cp_mqd_base_addr_lo;
 
 	/* disable wptr polling */
 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
-
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
-
-	/* enable doorbell? */
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-
-	/* disable the queue if it's active */
-	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
-				break;
-			udelay(1);
-		}
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+	/* program all HQD registers */
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+	 * This is safe since EOP RPTR==WPTR for any inactive HQD
+	 * on ASICs that do not support context-save.
+	 * EOP writes/reads can start anywhere in the ring.
+	 */
+	if (adev->asic_type != CHIP_TONGA) {
+		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
 	}
 
-	/* set the pointer to the MQD */
-	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-	/* set MQD vmid to 0 */
-	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
-
-	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
-	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-
-	/* set the wb address whether it's enabled or not */
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-	       mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-	       mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
-	/* enable the doorbell if requested */
-	if (ring->use_doorbell) {
-		if ((adev->asic_type == CHIP_CARRIZO) ||
-		    (adev->asic_type == CHIP_FIJI) ||
-		    (adev->asic_type == CHIP_STONEY)) {
-			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
-			       AMDGPU_DOORBELL_KIQ << 2);
-			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-			       AMDGPU_DOORBELL_MEC_RING7 << 2);
-		}
-	}
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-
-	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-
-	/* set the vmid for the queue */
-	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
-
-	/* activate the queue */
-	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
-
-	if (ring->use_doorbell)
-		WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	/* activate the HQD */
+	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
 	return 0;
 }
 
 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct vi_mqd *mqd = ring->mqd_ptr;
-	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
-	if (is_kiq) {
-		gfx_v8_0_kiq_setting(&kiq->ring);
-	} else {
-		mqd_idx = ring - &adev->gfx.compute_ring[0];
-	}
+	gfx_v8_0_kiq_setting(ring);
 
-	if (!adev->gfx.in_reset) {
-		memset((void *)mqd, 0, sizeof(*mqd));
-		mutex_lock(&adev->srbm_mutex);
-		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-		gfx_v8_0_mqd_init(ring);
-		if (is_kiq)
-			gfx_v8_0_kiq_init_register(ring);
-		vi_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
-
-		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
-	} else { /* for GPU_RESET case */
+	if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
-
-		if (is_kiq) {
-			mutex_lock(&adev->srbm_mutex);
-			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-			gfx_v8_0_kiq_init_register(ring);
-			vi_srbm_select(adev, 0, 0, 0, 0);
-			mutex_unlock(&adev->srbm_mutex);
-		}
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v8_0_mqd_commit(adev, mqd);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+	} else {
+		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
+		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v8_0_mqd_init(ring);
+		gfx_v8_0_mqd_commit(adev, mqd);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
 	}
 
-	if (is_kiq)
-		gfx_v8_0_kiq_enable(ring);
-	else
-		gfx_v8_0_map_queue_enable(&kiq->ring, ring);
-
 	return 0;
 }
 
+static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct vi_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
+		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
+		((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
+		((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v8_0_mqd_init(ring);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
+	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
+		/* reset MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
+		/* reset ring buffer */
+		ring->wptr = 0;
+		amdgpu_ring_clear_ring(ring);
+	} else {
+		amdgpu_ring_clear_ring(ring);
+	}
+	return 0;
+}
+
+static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
+{
+	if (adev->asic_type > CHIP_TONGA) {
+		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
+		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
+	}
+	/* enable doorbells */
+	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+}
+
 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring = NULL;
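The new gfx_v8_0_mqd_commit() relies on the HQD register file being a contiguous MMIO block whose layout mirrors the run of 32-bit fields in struct vi_mqd starting at cp_mqd_base_addr_lo, so whole register ranges can be programmed by index. A minimal userspace model of that indexing, with placeholder register offsets rather than the real VI values:

	/* Sketch: consecutive register offsets map 1:1 onto consecutive
	 * u32 fields of an MQD snapshot, so one loop programs a range. */
	#include <stdint.h>
	#include <stdio.h>

	#define mmCP_MQD_BASE_ADDR 0x100	/* placeholder offsets */
	#define mmCP_HQD_VMID      0x108
	#define mmCP_HQD_ACTIVE    0x10b

	static uint32_t regs[0x200];		/* simulated register file */

	static void WREG32(uint32_t reg, uint32_t val) { regs[reg] = val; }

	struct mqd_snapshot {			/* stand-in for vi_mqd */
		uint32_t cp_mqd_base_addr_lo;	/* slot 0 == mmCP_MQD_BASE_ADDR */
		uint32_t fields[0x100];
	};

	int main(void)
	{
		struct mqd_snapshot mqd = { .cp_mqd_base_addr_lo = 0xdeadb000 };
		uint32_t *mqd_data = &mqd.cp_mqd_base_addr_lo;
		uint32_t mqd_reg;

		for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
			WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

		printf("programmed %d registers\n",
		       mmCP_HQD_ACTIVE - mmCP_HQD_VMID + 1);
		return 0;
	}

The Tonga errata branch in the real function simply skips three registers in the middle of that range, which is why the copy is split into two loops around it.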
@@ -4981,13 +4942,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 	if (r)
 		goto done;
 
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
-		goto done;
-	}
-
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 
@@ -4996,272 +4950,41 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 			goto done;
 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
 		if (!r) {
-			r = gfx_v8_0_kiq_init_queue(ring);
+			r = gfx_v8_0_kcq_init_queue(ring);
 			amdgpu_bo_kunmap(ring->mqd_obj);
 			ring->mqd_ptr = NULL;
 		}
 		amdgpu_bo_unreserve(ring->mqd_obj);
 		if (r)
 			goto done;
-
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
 	}
 
-done:
-	return r;
-}
-
-static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
-	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct vi_mqd *mqd;
-
-	/* init the queues. */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct vi_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
-					     NULL, &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
-		}
-
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct vi_mqd));
-
-		mqd = (struct vi_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->compute_pipelinestat_enable = 0x00000001;
-		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
-		mqd->compute_misc_reserved = 0x00000003;
-
-		mutex_lock(&adev->srbm_mutex);
-		vi_srbm_select(adev, ring->me,
-			       ring->pipe,
-			       ring->queue, 0);
-
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		eop_gpu_addr >>= 8;
-
-		/* write the EOP addr */
-		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
-		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
-
-		/* set the VMID assigned */
-		WREG32(mmCP_HQD_VMID, 0);
-
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
-
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
-
-		mqd->cp_hqd_eop_base_addr_lo =
-			RREG32(mmCP_HQD_EOP_BASE_ADDR);
-		mqd->cp_hqd_eop_base_addr_hi =
-			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
-
-		/* enable doorbell? */
-		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell) {
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-		} else {
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
-		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
-		mqd->cp_hqd_pq_doorbell_control = tmp;
-
-		/* disable the queue if it's active */
-		mqd->cp_hqd_dequeue_request = 0;
-		mqd->cp_hqd_pq_rptr = 0;
-		mqd->cp_hqd_pq_wptr = 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-		}
-
-		/* set the pointer to the MQD */
-		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
-		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-		/* set MQD vmid to 0 */
-		tmp = RREG32(mmCP_MQD_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
-		WREG32(mmCP_MQD_CONTROL, tmp);
-		mqd->cp_mqd_control = tmp;
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
-		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
-				    (order_base_2(ring->ring_size / 4) - 1));
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
-		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
-		mqd->cp_hqd_pq_control = tmp;
-
-		/* set the wb address whether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
-		mqd->cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->cp_hqd_pq_rptr_report_addr_lo);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
-		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
-		/* enable the doorbell if requested */
-		if (use_doorbell) {
-			if ((adev->asic_type == CHIP_CARRIZO) ||
-			    (adev->asic_type == CHIP_FIJI) ||
-			    (adev->asic_type == CHIP_STONEY) ||
-			    (adev->asic_type == CHIP_POLARIS11) ||
-			    (adev->asic_type == CHIP_POLARIS10) ||
-			    (adev->asic_type == CHIP_POLARIS12)) {
-				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
-				       AMDGPU_DOORBELL_KIQ << 2);
-				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-				       AMDGPU_DOORBELL_MEC_RING7 << 2);
-			}
-			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-					    DOORBELL_OFFSET, ring->doorbell_index);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
-			mqd->cp_hqd_pq_doorbell_control = tmp;
-
-		} else {
-			mqd->cp_hqd_pq_doorbell_control = 0;
-		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->cp_hqd_pq_doorbell_control);
-
-		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-		ring->wptr = 0;
-		mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
-
-		/* set the vmid for the queue */
-		mqd->cp_hqd_vmid = 0;
-		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
-		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
-		mqd->cp_hqd_persistent_state = tmp;
-		if (adev->asic_type == CHIP_STONEY ||
-		    adev->asic_type == CHIP_POLARIS11 ||
-		    adev->asic_type == CHIP_POLARIS10 ||
-		    adev->asic_type == CHIP_POLARIS12) {
-			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
-			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
-			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
-		}
-
-		/* activate the queue */
-		mqd->cp_hqd_active = 1;
-		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
-
-		vi_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
-
-		amdgpu_bo_kunmap(ring->mqd_obj);
-		amdgpu_bo_unreserve(ring->mqd_obj);
-	}
-
-	if (use_doorbell) {
-		tmp = RREG32(mmCP_PQ_STATUS);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-		WREG32(mmCP_PQ_STATUS, tmp);
-	}
-
-	gfx_v8_0_cp_compute_enable(adev, true);
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
-	}
-
-	return 0;
+	gfx_v8_0_set_mec_doorbell_range(adev);
+
+	r = gfx_v8_0_kiq_kcq_enable(adev);
+	if (r)
+		goto done;
+
+	/* Test KIQ */
+	ring = &adev->gfx.kiq.ring;
+	ring->ready = true;
+	r = amdgpu_ring_test_ring(ring);
+	if (r) {
+		ring->ready = false;
+		goto done;
+	}
+
+	/* Test KCQs */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		ring = &adev->gfx.compute_ring[i];
+		ring->ready = true;
+		r = amdgpu_ring_test_ring(ring);
+		if (r)
+			ring->ready = false;
+	}
+
+done:
+	return r;
 }
 
 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
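Both the KCQ-enable submission above and amdgpu_ring_test_ring() confirm CP progress the same way: write a known value to a scratch register through the ring, then poll it from the CPU under a microsecond budget. A reduced model of that handshake, with the ring simulated by a direct store where the real driver emits a SET_UCONFIG_REG packet:

	/* Sketch of the scratch-register handshake used to verify that the
	 * CP consumed a submission; timeout mirrors adev->usec_timeout. */
	#include <stdint.h>
	#include <stdio.h>

	static volatile uint32_t scratch;

	static void gpu_executes_packet(void) { scratch = 0xDEADBEEF; }

	int main(void)
	{
		int i, usec_timeout = 100000, r = -1;

		scratch = 0;
		gpu_executes_packet();	/* stands in for commit + CP fetch */

		for (i = 0; i < usec_timeout; i++) {
			if (scratch == 0xDEADBEEF) {
				r = 0;	/* queue is alive */
				break;
			}
			/* udelay(1) in the real driver */
		}
		printf("ring test %s\n", r ? "failed" : "passed");
		return r;
	}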
@@ -5314,10 +5037,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	if (amdgpu_sriov_vf(adev))
-		r = gfx_v8_0_kiq_resume(adev);
-	else
-		r = gfx_v8_0_cp_compute_resume(adev);
+	r = gfx_v8_0_kiq_resume(adev);
 	if (r)
 		return r;
 
@@ -5361,7 +5081,6 @@ static int gfx_v8_0_hw_fini(void *handle)
 	}
 	gfx_v8_0_cp_enable(adev, false);
 	gfx_v8_0_rlc_stop(adev);
-	gfx_v8_0_cp_compute_fini(adev);
 
 	amdgpu_set_powergating_state(adev,
 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
@@ -5372,15 +5091,18 @@ static int gfx_v8_0_hw_fini(void *handle)
 static int gfx_v8_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
+	adev->gfx.in_suspend = true;
 	return gfx_v8_0_hw_fini(adev);
 }
 
 static int gfx_v8_0_resume(void *handle)
 {
+	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	return gfx_v8_0_hw_init(adev);
+	r = gfx_v8_0_hw_init(adev);
+	adev->gfx.in_suspend = false;
+	return r;
 }
 
 static bool gfx_v8_0_is_idle(void *handle)
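The new in_suspend flag lets gfx_v8_0_kcq_init_queue() distinguish three states: first init (build a fresh MQD), GPU reset (restore the MQD backup), and resume from suspend (the MQD is still valid, so only the ring is cleared). The same decision table, reduced to standalone C:

	/* Sketch of the three-way branch inside kcq_init_queue(). */
	#include <stdbool.h>
	#include <stdio.h>

	enum action { INIT_FRESH_MQD, RESTORE_MQD_BACKUP, CLEAR_RING_ONLY };

	static enum action kcq_init_action(bool in_reset, bool in_suspend)
	{
		if (!in_reset && !in_suspend)
			return INIT_FRESH_MQD;		/* first-time init */
		else if (in_reset)
			return RESTORE_MQD_BACKUP;	/* GPU_RESET case */
		else
			return CLEAR_RING_ONLY;		/* resume path */
	}

	int main(void)
	{
		printf("init=%d reset=%d resume=%d\n",
		       kcq_init_action(false, false),
		       kcq_init_action(true, false),
		       kcq_init_action(false, true));
		return 0;
	}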
@@ -5469,25 +5191,6 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
 	}
 }
 
-static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring)
-{
-	int i;
-
-	mutex_lock(&adev->srbm_mutex);
-	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
-		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2);
-		for (i = 0; i < adev->usec_timeout; i++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
-				break;
-			udelay(1);
-		}
-	}
-	vi_srbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-}
-
 static int gfx_v8_0_pre_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -5517,7 +5220,11 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
-		gfx_v8_0_inactive_hqd(adev, ring);
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v8_0_deactivate_hqd(adev, 2);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
 	}
 	/* Disable MEC parsing/prefetching */
 	gfx_v8_0_cp_compute_enable(adev, false);
@@ -5588,18 +5295,6 @@ static int gfx_v8_0_soft_reset(void *handle)
 	return 0;
 }
 
-static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
-			      struct amdgpu_ring *ring)
-{
-	mutex_lock(&adev->srbm_mutex);
-	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
-	WREG32(mmCP_HQD_PQ_RPTR, 0);
-	WREG32(mmCP_HQD_PQ_WPTR, 0);
-	vi_srbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-}
-
 static int gfx_v8_0_post_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -5625,9 +5320,13 @@ static int gfx_v8_0_post_soft_reset(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
-		gfx_v8_0_init_hqd(adev, ring);
+		mutex_lock(&adev->srbm_mutex);
+		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v8_0_deactivate_hqd(adev, 2);
+		vi_srbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
 	}
-	gfx_v8_0_cp_compute_resume(adev);
+	gfx_v8_0_kiq_resume(adev);
 	}
 	gfx_v8_0_rlc_start(adev);
 
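The pre/post soft-reset paths now open-code the SRBM bracket around gfx_v8_0_deactivate_hqd(): HQD registers are banked per me/pipe/queue, so every access must happen between a vi_srbm_select() to the target queue and a restore to bank 0,0,0,0, all under srbm_mutex. The bracket in isolation, simulated with a plain pthread mutex:

	/* Sketch of the SRBM select/restore bracket (build with -lpthread). */
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t srbm_mutex = PTHREAD_MUTEX_INITIALIZER;
	static int sel_me, sel_pipe, sel_queue;

	static void srbm_select(int me, int pipe, int queue)
	{
		sel_me = me; sel_pipe = pipe; sel_queue = queue;
	}

	static void deactivate_hqd(void)
	{
		printf("deactivating HQD %d.%d.%d\n", sel_me, sel_pipe, sel_queue);
	}

	int main(void)
	{
		pthread_mutex_lock(&srbm_mutex);
		srbm_select(1, 2, 3);	/* bank in the target queue */
		deactivate_hqd();
		srbm_select(0, 0, 0);	/* always restore the default bank */
		pthread_mutex_unlock(&srbm_mutex);
		return 0;
	}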
@@ -5773,7 +5472,7 @@ static int gfx_v8_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
@@ -6265,6 +5964,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
 		if (temp != data)
 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+		/* enable interrupts again for PG */
+		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
 	}
 
 	gfx_v8_0_wait_for_rlc_serdes(adev);
@@ -6568,9 +6269,13 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
 	control |= ib->length_dw | (vm_id << 24);
 
-	if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
+	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+			gfx_v8_0_ring_emit_de_meta(ring);
+	}
+
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
@@ -6753,8 +6458,7 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	uint32_t dw2 = 0;
 
 	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v8_0_ring_emit_ce_meta_init(ring,
-			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
+		gfx_v8_0_ring_emit_ce_meta(ring);
 
 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -6780,10 +6484,6 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
 	amdgpu_ring_write(ring, dw2);
 	amdgpu_ring_write(ring, 0);
-
-	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v8_0_ring_emit_de_meta_init(ring,
-			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
 }
 
 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
@@ -6813,7 +6513,6 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
 	ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
 }
 
-
 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -6851,15 +6550,27 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 				     int me, int pipe,
 				     enum amdgpu_interrupt_state state)
 {
+	u32 mec_int_cntl, mec_int_cntl_reg;
+
 	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
+	 * amdgpu controls only the first MEC. That's why this function only
+	 * handles the setting of interrupts for this specific MEC. All other
 	 * pipes' interrupts are set by amdkfd.
 	 */
 
 	if (me == 1) {
 		switch (pipe) {
 		case 0:
+			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
+			break;
+		case 1:
+			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
+			break;
+		case 2:
+			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
+			break;
+		case 3:
+			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
 			break;
 		default:
 			DRM_DEBUG("invalid pipe %d\n", pipe);
@@ -6870,8 +6581,20 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 		return;
 	}
 
-	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
-		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+	switch (state) {
+	case AMDGPU_IRQ_STATE_DISABLE:
+		mec_int_cntl = RREG32(mec_int_cntl_reg);
+		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+		WREG32(mec_int_cntl_reg, mec_int_cntl);
+		break;
+	case AMDGPU_IRQ_STATE_ENABLE:
+		mec_int_cntl = RREG32(mec_int_cntl_reg);
+		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
+		WREG32(mec_int_cntl_reg, mec_int_cntl);
+		break;
+	default:
+		break;
+	}
 }
 
 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
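The rewritten handler above first selects the per-pipe CP_ME1_PIPEn_INT_CNTL register, then does a read-modify-write of the TIME_STAMP_INT_ENABLE bit instead of hardwiring pipe 0. The RMW idiom in isolation; the register array and mask below are placeholders, not the real VI values:

	/* Read-modify-write of one interrupt-enable bit, per pipe. */
	#include <stdint.h>
	#include <stdio.h>

	#define TIME_STAMP_INT_ENABLE_MASK 0x04000000u	/* placeholder mask */

	static uint32_t int_cntl[4];			/* one per MEC1 pipe */

	static void set_eop_irq(int pipe, int enable)
	{
		uint32_t v = int_cntl[pipe];		/* RREG32 */

		if (enable)
			v |= TIME_STAMP_INT_ENABLE_MASK;
		else
			v &= ~TIME_STAMP_INT_ENABLE_MASK;
		int_cntl[pipe] = v;			/* WREG32 */
	}

	int main(void)
	{
		set_eop_irq(3, 1);
		printf("pipe3 INT_CNTL = 0x%08x\n", (unsigned)int_cntl[3]);
		set_eop_irq(3, 0);
		printf("pipe3 INT_CNTL = 0x%08x\n", (unsigned)int_cntl[3]);
		return 0;
	}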
@@ -6992,8 +6715,6 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
 {
 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
-	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
-
 	switch (type) {
 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
 		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
@@ -7023,8 +6744,6 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
 	u8 me_id, pipe_id, queue_id;
 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
-	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
-
 	me_id = (entry->ring_id & 0x0c) >> 2;
 	pipe_id = (entry->ring_id & 0x03) >> 0;
 	queue_id = (entry->ring_id & 0x70) >> 4;
@@ -7257,7 +6976,7 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
 
-	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
 }
@@ -7268,9 +6987,15 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
 	unsigned disable_masks[4 * 2];
+	u32 ao_cu_num;
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
+	if (adev->flags & AMD_IS_APU)
+		ao_cu_num = 2;
+	else
+		ao_cu_num = adev->gfx.config.max_cu_per_sh;
+
 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
 
 	mutex_lock(&adev->grbm_idx_mutex);
@@ -7286,16 +7011,18 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
-			for (k = 0; k < 16; k ++) {
+			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (bitmap & mask) {
-					if (counter < 2)
+					if (counter < ao_cu_num)
 						ao_bitmap |= mask;
 					counter ++;
 				}
 				mask <<= 1;
 			}
 			active_cu_number += counter;
-			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			if (i < 2 && j < 2)
+				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 		}
 	}
 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
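The CU-info loop above walks the per-SH active-CU bitmap, counts active CUs, and marks at most ao_cu_num of them "always on". The counting step as a standalone program, with an invented bitmap value:

	/* Counting active CUs and building the always-on mask from a
	 * per-SH bitmap, mirroring the loop in gfx_v8_0_get_cu_info(). */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t bitmap = 0x3ff;	/* 10 active CUs, for example */
		unsigned max_cu_per_sh = 16, ao_cu_num = 2;
		uint32_t mask = 1, ao_bitmap = 0;
		unsigned k, counter = 0;

		for (k = 0; k < max_cu_per_sh; k++) {
			if (bitmap & mask) {
				if (counter < ao_cu_num)
					ao_bitmap |= mask;	/* first N stay on */
				counter++;
			}
			mask <<= 1;
		}
		printf("active=%u ao_bitmap=0x%08x\n", counter, (unsigned)ao_bitmap);
		return 0;
	}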
@@ -7323,7 +7050,7 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
 	.funcs = &gfx_v8_0_ip_funcs,
 };
 
-static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
+static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
 {
 	uint64_t ce_payload_addr;
 	int cnt_ce;
@@ -7333,10 +7060,12 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c
 	} ce_payload = {};
 
 	if (ring->adev->virt.chained_ib_support) {
-		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
+		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
+			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
 	} else {
-		ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload);
+		ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
+			offsetof(struct vi_gfx_meta_data, ce_payload);
 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
 	}
 
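The ce/de meta emitters no longer take a csa_addr argument: the CSA GPU address is derived from a fixed slot two pages below AMDGPU_VA_RESERVED_SIZE, with the GDS backup one page above it. The address arithmetic in isolation, using a stand-in for the driver constant:

	/* CSA/GDS-backup layout assumed by the rewritten meta emitters:
	 * two reserved 4 KiB pages at a fixed offset. */
	#include <stdint.h>
	#include <stdio.h>

	#define VA_RESERVED_SIZE (8u * 4096)	/* placeholder value */

	int main(void)
	{
		uint64_t csa_addr = VA_RESERVED_SIZE - 2 * 4096;
		uint64_t gds_addr = csa_addr + 4096;

		printf("csa=0x%llx gds=0x%llx\n",
		       (unsigned long long)csa_addr,
		       (unsigned long long)gds_addr);
		return 0;
	}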
@@ -7350,15 +7079,16 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c
 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
 }
 
-static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
+static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 {
-	uint64_t de_payload_addr, gds_addr;
+	uint64_t de_payload_addr, gds_addr, csa_addr;
 	int cnt_de;
 	static union {
 		struct vi_de_ib_state regular;
 		struct vi_de_ib_state_chained_ib chained;
 	} de_payload = {};
 
+	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
 	gds_addr = csa_addr + 4096;
 	if (ring->adev->virt.chained_ib_support) {
 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
@@ -7381,68 +7111,3 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c
 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
 }
-
-/* create MQD for each compute queue */
-static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = NULL;
-	int r, i;
-
-	/* create MQD for KIQ */
-	ring = &adev->gfx.kiq.ring;
-	if (!ring->mqd_obj) {
-		r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
-					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
-		if (r) {
-			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
-			return r;
-		}
-
-		/* prepare MQD backup */
-		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
-		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
-			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
-	}
-
-	/* create MQD for each KCQ */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-		if (!ring->mqd_obj) {
-			r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
-						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
-			if (r) {
-				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
-				return r;
-			}
-
-			/* prepare MQD backup */
-			adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL);
-			if (!adev->gfx.mec.mqd_backup[i])
-				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
-		}
-	}
-
-	return 0;
-}
-
-static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = NULL;
-	int i;
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-		kfree(adev->gfx.mec.mqd_backup[i]);
-		amdgpu_bo_free_kernel(&ring->mqd_obj,
-				      &ring->mqd_gpu_addr,
-				      &ring->mqd_ptr);
-	}
-
-	ring = &adev->gfx.kiq.ring;
-	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
-	amdgpu_bo_free_kernel(&ring->mqd_obj,
-			      &ring->mqd_gpu_addr,
-			      &ring->mqd_ptr);
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
index 788cc3ab584b..ec3f11fa986c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
@@ -27,4 +27,9 @@
 extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;
 
+struct amdgpu_device;
+struct vi_mqd;
+
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0c16b7563b73..3a0b69b09ed6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -21,7 +21,7 @@
  *
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "soc15.h"
@@ -38,8 +38,17 @@
 #include "v9_structs.h"
 
 #define GFX9_NUM_GFX_RINGS     1
-#define GFX9_NUM_COMPUTE_RINGS 8
-#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
+#define GFX9_MEC_HPD_SIZE 2048
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
+#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
+
+#define mmPWR_MISC_CNTL_STATUS                                  0x0183
+#define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
 
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
@@ -48,6 +57,13 @@ MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
 
+MODULE_FIRMWARE("amdgpu/raven_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven_me.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
+
 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 {
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
@@ -86,14 +102,27 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 
 static const u32 golden_settings_gc_9_0[] =
 {
-	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400,
+	SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420,
+	SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080,
 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107,
 	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
 	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68,
 	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197,
-	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff
+	SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000,
+	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff,
+	SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080
 };
 
 static const u32 golden_settings_gc_9_0_vg10[] =
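These golden-register tables are (offset, and_mask, or_value) triplets consumed by amdgpu_program_register_sequence(). The consumer's semantics, sketched against a simulated register file (modeled on the driver's helper; treat it as an approximation, not a quote of the kernel source):

	/* Applying (reg, and_mask, or_value) golden-register triplets. */
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t regs[256];	/* simulated register file */

	static void program_register_sequence(const uint32_t *t, unsigned n)
	{
		unsigned i;

		for (i = 0; i < n; i += 3) {
			uint32_t reg = t[i], and_mask = t[i + 1], or_val = t[i + 2];
			uint32_t tmp;

			if (and_mask == 0xffffffff) {
				tmp = or_val;		/* full overwrite */
			} else {
				tmp = regs[reg];	/* RREG32 */
				tmp &= ~and_mask;	/* clear masked field */
				tmp |= or_val;		/* install golden bits */
			}
			regs[reg] = tmp;		/* WREG32 */
		}
	}

	int main(void)
	{
		static const uint32_t golden[] = { 7, 0x08000000, 0x08000080 };

		regs[7] = 0x0fff0000;
		program_register_sequence(golden, 3);
		printf("reg7 = 0x%08x\n", (unsigned)regs[7]);
		return 0;
	}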
@@ -104,11 +133,47 @@ static const u32 golden_settings_gc_9_0_vg10[] =
 	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042,
 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000,
 	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
-	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800,
-	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1),0x0000000f, 0x00000007
+	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800
+};
+
+static const u32 golden_settings_gc_9_1[] =
+{
+	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104,
+	SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420,
+	SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080,
+	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
+	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000,
+	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120,
+	SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000,
+	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff,
+	SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080
+};
+
+static const u32 golden_settings_gc_9_1_rv1[] =
+{
+	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
+	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042,
+	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000,
+	SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000,
+	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
+	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800
 };
 
 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
+#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
 
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -118,6 +183,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 			struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 
 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
@@ -130,6 +196,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 				golden_settings_gc_9_0_vg10,
 				(const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 		break;
+	case CHIP_RAVEN:
+		amdgpu_program_register_sequence(adev,
+						 golden_settings_gc_9_1,
+						 (const u32)ARRAY_SIZE(golden_settings_gc_9_1));
+		amdgpu_program_register_sequence(adev,
+						 golden_settings_gc_9_1_rv1,
+						 (const u32)ARRAY_SIZE(golden_settings_gc_9_1_rv1));
+		break;
 	default:
 		break;
 	}
@@ -284,6 +358,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	struct amdgpu_firmware_info *info = NULL;
 	const struct common_firmware_header *header = NULL;
 	const struct gfx_firmware_header_v1_0 *cp_hdr;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	unsigned int *tmp = NULL;
+	unsigned int i = 0;
 
 	DRM_DEBUG("\n");
 
@@ -291,6 +368,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 		chip_name = "vega10";
 		break;
+	case CHIP_RAVEN:
+		chip_name = "raven";
+		break;
 	default:
 		BUG();
 	}
@@ -333,9 +413,46 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	if (err)
 		goto out;
 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
-	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
-	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
-	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+	adev->gfx.rlc.save_and_restore_offset =
+			le32_to_cpu(rlc_hdr->save_and_restore_offset);
+	adev->gfx.rlc.clear_state_descriptor_offset =
+			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+	adev->gfx.rlc.avail_scratch_ram_locations =
+			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+	adev->gfx.rlc.reg_restore_list_size =
+			le32_to_cpu(rlc_hdr->reg_restore_list_size);
+	adev->gfx.rlc.reg_list_format_start =
+			le32_to_cpu(rlc_hdr->reg_list_format_start);
+	adev->gfx.rlc.reg_list_format_separate_start =
+			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+	adev->gfx.rlc.starting_offsets_start =
+			le32_to_cpu(rlc_hdr->starting_offsets_start);
+	adev->gfx.rlc.reg_list_format_size_bytes =
+			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+	adev->gfx.rlc.reg_list_size_bytes =
+			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+	adev->gfx.rlc.register_list_format =
+			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+	if (!adev->gfx.rlc.register_list_format) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
 
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
@@ -447,6 +564,261 @@ out:
 	return err;
 }

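+/*
+ * Count the PM4 dwords the clear-state buffer will need: two for the
+ * begin-clear-state preamble, three for CONTEXT_CONTROL, a two-dword
+ * SET_CONTEXT_REG header plus reg_count values per context extent, and
+ * two dwords each for the end-of-preamble and CLEAR_STATE packets.
+ */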
+static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
+{
+	u32 count = 0;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	/* begin clear state */
+	count += 2;
+	/* context control state */
+	count += 3;
+
+	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
+		for (ext = sect->section; ext->extent != NULL; ++ext) {
+			if (sect->id == SECT_CONTEXT)
+				count += 2 + ext->reg_count;
+			else
+				return 0;
+		}
+	}
+
+	/* end clear state */
+	count += 2;
+	/* clear state */
+	count += 2;
+
+	return count;
+}
+
+static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
+				    volatile u32 *buffer)
+{
+	u32 count = 0, i;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	if (adev->gfx.rlc.cs_data == NULL)
+		return;
+	if (buffer == NULL)
+		return;
+
+	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	buffer[count++] = cpu_to_le32(0x80000000);
+	buffer[count++] = cpu_to_le32(0x80000000);
+
+	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+		for (ext = sect->section; ext->extent != NULL; ++ext) {
+			if (sect->id == SECT_CONTEXT) {
+				buffer[count++] =
+					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+				buffer[count++] = cpu_to_le32(ext->reg_index -
+						PACKET3_SET_CONTEXT_REG_START);
+				for (i = 0; i < ext->reg_count; i++)
+					buffer[count++] = cpu_to_le32(ext->extent[i]);
+			} else {
+				return;
+			}
+		}
+	}
+
+	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+	buffer[count++] = cpu_to_le32(0);
+}
+
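+/*
+ * Program the Load Balancing Per Watt (LBPW) thresholds, counters and
+ * CU masks used by the RLC on Raven; the per-register comments below
+ * give the intended values.
+ */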
+static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
+{
+	uint32_t data;
+
+	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
+	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
+	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
+	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
+	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
+
+	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
+	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
+
+	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
+	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
+	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
+
+	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
+	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
+	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
+	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
+	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
+
+	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
+	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
+	data &= 0x0000FFFF;
+	data |= 0x00C00000;
+	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
+
+	/* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */
+	WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF);
+
+	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
+	 * but used for RLC_LB_CNTL configuration */
+	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
+	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
+	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
+	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
+	mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
+{
+	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
+}
+
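+/*
+ * Copy the jump tables of the five CP microcode images (CE, PFP, ME,
+ * MEC and MEC2) back to back into the RLC cp_table buffer, using the
+ * jt_offset/jt_size fields of each firmware header.
+ */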
+static void rv_init_cp_jump_table(struct amdgpu_device *adev)
+{
+	const __le32 *fw_data;
+	volatile u32 *dst_ptr;
+	int me, i, max_me = 5;
+	u32 bo_offset = 0;
+	u32 table_offset, table_size;
+
+	/* write the cp table buffer */
+	dst_ptr = adev->gfx.rlc.cp_table_ptr;
+	for (me = 0; me < max_me; me++) {
+		if (me == 0) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.ce_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 1) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.pfp_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 2) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.me_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 3) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.mec_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 4) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.mec2_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		}
+
+		for (i = 0; i < table_size; i ++) {
+			dst_ptr[bo_offset + i] =
+				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+		}
+
+		bo_offset += table_size;
+	}
+}
+
+static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
+{
+	/* clear state block */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+			&adev->gfx.rlc.clear_state_gpu_addr,
+			(void **)&adev->gfx.rlc.cs_ptr);
+
+	/* jump table block */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+			&adev->gfx.rlc.cp_table_gpu_addr,
+			(void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
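+/*
+ * Allocate and fill the RLC clear-state buffer from gfx9_cs_data, and
+ * on Raven additionally allocate the CP jump-table buffer (JT plus
+ * GDS backup space) and program the LBPW registers.
+ */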
+static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
+{
+	volatile u32 *dst_ptr;
+	u32 dws;
+	const struct cs_section_def *cs_data;
+	int r;
+
+	adev->gfx.rlc.cs_data = gfx9_cs_data;
+
+	cs_data = adev->gfx.rlc.cs_data;
+
+	if (cs_data) {
+		/* clear state block */
+		adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
+		if (adev->gfx.rlc.clear_state_obj == NULL) {
+			r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
+						AMDGPU_GEM_DOMAIN_VRAM,
+						&adev->gfx.rlc.clear_state_obj,
+						&adev->gfx.rlc.clear_state_gpu_addr,
+						(void **)&adev->gfx.rlc.cs_ptr);
+			if (r) {
+				dev_err(adev->dev,
+					"(%d) failed to create rlc csb bo\n", r);
+				gfx_v9_0_rlc_fini(adev);
+				return r;
+			}
+		}
+		/* set up the cs buffer */
+		dst_ptr = adev->gfx.rlc.cs_ptr;
+		gfx_v9_0_get_csb_buffer(adev, dst_ptr);
+		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+	}

+	if (adev->asic_type == CHIP_RAVEN) {
+		/* TODO: double check the cp_table_size for RV */
+		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
+		if (adev->gfx.rlc.cp_table_obj == NULL) {
+			r = amdgpu_bo_create_kernel(adev, adev->gfx.rlc.cp_table_size,
+						PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
+						&adev->gfx.rlc.cp_table_obj,
+						&adev->gfx.rlc.cp_table_gpu_addr,
+						(void **)&adev->gfx.rlc.cp_table_ptr);
+			if (r) {
+				dev_err(adev->dev,
+					"(%d) failed to create cp table bo\n", r);
+				gfx_v9_0_rlc_fini(adev);
+				return r;
+			}
+		}
+
+		rv_init_cp_jump_table(adev);
+		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+		gfx_v9_0_init_lbpw(adev);
+	}
+
+	return 0;
+}
+
 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 {
 	int r;
@@ -473,8 +845,6 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 	}
 }

-#define MEC_HPD_SIZE 2048
-
 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
@@ -482,20 +852,19 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 	const __le32 *fw_data;
 	unsigned fw_size;
 	u32 *fw;
+	size_t mec_hpd_size;

 	const struct gfx_firmware_header_v1_0 *mec_hdr;

-	/*
-	 * we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -575,131 +944,6 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 	return 0;
 }

-static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
-}
-
-static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
-{
-	int r;
-	u32 *hpd;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
-				    &kiq->eop_gpu_addr, (void **)&hpd);
-	if (r) {
-		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
-		return r;
-	}
-
-	memset(hpd, 0, MEC_HPD_SIZE);
-
-	r = amdgpu_bo_reserve(kiq->eop_obj, true);
-	if (unlikely(r != 0))
-		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
-	amdgpu_bo_kunmap(kiq->eop_obj);
-	amdgpu_bo_unreserve(kiq->eop_obj);
-
-	return 0;
-}
-
-static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
-				  struct amdgpu_ring *ring,
-				  struct amdgpu_irq_src *irq)
-{
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-	int r = 0;
-
-	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
-	if (r)
-		return r;
-
-	ring->adev = NULL;
-	ring->ring_obj = NULL;
-	ring->use_doorbell = true;
-	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
-	if (adev->gfx.mec2_fw) {
-		ring->me = 2;
-		ring->pipe = 0;
-	} else {
-		ring->me = 1;
-		ring->pipe = 1;
-	}
-
-	ring->queue = 0;
-	ring->eop_gpu_addr = kiq->eop_gpu_addr;
-	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
-	r = amdgpu_ring_init(adev, ring, 1024,
-			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
-	if (r)
-		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
-
-	return r;
-}
-static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
-				   struct amdgpu_irq_src *irq)
-{
-	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
-	amdgpu_ring_fini(ring);
-}
-
-/* create MQD for each compute queue */
-static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = NULL;
-	int r, i;
-
-	/* create MQD for KIQ */
-	ring = &adev->gfx.kiq.ring;
-	if (!ring->mqd_obj) {
-		r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
-					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-					    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
-		if (r) {
-			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
-			return r;
-		}
-
-		/*TODO: prepare MQD backup */
-	}
-
-	/* create MQD for each KCQ */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-		if (!ring->mqd_obj) {
-			r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
-						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-						    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
-			if (r) {
-				dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
-				return r;
-			}
-
-			/* TODO: prepare MQD backup */
-		}
-	}
-
-	return 0;
-}
-
-static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
-{
-	struct amdgpu_ring *ring = NULL;
-	int i;
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
-		amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
-	}
-
-	ring = &adev->gfx.kiq.ring;
-	amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
-}
-
 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
 {
 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
@@ -770,23 +1014,21 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)

 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		adev->gfx.config.max_shader_engines = 4;
-		adev->gfx.config.max_cu_per_sh = 16;
-		adev->gfx.config.max_sh_per_se = 1;
-		adev->gfx.config.max_backends_per_se = 4;
-		adev->gfx.config.max_texture_channel_caches = 16;
-		adev->gfx.config.max_gprs = 256;
-		adev->gfx.config.max_gs_threads = 32;
 		adev->gfx.config.max_hw_contexts = 8;
-
 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
-		adev->gfx.config.gs_vgt_table_depth = 32;
-		adev->gfx.config.gs_prim_buffer_depth = 1792;
 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
 		break;
+	case CHIP_RAVEN:
+		adev->gfx.config.max_hw_contexts = 8;
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
+		break;
 	default:
 		BUG();
 		break;
@@ -1023,13 +1265,61 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	return 0;
 }

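+/*
+ * Common setup for one compute ring: me/pipe/queue addressing, a
+ * doorbell, a slice of the shared HPD EOP buffer and the matching
+ * MEC pipe EOP interrupt source.
+ */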
+static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				      int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+				+ (ring_id * GFX9_MEC_HPD_SIZE);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			     &adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v9_0_sw_init(void *handle)
 {
-	int i, r;
+	int i, j, k, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
+
 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
 	if (r)
@@ -1062,6 +1352,12 @@ static int gfx_v9_0_sw_init(void *handle)
 		return r;
 	}

+	r = gfx_v9_0_rlc_init(adev);
+	if (r) {
+		DRM_ERROR("Failed to init rlc BOs!\n");
+		return r;
+	}
+
 	r = gfx_v9_0_mec_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init MEC BOs!\n");
@@ -1081,49 +1377,40 @@ static int gfx_v9_0_sw_init(void *handle)
 		return r;
 	}

-	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		unsigned irq_type;
-
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v9_0_compute_ring_init(adev,
+							       ring_id,
+							       i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
+		}
 	}

-	if (amdgpu_sriov_vf(adev)) {
-		r = gfx_v9_0_kiq_init(adev);
-		if (r) {
-			DRM_ERROR("Failed to init KIQ BOs!\n");
-			return r;
-		}
+	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
+	if (r) {
+		DRM_ERROR("Failed to init KIQ BOs!\n");
+		return r;
+	}

 	kiq = &adev->gfx.kiq;
-	r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
 	if (r)
 		return r;

 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
-	r = gfx_v9_0_compute_mqd_sw_init(adev);
+	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd));
 	if (r)
 		return r;
-	}

 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -1170,11 +1457,9 @@ static int gfx_v9_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

-	if (amdgpu_sriov_vf(adev)) {
-		gfx_v9_0_compute_mqd_sw_fini(adev);
-		gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
-		gfx_v9_0_kiq_fini(adev);
-	}
+	amdgpu_gfx_compute_mqd_sw_fini(adev);
+	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+	amdgpu_gfx_kiq_fini(adev);

 	gfx_v9_0_mec_fini(adev);
 	gfx_v9_0_ngg_fini(adev);
@@ -1208,11 +1493,6 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 }

-static u32 gfx_v9_0_create_bitmask(u32 bit_width)
-{
-	return (u32)((1ULL << bit_width) - 1);
-}
-
 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -1223,8 +1503,8 @@ static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

-	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se /
-				       adev->gfx.config.max_sh_per_se);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+					 adev->gfx.config.max_sh_per_se);

 	return (~data) & mask;
 }
@@ -1272,7 +1552,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)

 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
-			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 
+			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

 	mutex_lock(&adev->srbm_mutex);
 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
@@ -1370,9 +1650,6 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 {
 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);

-	if (enable)
-		return;
-
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
@@ -1381,6 +1658,373 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
 }

+static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
+{
+	/* csib */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
+			adev->gfx.rlc.clear_state_gpu_addr >> 32);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
+			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
+			adev->gfx.rlc.clear_state_size);
+}
+
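+/*
+ * Walk the RLC register-list-format blob: record where each
+ * 0xFFFFFFFF-terminated entry starts, collect the unique indirect
+ * register offsets it references, and rewrite each reference to an
+ * index into that unique-register table.
+ */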
+static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+				int indirect_offset,
+				int list_size,
+				int *unique_indirect_regs,
+				int *unique_indirect_reg_count,
+				int max_indirect_reg_count,
+				int *indirect_start_offsets,
+				int *indirect_start_offsets_count,
+				int max_indirect_start_offsets_count)
+{
+	int idx;
+	bool new_entry = true;
+
+	for (; indirect_offset < list_size; indirect_offset++) {
+
+		if (new_entry) {
+			new_entry = false;
+			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
+			BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
+		}
+
+		if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
+			new_entry = true;
+			continue;
+		}
+
+		indirect_offset += 2;
+
+		/* look for the matching index */
+		for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
+			if (unique_indirect_regs[idx] ==
+				register_list_format[indirect_offset])
+				break;
+		}
+
+		if (idx >= *unique_indirect_reg_count) {
+			unique_indirect_regs[*unique_indirect_reg_count] =
+				register_list_format[indirect_offset];
+			idx = *unique_indirect_reg_count;
+			*unique_indirect_reg_count = *unique_indirect_reg_count + 1;
+			BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
+		}
+
+		register_list_format[indirect_offset] = idx;
+	}
+}
+
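+/*
+ * Load the save/restore lists parsed from the RLC firmware into the
+ * RLC auto-increment and scratch RAM so the save-restore machine can
+ * replay them, then program the unique indirect registers collected
+ * by gfx_v9_0_parse_ind_reg_list() into the SRM index control
+ * registers.
+ */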
+static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+{
+	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
+	int unique_indirect_reg_count = 0;
+
+	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
+	int indirect_start_offsets_count = 0;
+
+	int list_size = 0;
+	int i = 0;
+	u32 tmp = 0;
+
+	u32 *register_list_format =
+		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
+	if (!register_list_format)
+		return -ENOMEM;
+	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
+		adev->gfx.rlc.reg_list_format_size_bytes);
+
+	/* setup unique_indirect_regs array and indirect_start_offsets array */
+	gfx_v9_0_parse_ind_reg_list(register_list_format,
+				GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
+				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+				unique_indirect_regs,
+				&unique_indirect_reg_count,
+				sizeof(unique_indirect_regs)/sizeof(int),
+				indirect_start_offsets,
+				&indirect_start_offsets_count,
+				sizeof(indirect_start_offsets)/sizeof(int));
+
+	/* enable auto inc in case it is disabled */
+	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
+	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
+
+	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
+		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
+	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
+			adev->gfx.rlc.register_restore[i]);
+
+	/* load direct register */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
+	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
+			adev->gfx.rlc.register_restore[i]);
+
+	/* load indirect register */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+		adev->gfx.rlc.reg_list_format_start);
+	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
+			register_list_format[i]);
+
+	/* set save/restore list size */
+	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
+	list_size = list_size >> 1;
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+		adev->gfx.rlc.reg_restore_list_size);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
+
+	/* write the starting offsets to RLC scratch ram */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
+		adev->gfx.rlc.starting_offsets_start);
+	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
+			indirect_start_offsets[i]);
+
+	/* load unique indirect regs*/
+	for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) {
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
+			unique_indirect_regs[i] & 0x3FFFF);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
+			unique_indirect_regs[i] >> 20);
+	}
+
+	kfree(register_list_format);
+	return 0;
+}
+
+static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
+{
+	u32 tmp = 0;
+
+	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
+	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
+}
+
+static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
+					     bool enable)
+{
+	uint32_t data = 0;
+	uint32_t default_data = 0;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
+	if (enable == true) {
+		/* enable GFXIP control over CGPG */
+		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+
+		/* update status */
+		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
+		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+	} else {
+		/* restore GFXIP control over CGPG */
+		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
+	}
+}
+
+static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
+{
+	uint32_t data = 0;
+
+	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG)) {
+		/* init IDLE_POLL_COUNT = 60 */
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
+		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
+		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
+
+		/* init RLC PG Delay */
+		data = 0;
+		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
+		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
+		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
+		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
+
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
+		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
+		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
+
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
+		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
+		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
+
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
+		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
+
+		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
+		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
+
+		pwr_10_0_gfxip_control_over_cgpg(adev, true);
+	}
+}
+
+static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data = 0;
+	uint32_t default_data = 0;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+
+	if (enable == true) {
+		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+		if (default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	} else {
+		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	}
+}
+
+static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data = 0;
+	uint32_t default_data = 0;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+
+	if (enable == true) {
+		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	} else {
+		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	}
+}
+
+static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data = 0;
+	uint32_t default_data = 0;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+
+	if (enable == true) {
+		data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	} else {
+		data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+		if(default_data != data)
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+	}
+}
+
+static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data, default_data;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+	if (enable == true)
+		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+	if(default_data != data)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data, default_data;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+	if (enable == true)
+		data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+	if(default_data != data)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+
+	if (!enable)
+		/* read any GFX register to wake up GFX */
+		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
+}
+
+static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data, default_data;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+	if (enable == true)
+		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
+	if(default_data != data)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
+static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	uint32_t data, default_data;
+
+	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
+	if (enable == true)
+		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
+	if(default_data != data)
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
+}
+
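+/*
+ * One-time powergating setup: point the RLC at the clear-state buffer,
+ * arm the save-restore machine, and on Raven hook up the CP jump table
+ * plus the RLC/SMU handshake and CP powergating knobs requested in
+ * pg_flags.
+ */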
+static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
+{
+	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG |
+			      AMD_PG_SUPPORT_CP |
+			      AMD_PG_SUPPORT_GDS |
+			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
+		gfx_v9_0_init_csb(adev);
+		gfx_v9_0_init_rlc_save_restore_list(adev);
+		gfx_v9_0_enable_save_restore_machine(adev);
+
+		if (adev->asic_type == CHIP_RAVEN) {
+			WREG32(mmRLC_JUMP_TABLE_RESTORE,
+				adev->gfx.rlc.cp_table_gpu_addr >> 8);
+			gfx_v9_0_init_gfx_power_gating(adev);
+
+			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
+				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
+			} else {
+				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
+				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
+			}
+
+			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+				gfx_v9_0_enable_cp_power_gating(adev, true);
+			else
+				gfx_v9_0_enable_cp_power_gating(adev, false);
+		}
+	}
+}
+
 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
 {
 	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
@@ -1425,7 +2069,7 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
 	 * default is 0x9C4 to create a 100us interval */
 	WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
 	/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
-	 * to disable the page fault retry interrupts, default is 
+	 * to disable the page fault retry interrupts, default is
 	 * 0x100 (256) */
 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
 	}
@@ -1474,6 +2118,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)

 	gfx_v9_0_rlc_reset(adev);

+	gfx_v9_0_init_pg(adev);
+
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
 		/* legacy rlc firmware loading */
 		r = gfx_v9_0_rlc_load_microcode(adev);
@@ -1481,6 +2127,13 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 			return r;
 	}

+	if (adev->asic_type == CHIP_RAVEN) {
+		if (amdgpu_lbpw != 0)
+			gfx_v9_0_enable_lbpw(adev, true);
+		else
+			gfx_v9_0_enable_lbpw(adev, false);
+	}
+
 	gfx_v9_0_rlc_start(adev);

 	return 0;
@@ -1559,35 +2212,6 @@ static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
 	return 0;
 }

-static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
-{
-	u32 count = 0;
-	const struct cs_section_def *sect = NULL;
-	const struct cs_extent_def *ext = NULL;
-
-	/* begin clear state */
-	count += 2;
-	/* context control state */
-	count += 3;
-
-	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
-		for (ext = sect->section; ext->extent != NULL; ++ext) {
-			if (sect->id == SECT_CONTEXT)
-				count += 2 + ext->reg_count;
-			else
-				return 0;
-		}
-	}
-	/* pa_sc_raster_config/pa_sc_raster_config1 */
-	count += 4;
-	/* end clear state */
-	count += 2;
-	/* clear state */
-	count += 2;
-
-	return count;
-}
-
 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
@@ -1730,13 +2354,6 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	udelay(50);
 }

-static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
-{
-	gfx_v9_0_cp_compute_enable(adev, true);
-
-	return 0;
-}
-
 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 {
 	const struct gfx_firmware_header_v1_0 *mec_hdr;
@@ -1764,7 +2381,7 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
-	
+
 	/* MEC1 */
 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
 		mec_hdr->jt_offset);
@@ -1779,45 +2396,6 @@
 	return 0;
 }

-static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
-{
-	int i, r;
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, true);
-			if (unlikely(r != 0))
-				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
-
-			amdgpu_bo_unpin(ring->mqd_obj);
-			amdgpu_bo_unreserve(ring->mqd_obj);
-
-			amdgpu_bo_unref(&ring->mqd_obj);
-			ring->mqd_obj = NULL;
-		}
-	}
-}
-
-static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);
-
-static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int i, r;
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-		if (gfx_v9_0_init_queue(ring))
-			dev_warn(adev->dev, "compute queue %d init failed!\n", i);
-	}
-
-	r = gfx_v9_0_cp_compute_start(adev);
-	if (r)
-		return r;
-
-	return 0;
-}
-
 /* KIQ functions */
 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 {
@@ -1833,51 +2411,95 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }

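+/*
+ * Map all owned compute queues through the KIQ: build a queue mask
+ * from mec.queue_bitmap, emit one SET_RESOURCES packet plus a
+ * MAP_QUEUES packet per ring, and poll a scratch register that the
+ * packet stream writes last to confirm the KIQ processed it.
+ */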
-static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
+static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
 {
-	amdgpu_ring_alloc(ring, 8);
-	/* set resources */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
-	amdgpu_ring_write(ring, 0);	/* queue mask hi */
-	amdgpu_ring_write(ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(ring, 0);	/* gws mask hi */
-	amdgpu_ring_write(ring, 0);	/* oac mask */
-	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
-	amdgpu_ring_commit(ring);
-	udelay(50);
-}
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	uint32_t scratch, tmp = 0;
+	uint64_t queue_mask = 0;
+	int r, i;
+
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of queue_mask needs updating */
+		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+			break;
+		}

-static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
-				   struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = kiq_ring->adev;
-	uint64_t mqd_addr, wptr_addr;
+		queue_mask |= (1ull << i);
+	}

-	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	amdgpu_ring_alloc(kiq_ring, 8);
+	r = amdgpu_gfx_scratch_get(adev, &scratch);
+	if (r) {
+		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);

-	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-			  (0 << 4) | /* Queue_Sel */
-			  (0 << 8) | /* VMID */
-			  (ring->queue << 13 ) |
-			  (ring->pipe << 16) |
-			  ((ring->me == 1 ? 0 : 1) << 18) |
-			  (0 << 21) | /*queue_type: normal compute queue */
-			  (1 << 24) | /* alloc format: all_on_one_pipe */
-			  (0 << 26) | /* engine_sel: compute */
-			  (1 << 29)); /* num_queues: must be 1 */
-	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		amdgpu_gfx_scratch_free(adev, scratch);
+		return r;
+	}
+
+	/* set resources */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
+	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+
+		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */
+				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
+				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+	}
+	/* write to scratch for completion */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
 	amdgpu_ring_commit(kiq_ring);
-	udelay(50);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i >= adev->usec_timeout) {
+		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	amdgpu_gfx_scratch_free(adev, scratch);
+
+	return r;
 }

 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
@@ -1902,7 +2524,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			(order_base_2(MEC_HPD_SIZE / 4) - 1));
+			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));

 	mqd->cp_hqd_eop_control = tmp;

@@ -2119,47 +2741,69 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct v9_mqd *mqd = ring->mqd_ptr;
-	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

-	if (is_kiq) {
-		gfx_v9_0_kiq_setting(&kiq->ring);
+	gfx_v9_0_kiq_setting(ring);
+
+	if (adev->gfx.in_reset) { /* for GPU_RESET case */
+		/* reset MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+
+		/* reset ring buffer */
+		ring->wptr = 0;
+		amdgpu_ring_clear_ring(ring);
+
+		mutex_lock(&adev->srbm_mutex);
+		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v9_0_kiq_init_register(ring);
+		soc15_grbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
 	} else {
-		mqd_idx = ring - &adev->gfx.compute_ring[0];
+		memset((void *)mqd, 0, sizeof(*mqd));
+		mutex_lock(&adev->srbm_mutex);
+		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+		gfx_v9_0_mqd_init(ring);
+		gfx_v9_0_kiq_init_register(ring);
+		soc15_grbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
 	}

-	if (!adev->gfx.in_reset) {
+	return 0;
+}
+
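+/*
+ * Per-KCQ MQD setup: on first init build the MQD under srbm_mutex and
+ * back it up; on GPU reset restore the backup and clear the ring;
+ * otherwise (resume from suspend) just clear the ring.
+ */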
+static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct v9_mqd *mqd = ring->mqd_ptr;
+	int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
 		memset((void *)mqd, 0, sizeof(*mqd));
 		mutex_lock(&adev->srbm_mutex);
 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v9_0_mqd_init(ring);
-		if (is_kiq)
-			gfx_v9_0_kiq_init_register(ring);
 		soc15_grbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);

-	} else {	/* for GPU_RESET case */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
+		if (adev->gfx.mec.mqd_backup[mqd_idx])
+			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

 		/* reset ring buffer */
 		ring->wptr = 0;
-
-		if (is_kiq) {
-			mutex_lock(&adev->srbm_mutex);
-			soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-			gfx_v9_0_kiq_init_register(ring);
-			soc15_grbm_select(adev, 0, 0, 0, 0);
-			mutex_unlock(&adev->srbm_mutex);
-		}
+		amdgpu_ring_clear_ring(ring);
+	} else {
+		amdgpu_ring_clear_ring(ring);
 	}

-	if (is_kiq)
-		gfx_v9_0_kiq_enable(ring);
-	else
-		gfx_v9_0_map_queue_enable(&kiq->ring, ring);
-
 	return 0;
 }

@@ -2194,7 +2838,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
 		goto done;
 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
 	if (!r) {
-		r = gfx_v9_0_kiq_init_queue(ring);
+		r = gfx_v9_0_kcq_init_queue(ring);
 		amdgpu_bo_kunmap(ring->mqd_obj);
 		ring->mqd_ptr = NULL;
 	}
@@ -2203,13 +2847,14 @@
 		goto done;
 	}

+	r = gfx_v9_0_kiq_kcq_enable(adev);
 done:
 	return r;
 }

 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 {
-	int r,i;
+	int r, i;
 	struct amdgpu_ring *ring;

 	if (!(adev->flags & AMD_IS_APU))
@@ -2230,10 +2875,7 @@
 	if (r)
 		return r;

-	if (amdgpu_sriov_vf(adev))
-		r = gfx_v9_0_kiq_resume(adev);
-	else
-		r = gfx_v9_0_cp_compute_resume(adev);
+	r = gfx_v9_0_kiq_resume(adev);
 	if (r)
 		return r;

@@ -2243,6 +2885,13 @@
 		ring->ready = false;
 		return r;
 	}
+
+	ring = &adev->gfx.kiq.ring;
+	ring->ready = true;
+	r = amdgpu_ring_test_ring(ring);
+	if (r)
+		ring->ready = false;
+
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];

@@ -2252,14 +2901,6 @@
 		ring->ready = false;
 	}

-	if (amdgpu_sriov_vf(adev)) {
-		ring = &adev->gfx.kiq.ring;
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
-	}
-
 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);

 	return 0;
@@ -2307,7 +2948,6 @@ static int gfx_v9_0_hw_fini(void *handle)
 	}
 	gfx_v9_0_cp_enable(adev, false);
 	gfx_v9_0_rlc_stop(adev);
-	gfx_v9_0_cp_compute_fini(adev);

 	return 0;
 }
@@ -2316,14 +2956,18 @@ static int gfx_v9_0_suspend(void *handle)
2316{ 2956{
2317 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2957 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2318 2958
2959 adev->gfx.in_suspend = true;
2319 return gfx_v9_0_hw_fini(adev); 2960 return gfx_v9_0_hw_fini(adev);
2320} 2961}
2321 2962
2322static int gfx_v9_0_resume(void *handle) 2963static int gfx_v9_0_resume(void *handle)
2323{ 2964{
2324 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2965 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2966 int r;
2325 2967
2326 return gfx_v9_0_hw_init(adev); 2968 r = gfx_v9_0_hw_init(adev);
2969 adev->gfx.in_suspend = false;
2970 return r;
2327} 2971}
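The in_suspend flag set in gfx_v9_0_suspend and cleared at the end of gfx_v9_0_resume is what lets the queue-init code tell a cold boot from a resume or a reset. A minimal sketch of that three-way dispatch, with hypothetical helper names:

        /* Hypothetical helpers standing in for the real MQD paths. */
        static int full_mqd_init(void)      { /* derive every MQD field */ return 0; }
        static int restore_mqd_backup(void) { /* copy the saved snapshot */ return 0; }

        /* Pick the init path from the flags the suspend/reset handlers set. */
        static int kcq_init_queue_sketch(int in_suspend, int in_reset)
        {
                if (in_reset)
                        return restore_mqd_backup(); /* GPU reset: reuse snapshot */
                if (in_suspend)
                        return 0;                    /* resume: saved MQD still valid */
                return full_mqd_init();              /* cold boot: program from scratch */
        }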
2328 2972
2329static bool gfx_v9_0_is_idle(void *handle) 2973static bool gfx_v9_0_is_idle(void *handle)
@@ -2470,7 +3114,7 @@ static int gfx_v9_0_early_init(void *handle)
2470 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3114 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2471 3115
2472 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 3116 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
2473 adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS; 3117 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
2474 gfx_v9_0_set_ring_funcs(adev); 3118 gfx_v9_0_set_ring_funcs(adev);
2475 gfx_v9_0_set_irq_funcs(adev); 3119 gfx_v9_0_set_irq_funcs(adev);
2476 gfx_v9_0_set_gds_init(adev); 3120 gfx_v9_0_set_gds_init(adev);
@@ -2549,6 +3193,43 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
2549 } 3193 }
2550} 3194}
2551 3195
3196static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3197 bool enable)
3198{
3199	/* TODO: double check if we need to perform under safe mode */
3200 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3201
3202 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3203 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3204 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3205 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3206 } else {
3207 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3208 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3209 }
3210
3211 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3212}
3213
3214static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3215 bool enable)
3216{
3217 /* TODO: double check if we need to perform under safe mode */
3218 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3219
3220 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3221 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3222 else
3223 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3224
3225 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3226 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3227 else
3228 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3229
3230 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
3231}
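Both helpers above share one shape: a feature is switched on only when the global enable request and the corresponding pg_flags capability bit agree, and switched off otherwise. The decision as a one-line predicate:

        #include <stdbool.h>

        /* Enable a powergating feature only when it is both requested
         * and advertised in the device's pg_flags. */
        static bool want_pg_feature(unsigned pg_flags, unsigned feature_bit,
                                    bool enable)
        {
                return enable && (pg_flags & feature_bit);
        }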
3232
2552static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 3233static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
2553 bool enable) 3234 bool enable)
2554{ 3235{
@@ -2739,6 +3420,34 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
2739static int gfx_v9_0_set_powergating_state(void *handle, 3420static int gfx_v9_0_set_powergating_state(void *handle,
2740 enum amd_powergating_state state) 3421 enum amd_powergating_state state)
2741{ 3422{
3423 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3424 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
3425
3426 switch (adev->asic_type) {
3427 case CHIP_RAVEN:
3428 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3429 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
3430 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
3431 } else {
3432 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
3433 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
3434 }
3435
3436 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3437 gfx_v9_0_enable_cp_power_gating(adev, true);
3438 else
3439 gfx_v9_0_enable_cp_power_gating(adev, false);
3440
3441 /* update gfx cgpg state */
3442 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
3443
3444 /* update mgcg state */
3445 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
3446 break;
3447 default:
3448 break;
3449 }
3450
2742 return 0; 3451 return 0;
2743} 3452}
2744 3453
@@ -2752,6 +3461,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
2752 3461
2753 switch (adev->asic_type) { 3462 switch (adev->asic_type) {
2754 case CHIP_VEGA10: 3463 case CHIP_VEGA10:
3464 case CHIP_RAVEN:
2755 gfx_v9_0_update_gfx_clock_gating(adev, 3465 gfx_v9_0_update_gfx_clock_gating(adev,
2756 state == AMD_CG_STATE_GATE ? true : false); 3466 state == AMD_CG_STATE_GATE ? true : false);
2757 break; 3467 break;
@@ -2879,31 +3589,33 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2879 struct amdgpu_ib *ib, 3589 struct amdgpu_ib *ib,
2880 unsigned vm_id, bool ctx_switch) 3590 unsigned vm_id, bool ctx_switch)
2881{ 3591{
2882 u32 header, control = 0; 3592 u32 header, control = 0;
2883 3593
2884 if (ib->flags & AMDGPU_IB_FLAG_CE) 3594 if (ib->flags & AMDGPU_IB_FLAG_CE)
2885 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 3595 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2886 else 3596 else
2887 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 3597 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2888 3598
2889 control |= ib->length_dw | (vm_id << 24); 3599 control |= ib->length_dw | (vm_id << 24);
2890 3600
2891 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) 3601 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
2892 control |= INDIRECT_BUFFER_PRE_ENB(1); 3602 control |= INDIRECT_BUFFER_PRE_ENB(1);
2893 3603
2894 amdgpu_ring_write(ring, header); 3604 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
2895 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 3605 gfx_v9_0_ring_emit_de_meta(ring);
2896 amdgpu_ring_write(ring, 3606 }
3607
3608 amdgpu_ring_write(ring, header);
3609	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3610 amdgpu_ring_write(ring,
2897#ifdef __BIG_ENDIAN 3611#ifdef __BIG_ENDIAN
2898 (2 << 0) | 3612 (2 << 0) |
2899#endif 3613#endif
2900 lower_32_bits(ib->gpu_addr)); 3614 lower_32_bits(ib->gpu_addr));
2901 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 3615 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
2902 amdgpu_ring_write(ring, control); 3616 amdgpu_ring_write(ring, control);
2903} 3617}
2904 3618
2905#define INDIRECT_BUFFER_VALID (1 << 23)
2906
2907static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 3619static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2908 struct amdgpu_ib *ib, 3620 struct amdgpu_ib *ib,
2909 unsigned vm_id, bool ctx_switch) 3621 unsigned vm_id, bool ctx_switch)
@@ -2971,9 +3683,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
2971 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); 3683 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
2972 unsigned eng = ring->vm_inv_eng; 3684 unsigned eng = ring->vm_inv_eng;
2973 3685
2974 pd_addr = pd_addr | 0x1; /* valid bit */ 3686 pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
2975 /* now only use physical base address of PDE and valid */ 3687 pd_addr |= AMDGPU_PTE_VALID;
2976 BUG_ON(pd_addr & 0xFFFF00000000003EULL);
2977 3688
2978 gfx_v9_0_write_data_to_reg(ring, usepfp, true, 3689 gfx_v9_0_write_data_to_reg(ring, usepfp, true,
2979 hub->ctx0_ptb_addr_lo32 + (2 * vm_id), 3690 hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
@@ -3130,9 +3841,6 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
3130 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3841 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3131 amdgpu_ring_write(ring, dw2); 3842 amdgpu_ring_write(ring, dw2);
3132 amdgpu_ring_write(ring, 0); 3843 amdgpu_ring_write(ring, 0);
3133
3134 if (amdgpu_sriov_vf(ring->adev))
3135 gfx_v9_0_ring_emit_de_meta(ring);
3136} 3844}
3137 3845
3138static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 3846static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
@@ -3160,6 +3868,12 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
3160 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 3868 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
3161} 3869}
3162 3870
3871static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
3872{
3873 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
3874 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
3875}
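FRAME_CONTROL packets bracket trusted-memory-zone (TMZ) work on the ring: FRAME_CMD(0) opens a protected frame and FRAME_CMD(1) closes it. A sketch of how a submission path might use the hook once it is wired into the ring funcs table below; this is illustrative usage, assuming the amdgpu ring types are in scope:

        /* Bracket protected command submission with TMZ frame markers. */
        static void submit_tmz_work(struct amdgpu_ring *ring)
        {
                ring->funcs->emit_tmz(ring, true);   /* FRAME_CMD(0): frame start */
                /* ... emit the protected IBs here ... */
                ring->funcs->emit_tmz(ring, false);  /* FRAME_CMD(1): frame end   */
        }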
3876
3163static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 3877static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
3164{ 3878{
3165 struct amdgpu_device *adev = ring->adev; 3879 struct amdgpu_device *adev = ring->adev;
@@ -3208,8 +3922,8 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
3208 u32 mec_int_cntl, mec_int_cntl_reg; 3922 u32 mec_int_cntl, mec_int_cntl_reg;
3209 3923
3210 /* 3924 /*
3211 * amdgpu controls only pipe 0 of MEC1. That's why this function only 3925 * amdgpu controls only the first MEC. That's why this function only
3212 * handles the setting of interrupts for this specific pipe. All other 3926 * handles the setting of interrupts for this specific MEC. All other
3213 * pipes' interrupts are set by amdkfd. 3927 * pipes' interrupts are set by amdkfd.
3214 */ 3928 */
3215 3929
@@ -3218,6 +3932,15 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
3218 case 0: 3932 case 0:
3219 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 3933 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
3220 break; 3934 break;
3935 case 1:
3936 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
3937 break;
3938 case 2:
3939 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
3940 break;
3941 case 3:
3942 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
3943 break;
3221 default: 3944 default:
3222 DRM_DEBUG("invalid pipe %d\n", pipe); 3945 DRM_DEBUG("invalid pipe %d\n", pipe);
3223 return; 3946 return;
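The new cases map each MEC1 pipe to its own interrupt-control register. If the four CP_ME1_PIPEn_INT_CNTL registers sit at consecutive offsets in the SOC15 register map (an assumption worth checking against the headers), the same selection can be written in indexed form:

        /* Indexed equivalent of the switch, assuming PIPE0..PIPE3
         * registers are contiguous; bail out on any pipe the first
         * MEC does not own. */
        if (pipe < 0 || pipe > 3)
                return;
        mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL) + pipe;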
@@ -3494,6 +4217,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
3494 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 4217 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
3495 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 4218 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
3496 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 4219 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
4220 .emit_tmz = gfx_v9_0_ring_emit_tmz,
3497}; 4221};
3498 4222
3499static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 4223static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -3605,6 +4329,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
3605{ 4329{
3606 switch (adev->asic_type) { 4330 switch (adev->asic_type) {
3607 case CHIP_VEGA10: 4331 case CHIP_VEGA10:
4332 case CHIP_RAVEN:
3608 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 4333 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
3609 break; 4334 break;
3610 default: 4335 default:
@@ -3640,6 +4365,20 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
3640 } 4365 }
3641} 4366}
3642 4367
4368static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
4369 u32 bitmap)
4370{
4371 u32 data;
4372
4373 if (!bitmap)
4374 return;
4375
4376 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
4377 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
4378
4379 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
4380}
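The helper uses the register field's SHIFT/MASK pair: shift the bitmap into the field position, then mask so adjacent fields cannot be clobbered. The same pattern in standalone C, with hypothetical field constants:

        #include <stdint.h>

        #define FIELD_SHIFT 16u            /* hypothetical field position */
        #define FIELD_MASK  0xFFFF0000u    /* hypothetical field mask     */

        /* Pack 'bitmap' into a register field without touching its
         * neighbouring fields. */
        static uint32_t pack_field(uint32_t bitmap)
        {
                return (bitmap << FIELD_SHIFT) & FIELD_MASK;
        }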
4381
3643static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 4382static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3644{ 4383{
3645 u32 data, mask; 4384 u32 data, mask;
@@ -3650,7 +4389,7 @@ static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3650 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 4389 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3651 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 4390 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3652 4391
3653 mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh); 4392 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
3654 4393
3655 return (~data) & mask; 4394 return (~data) & mask;
3656} 4395}
@@ -3660,11 +4399,12 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
3660{ 4399{
3661 int i, j, k, counter, active_cu_number = 0; 4400 int i, j, k, counter, active_cu_number = 0;
3662 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 4401 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
4402 unsigned disable_masks[4 * 2];
3663 4403
3664 if (!adev || !cu_info) 4404 if (!adev || !cu_info)
3665 return -EINVAL; 4405 return -EINVAL;
3666 4406
3667 memset(cu_info, 0, sizeof(*cu_info)); 4407 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
3668 4408
3669 mutex_lock(&adev->grbm_idx_mutex); 4409 mutex_lock(&adev->grbm_idx_mutex);
3670 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 4410 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
@@ -3673,19 +4413,24 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
3673 ao_bitmap = 0; 4413 ao_bitmap = 0;
3674 counter = 0; 4414 counter = 0;
3675 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 4415 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
4416 if (i < 4 && j < 2)
4417 gfx_v9_0_set_user_cu_inactive_bitmap(
4418 adev, disable_masks[i * 2 + j]);
3676 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 4419 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
3677 cu_info->bitmap[i][j] = bitmap; 4420 cu_info->bitmap[i][j] = bitmap;
3678 4421
3679 for (k = 0; k < 16; k ++) { 4422 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
3680 if (bitmap & mask) { 4423 if (bitmap & mask) {
3681 if (counter < 2) 4424 if (counter < adev->gfx.config.max_cu_per_sh)
3682 ao_bitmap |= mask; 4425 ao_bitmap |= mask;
3683 counter ++; 4426 counter ++;
3684 } 4427 }
3685 mask <<= 1; 4428 mask <<= 1;
3686 } 4429 }
3687 active_cu_number += counter; 4430 active_cu_number += counter;
3688 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 4431 if (i < 2 && j < 2)
4432 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
4433 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
3689 } 4434 }
3690 } 4435 }
3691 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 4436 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
@@ -3697,218 +4442,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
3697 return 0; 4442 return 0;
3698} 4443}
3699 4444
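The per-SH loop above walks the active-CU bitmap bit by bit, counting set bits and collecting the first few into an always-on mask. The same walk as a standalone function:

        #include <stdint.h>

        /* Count active CUs in 'bitmap' up to 'max_cu' bits, and collect
         * the first 'ao_limit' of them into an always-on mask, mirroring
         * the loop in gfx_v9_0_get_cu_info. */
        static unsigned count_cus(uint32_t bitmap, unsigned max_cu,
                                  unsigned ao_limit, uint32_t *ao_bitmap)
        {
                uint32_t mask = 1;
                unsigned counter = 0;
                unsigned k;

                *ao_bitmap = 0;
                for (k = 0; k < max_cu; k++, mask <<= 1) {
                        if (bitmap & mask) {
                                if (counter < ao_limit)
                                        *ao_bitmap |= mask;
                                counter++;
                        }
                }
                return counter;
        }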
3700static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
3701{
3702 int r, j;
3703 u32 tmp;
3704 bool use_doorbell = true;
3705 u64 hqd_gpu_addr;
3706 u64 mqd_gpu_addr;
3707 u64 eop_gpu_addr;
3708 u64 wb_gpu_addr;
3709 u32 *buf;
3710 struct v9_mqd *mqd;
3711 struct amdgpu_device *adev;
3712
3713 adev = ring->adev;
3714 if (ring->mqd_obj == NULL) {
3715 r = amdgpu_bo_create(adev,
3716 sizeof(struct v9_mqd),
3717 PAGE_SIZE,true,
3718 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3719 NULL, &ring->mqd_obj);
3720 if (r) {
3721 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3722 return r;
3723 }
3724 }
3725
3726 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3727 if (unlikely(r != 0)) {
3728 gfx_v9_0_cp_compute_fini(adev);
3729 return r;
3730 }
3731
3732 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3733 &mqd_gpu_addr);
3734 if (r) {
3735 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3736 gfx_v9_0_cp_compute_fini(adev);
3737 return r;
3738 }
3739 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3740 if (r) {
3741 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3742 gfx_v9_0_cp_compute_fini(adev);
3743 return r;
3744 }
3745
3746 /* init the mqd struct */
3747 memset(buf, 0, sizeof(struct v9_mqd));
3748
3749 mqd = (struct v9_mqd *)buf;
3750 mqd->header = 0xC0310800;
3751 mqd->compute_pipelinestat_enable = 0x00000001;
3752 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3753 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3754 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3755 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3756 mqd->compute_misc_reserved = 0x00000003;
3757 mutex_lock(&adev->srbm_mutex);
3758 soc15_grbm_select(adev, ring->me,
3759 ring->pipe,
3760 ring->queue, 0);
3761 /* disable wptr polling */
3762 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3763
3764 /* write the EOP addr */
3765	BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't compute the EOP address for other cases */
3766 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
3767 eop_gpu_addr >>= 8;
3768
3769 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
3770 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3771 mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
3772 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
3773
3774 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3775 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3776 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3777 (order_base_2(MEC_HPD_SIZE / 4) - 1));
3778 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);
3779
3780 /* enable doorbell? */
3781 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3782 if (use_doorbell)
3783 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3784 else
3785 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3786
3787 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3788 mqd->cp_hqd_pq_doorbell_control = tmp;
3789
3790 /* disable the queue if it's active */
3791 ring->wptr = 0;
3792 mqd->cp_hqd_dequeue_request = 0;
3793 mqd->cp_hqd_pq_rptr = 0;
3794 mqd->cp_hqd_pq_wptr_lo = 0;
3795 mqd->cp_hqd_pq_wptr_hi = 0;
3796 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3797 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3798 for (j = 0; j < adev->usec_timeout; j++) {
3799 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3800 break;
3801 udelay(1);
3802 }
3803 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3804 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3805 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
3806 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
3807 }
3808
3809 /* set the pointer to the MQD */
3810 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3811 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3812 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3813 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3814
3815 /* set MQD vmid to 0 */
3816 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3817 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3818 WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp);
3819 mqd->cp_mqd_control = tmp;
3820
3821	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3822 hqd_gpu_addr = ring->gpu_addr >> 8;
3823 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3824 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3825 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3826 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3827
3828 /* set up the HQD, this is similar to CP_RB0_CNTL */
3829 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3830 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3831 (order_base_2(ring->ring_size / 4) - 1));
3832 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3833 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3834#ifdef __BIG_ENDIAN
3835 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3836#endif
3837 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3838 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3839 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3840 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3841 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp);
3842 mqd->cp_hqd_pq_control = tmp;
3843
3844	/* set the wb address whether it's enabled or not */
3845 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3846 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3847 mqd->cp_hqd_pq_rptr_report_addr_hi =
3848 upper_32_bits(wb_gpu_addr) & 0xffff;
3849 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3850 mqd->cp_hqd_pq_rptr_report_addr_lo);
3851 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3852 mqd->cp_hqd_pq_rptr_report_addr_hi);
3853
3854 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3855 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3856 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3857 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3858 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3859 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3860 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3861 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3862
3863 /* enable the doorbell if requested */
3864 if (use_doorbell) {
3865 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3866 (AMDGPU_DOORBELL64_KIQ * 2) << 2);
3867 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3868 (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
3869 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3870 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3871 DOORBELL_OFFSET, ring->doorbell_index);
3872 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3873 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3874 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3875 mqd->cp_hqd_pq_doorbell_control = tmp;
3876
3877 } else {
3878 mqd->cp_hqd_pq_doorbell_control = 0;
3879 }
3880 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3881 mqd->cp_hqd_pq_doorbell_control);
3882
3883 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3884 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
3885 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
3886
3887 /* set the vmid for the queue */
3888 mqd->cp_hqd_vmid = 0;
3889 WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3890
3891 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3892 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3893 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp);
3894 mqd->cp_hqd_persistent_state = tmp;
3895
3896 /* activate the queue */
3897 mqd->cp_hqd_active = 1;
3898 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3899
3900 soc15_grbm_select(adev, 0, 0, 0, 0);
3901 mutex_unlock(&adev->srbm_mutex);
3902
3903 amdgpu_bo_kunmap(ring->mqd_obj);
3904 amdgpu_bo_unreserve(ring->mqd_obj);
3905
3906 if (use_doorbell)
3907 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3908
3909 return 0;
3910}
3911
3912const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 4445const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
3913{ 4446{
3914 .type = AMD_IP_BLOCK_TYPE_GFX, 4447 .type = AMD_IP_BLOCK_TYPE_GFX,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 005075ff00f7..a42f483767e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -31,178 +31,161 @@
31 31
32#include "soc15_common.h" 32#include "soc15_common.h"
33 33
34int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) 34u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
35{ 35{
36 u32 tmp; 36 return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;
37 u64 value; 37}
38 u32 i;
39 38
40 /* Program MC. */ 39static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
41 /* Update configuration */ 40{
42 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), 41 uint64_t value;
43 adev->mc.vram_start >> 18);
44 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR),
45 adev->mc.vram_end >> 18);
46 42
47 value = adev->vram_scratch.gpu_addr - adev->mc.vram_start 43 BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
44 value = adev->gart.table_addr - adev->mc.vram_start
48 + adev->vm_manager.vram_base_offset; 45 + adev->vm_manager.vram_base_offset;
49 WREG32(SOC15_REG_OFFSET(GC, 0, 46 value &= 0x0000FFFFFFFFF000ULL;
50 mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), 47 value |= 0x1; /*valid bit*/
51 (u32)(value >> 12));
52 WREG32(SOC15_REG_OFFSET(GC, 0,
53 mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
54 (u32)(value >> 44));
55 48
56 if (amdgpu_sriov_vf(adev)) { 49 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
 57 /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are VF copy registers so 50 lower_32_bits(value));
 58 vbios post doesn't program them; the SRIOV driver needs to program them */ 51
59 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE), 52 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
60 adev->mc.vram_start >> 24); 53 upper_32_bits(value));
61 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP), 54}
62 adev->mc.vram_end >> 24); 55
63 } 56static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
57{
58 gfxhub_v1_0_init_gart_pt_regs(adev);
59
60 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
61 (u32)(adev->mc.gtt_start >> 12));
62 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
63 (u32)(adev->mc.gtt_start >> 44));
64
65 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
66 (u32)(adev->mc.gtt_end >> 12));
67 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
68 (u32)(adev->mc.gtt_end >> 44));
69}
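gfxhub_v1_0_init_gart_pt_regs above packs the context-0 page-table base into the form the hardware expects: a 48-bit, 4 KiB-aligned address with bit 0 reused as the valid bit. The packing in isolation:

        #include <stdint.h>

        /* Build the VM_CONTEXT0 page-table base value: keep address bits
         * 47:12, then set bit 0 as the valid bit, as the init code does. */
        static uint64_t make_pt_base(uint64_t table_addr, uint64_t vram_start,
                                     uint64_t vram_base_offset)
        {
                uint64_t value = table_addr - vram_start + vram_base_offset;

                value &= 0x0000FFFFFFFFF000ULL;  /* 4 KiB aligned, 48-bit address */
                value |= 0x1;                    /* valid bit */
                return value;
        }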
70
71static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
72{
73 uint64_t value;
64 74
65 /* Disable AGP. */ 75 /* Disable AGP. */
66 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0); 76 WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
67 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0); 77 WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
68 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF); 78 WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF);
69 79
70 /* GART Enable. */ 80 /* Program the system aperture low logical page number. */
81 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
82 adev->mc.vram_start >> 18);
83 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
84 adev->mc.vram_end >> 18);
85
86 /* Set default page address. */
87 value = adev->vram_scratch.gpu_addr - adev->mc.vram_start
88 + adev->vm_manager.vram_base_offset;
89 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
90 (u32)(value >> 12));
91 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
92 (u32)(value >> 44));
93
94 /* Program "protection fault". */
95 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
96 (u32)(adev->dummy_page.addr >> 12));
97 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
98 (u32)((u64)adev->dummy_page.addr >> 44));
99
100 WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
101 ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
102}
103
104static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
105{
106 uint32_t tmp;
71 107
72 /* Setup TLB control */ 108 /* Setup TLB control */
73 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); 109 tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
110
74 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); 111 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
75 tmp = REG_SET_FIELD(tmp, 112 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
76 MC_VM_MX_L1_TLB_CNTL, 113 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
77 SYSTEM_ACCESS_MODE, 114 ENABLE_ADVANCED_DRIVER_MODEL, 1);
78 3); 115 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
79 tmp = REG_SET_FIELD(tmp, 116 SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
80 MC_VM_MX_L1_TLB_CNTL, 117 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
81 ENABLE_ADVANCED_DRIVER_MODEL, 118 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
82 1); 119 MTYPE, MTYPE_UC);/* XXX for emulation. */
83 tmp = REG_SET_FIELD(tmp, 120 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
84 MC_VM_MX_L1_TLB_CNTL, 121
85 SYSTEM_APERTURE_UNMAPPED_ACCESS, 122 WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
86 0); 123}
87 tmp = REG_SET_FIELD(tmp, 124
88 MC_VM_MX_L1_TLB_CNTL, 125static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
89 ECO_BITS, 126{
90 0); 127 uint32_t tmp;
91 tmp = REG_SET_FIELD(tmp,
92 MC_VM_MX_L1_TLB_CNTL,
93 MTYPE,
94 MTYPE_UC);/* XXX for emulation. */
95 tmp = REG_SET_FIELD(tmp,
96 MC_VM_MX_L1_TLB_CNTL,
97 ATC_EN,
98 1);
99 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
100 128
101 /* Setup L2 cache */ 129 /* Setup L2 cache */
102 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); 130 tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL);
103 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); 131 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
104 tmp = REG_SET_FIELD(tmp, 132 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
105 VM_L2_CNTL, 133 /* XXX for emulation, Refer to closed source code.*/
106 ENABLE_L2_FRAGMENT_PROCESSING, 134 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
107 0); 135 0);
108 tmp = REG_SET_FIELD(tmp,
109 VM_L2_CNTL,
110 L2_PDE0_CACHE_TAG_GENERATION_MODE,
111 0);/* XXX for emulation, Refer to closed source code.*/
112 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); 136 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
113 tmp = REG_SET_FIELD(tmp, 137 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
114 VM_L2_CNTL, 138 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
115 CONTEXT1_IDENTITY_ACCESS_MODE, 139 WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp);
116 1);
117 tmp = REG_SET_FIELD(tmp,
118 VM_L2_CNTL,
119 IDENTITY_MODE_FRAGMENT_SIZE,
120 0);
121 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp);
122 140
123 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2)); 141 tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2);
124 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); 142 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
125 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); 143 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
126 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp); 144 WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
127 145
128 tmp = mmVM_L2_CNTL3_DEFAULT; 146 tmp = mmVM_L2_CNTL3_DEFAULT;
129 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp); 147 WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
130 148
131 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4)); 149 tmp = mmVM_L2_CNTL4_DEFAULT;
132 tmp = REG_SET_FIELD(tmp, 150 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
133 VM_L2_CNTL4, 151 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
134 VMC_TAP_PDE_REQUEST_PHYSICAL, 152 WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp);
135 0); 153}
136 tmp = REG_SET_FIELD(tmp,
137 VM_L2_CNTL4,
138 VMC_TAP_PTE_REQUEST_PHYSICAL,
139 0);
140 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4), tmp);
141
142 /* setup context0 */
143 WREG32(SOC15_REG_OFFSET(GC, 0,
144 mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32),
145 (u32)(adev->mc.gtt_start >> 12));
146 WREG32(SOC15_REG_OFFSET(GC, 0,
147 mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32),
148 (u32)(adev->mc.gtt_start >> 44));
149
150 WREG32(SOC15_REG_OFFSET(GC, 0,
151 mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32),
152 (u32)(adev->mc.gtt_end >> 12));
153 WREG32(SOC15_REG_OFFSET(GC, 0,
154 mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32),
155 (u32)(adev->mc.gtt_end >> 44));
156 154
157 BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); 155static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
158 value = adev->gart.table_addr - adev->mc.vram_start 156{
159 + adev->vm_manager.vram_base_offset; 157 uint32_t tmp;
160 value &= 0x0000FFFFFFFFF000ULL;
161 value |= 0x1; /*valid bit*/
162 158
163 WREG32(SOC15_REG_OFFSET(GC, 0, 159 tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL);
164 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32),
165 (u32)value);
166 WREG32(SOC15_REG_OFFSET(GC, 0,
167 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32),
168 (u32)(value >> 32));
169
170 WREG32(SOC15_REG_OFFSET(GC, 0,
171 mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32),
172 (u32)(adev->dummy_page.addr >> 12));
173 WREG32(SOC15_REG_OFFSET(GC, 0,
174 mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32),
175 (u32)((u64)adev->dummy_page.addr >> 44));
176
177 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2));
178 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
179 ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY,
180 1);
181 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp);
182
183 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL));
184 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); 160 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
185 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); 161 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
186 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp); 162 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp);
163}
164
165static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
166{
167 WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
168 0XFFFFFFFF);
169 WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
170 0x0000000F);
171
172 WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
173 0);
174 WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
175 0);
187 176
188 /* Disable identity aperture.*/ 177 WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
189 WREG32(SOC15_REG_OFFSET(GC, 0, 178 WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
190 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF);
191 WREG32(SOC15_REG_OFFSET(GC, 0,
192 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F);
193 179
194 WREG32(SOC15_REG_OFFSET(GC, 0, 180}
195 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0);
196 WREG32(SOC15_REG_OFFSET(GC, 0,
197 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0);
198 181
199 WREG32(SOC15_REG_OFFSET(GC, 0, 182static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
200 mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); 183{
201 WREG32(SOC15_REG_OFFSET(GC, 0, 184 int i;
202 mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); 185 uint32_t tmp;
203 186
204 for (i = 0; i <= 14; i++) { 187 for (i = 0; i <= 14; i++) {
205 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i); 188 tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
206 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); 189 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
207 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 190 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
208 adev->vm_manager.num_level); 191 adev->vm_manager.num_level);
@@ -223,15 +206,52 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
223 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 206 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
224 PAGE_TABLE_BLOCK_SIZE, 207 PAGE_TABLE_BLOCK_SIZE,
225 adev->vm_manager.block_size - 9); 208 adev->vm_manager.block_size - 9);
226 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp); 209 WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp);
227 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); 210 WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
228 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); 211 WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
229 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, 212 WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
230 lower_32_bits(adev->vm_manager.max_pfn - 1)); 213 lower_32_bits(adev->vm_manager.max_pfn - 1));
231 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, 214 WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
232 upper_32_bits(adev->vm_manager.max_pfn - 1)); 215 upper_32_bits(adev->vm_manager.max_pfn - 1));
233 } 216 }
217}
218
219static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)
220{
221 unsigned i;
222
223 for (i = 0 ; i < 18; ++i) {
224 WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
225 2 * i, 0xffffffff);
226 WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
227 2 * i, 0x1f);
228 }
229}
230
231int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
232{
233 if (amdgpu_sriov_vf(adev)) {
234 /*
235	 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
236	 * VF copy registers, so the vbios post doesn't program them;
237	 * the SRIOV driver needs to program them
238 */
239 WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
240 adev->mc.vram_start >> 24);
241 WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
242 adev->mc.vram_end >> 24);
243 }
234 244
245 /* GART Enable. */
246 gfxhub_v1_0_init_gart_aperture_regs(adev);
247 gfxhub_v1_0_init_system_aperture_regs(adev);
248 gfxhub_v1_0_init_tlb_regs(adev);
249 gfxhub_v1_0_init_cache_regs(adev);
250
251 gfxhub_v1_0_enable_system_domain(adev);
252 gfxhub_v1_0_disable_identity_aperture(adev);
253 gfxhub_v1_0_setup_vmid_config(adev);
254 gfxhub_v1_0_program_invalidation(adev);
235 255
236 return 0; 256 return 0;
237} 257}
@@ -243,22 +263,20 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
243 263
244 /* Disable all tables */ 264 /* Disable all tables */
245 for (i = 0; i < 16; i++) 265 for (i = 0; i < 16; i++)
246 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0); 266 WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0);
247 267
248 /* Setup TLB control */ 268 /* Setup TLB control */
249 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); 269 tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
250 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); 270 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
251 tmp = REG_SET_FIELD(tmp, 271 tmp = REG_SET_FIELD(tmp,
252 MC_VM_MX_L1_TLB_CNTL, 272 MC_VM_MX_L1_TLB_CNTL,
253 ENABLE_ADVANCED_DRIVER_MODEL, 273 ENABLE_ADVANCED_DRIVER_MODEL,
254 0); 274 0);
255 WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); 275 WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
256 276
257 /* Setup L2 cache */ 277 /* Setup L2 cache */
258 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); 278 WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
259 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); 279 WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0);
260 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp);
261 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0);
262} 280}
263 281
264/** 282/**
@@ -271,7 +289,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
271 bool value) 289 bool value)
272{ 290{
273 u32 tmp; 291 u32 tmp;
274 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); 292 tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
275 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 293 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
276 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 294 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
277 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 295 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
@@ -296,22 +314,11 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
296 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 314 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
297 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 315 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
298 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 316 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
299 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); 317 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
300}
301
302static int gfxhub_v1_0_early_init(void *handle)
303{
304 return 0;
305}
306
307static int gfxhub_v1_0_late_init(void *handle)
308{
309 return 0;
310} 318}
311 319
312static int gfxhub_v1_0_sw_init(void *handle) 320void gfxhub_v1_0_init(struct amdgpu_device *adev)
313{ 321{
314 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
315 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; 322 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
316 323
317 hub->ctx0_ptb_addr_lo32 = 324 hub->ctx0_ptb_addr_lo32 =
@@ -330,96 +337,4 @@ static int gfxhub_v1_0_sw_init(void *handle)
330 SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); 337 SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS);
331 hub->vm_l2_pro_fault_cntl = 338 hub->vm_l2_pro_fault_cntl =
332 SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); 339 SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
333
334 return 0;
335}
336
337static int gfxhub_v1_0_sw_fini(void *handle)
338{
339 return 0;
340} 340}
341
342static int gfxhub_v1_0_hw_init(void *handle)
343{
344 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
345 unsigned i;
346
347 for (i = 0 ; i < 18; ++i) {
348 WREG32(SOC15_REG_OFFSET(GC, 0,
349 mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) +
350 2 * i, 0xffffffff);
351 WREG32(SOC15_REG_OFFSET(GC, 0,
352 mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) +
353 2 * i, 0x1f);
354 }
355
356 return 0;
357}
358
359static int gfxhub_v1_0_hw_fini(void *handle)
360{
361 return 0;
362}
363
364static int gfxhub_v1_0_suspend(void *handle)
365{
366 return 0;
367}
368
369static int gfxhub_v1_0_resume(void *handle)
370{
371 return 0;
372}
373
374static bool gfxhub_v1_0_is_idle(void *handle)
375{
376 return true;
377}
378
379static int gfxhub_v1_0_wait_for_idle(void *handle)
380{
381 return 0;
382}
383
384static int gfxhub_v1_0_soft_reset(void *handle)
385{
386 return 0;
387}
388
389static int gfxhub_v1_0_set_clockgating_state(void *handle,
390 enum amd_clockgating_state state)
391{
392 return 0;
393}
394
395static int gfxhub_v1_0_set_powergating_state(void *handle,
396 enum amd_powergating_state state)
397{
398 return 0;
399}
400
401const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = {
402 .name = "gfxhub_v1_0",
403 .early_init = gfxhub_v1_0_early_init,
404 .late_init = gfxhub_v1_0_late_init,
405 .sw_init = gfxhub_v1_0_sw_init,
406 .sw_fini = gfxhub_v1_0_sw_fini,
407 .hw_init = gfxhub_v1_0_hw_init,
408 .hw_fini = gfxhub_v1_0_hw_fini,
409 .suspend = gfxhub_v1_0_suspend,
410 .resume = gfxhub_v1_0_resume,
411 .is_idle = gfxhub_v1_0_is_idle,
412 .wait_for_idle = gfxhub_v1_0_wait_for_idle,
413 .soft_reset = gfxhub_v1_0_soft_reset,
414 .set_clockgating_state = gfxhub_v1_0_set_clockgating_state,
415 .set_powergating_state = gfxhub_v1_0_set_powergating_state,
416};
417
418const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block =
419{
420 .type = AMD_IP_BLOCK_TYPE_GFXHUB,
421 .major = 1,
422 .minor = 0,
423 .rev = 0,
424 .funcs = &gfxhub_v1_0_ip_funcs,
425};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
index 5129a8ff0932..d2dbb085f480 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
@@ -28,7 +28,8 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev);
28void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); 28void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev);
29void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, 29void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
30 bool value); 30 bool value);
31 31void gfxhub_v1_0_init(struct amdgpu_device *adev);
32u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev);
32extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs; 33extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs;
33extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block; 34extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block;
34 35
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index a572979f186c..d0214d942bfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -21,7 +21,7 @@
21 * 21 *
22 */ 22 */
23#include <linux/firmware.h> 23#include <linux/firmware.h>
24#include "drmP.h" 24#include <drm/drmP.h>
25#include "amdgpu.h" 25#include "amdgpu.h"
26#include "gmc_v6_0.h" 26#include "gmc_v6_0.h"
27#include "amdgpu_ucode.h" 27#include "amdgpu_ucode.h"
@@ -395,6 +395,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev,
395 return pte_flag; 395 return pte_flag;
396} 396}
397 397
398static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
399{
400 BUG_ON(addr & 0xFFFFFF0000000FFFULL);
401 return addr;
402}
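The new get_vm_pde callback gives each GMC generation a hook for turning a page-directory address into the PDE encoding its hardware wants; on this generation it is an identity mapping guarded by an alignment check. The guard, stated as a predicate:

        #include <stdint.h>

        /* A PDE address here must be 4 KiB aligned and fit the 40-bit MC
         * address space, i.e. only bits 39:12 may be set -- otherwise
         * the BUG_ON above would fire. */
        static int pde_addr_is_valid(uint64_t addr)
        {
                return (addr & 0xFFFFFF0000000FFFULL) == 0;
        }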
403
398static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, 404static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
399 bool value) 405 bool value)
400{ 406{
@@ -614,33 +620,6 @@ static void gmc_v6_0_gart_fini(struct amdgpu_device *adev)
614 amdgpu_gart_fini(adev); 620 amdgpu_gart_fini(adev);
615} 621}
616 622
617static int gmc_v6_0_vm_init(struct amdgpu_device *adev)
618{
619 /*
620 * number of VMs
621 * VMID 0 is reserved for System
622 * amdgpu graphics/compute will use VMIDs 1-7
623 * amdkfd will use VMIDs 8-15
624 */
625 adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
626 adev->vm_manager.num_level = 1;
627 amdgpu_vm_manager_init(adev);
628
629 /* base offset of vram pages */
630 if (adev->flags & AMD_IS_APU) {
631 u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
632 tmp <<= 22;
633 adev->vm_manager.vram_base_offset = tmp;
634 } else
635 adev->vm_manager.vram_base_offset = 0;
636
637 return 0;
638}
639
640static void gmc_v6_0_vm_fini(struct amdgpu_device *adev)
641{
642}
643
644static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, 623static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
645 u32 status, u32 addr, u32 mc_client) 624 u32 status, u32 addr, u32 mc_client)
646{ 625{
@@ -815,14 +794,6 @@ static int gmc_v6_0_early_init(void *handle)
815 gmc_v6_0_set_gart_funcs(adev); 794 gmc_v6_0_set_gart_funcs(adev);
816 gmc_v6_0_set_irq_funcs(adev); 795 gmc_v6_0_set_irq_funcs(adev);
817 796
818 if (adev->flags & AMD_IS_APU) {
819 adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
820 } else {
821 u32 tmp = RREG32(mmMC_SEQ_MISC0);
822 tmp &= MC_SEQ_MISC0__MT__MASK;
823 adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp);
824 }
825
826 return 0; 797 return 0;
827} 798}
828 799
@@ -842,6 +813,14 @@ static int gmc_v6_0_sw_init(void *handle)
842 int dma_bits; 813 int dma_bits;
843 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 814 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
844 815
816 if (adev->flags & AMD_IS_APU) {
817 adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
818 } else {
819 u32 tmp = RREG32(mmMC_SEQ_MISC0);
820 tmp &= MC_SEQ_MISC0__MT__MASK;
821 adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp);
822 }
823
845 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); 824 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
846 if (r) 825 if (r)
847 return r; 826 return r;
@@ -855,6 +834,8 @@ static int gmc_v6_0_sw_init(void *handle)
855 834
856 adev->mc.mc_mask = 0xffffffffffULL; 835 adev->mc.mc_mask = 0xffffffffffULL;
857 836
837 adev->mc.stolen_size = 256 * 1024;
838
858 adev->need_dma32 = false; 839 adev->need_dma32 = false;
859 dma_bits = adev->need_dma32 ? 32 : 40; 840 dma_bits = adev->need_dma32 ? 32 : 40;
860 r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); 841 r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@@ -887,26 +868,34 @@ static int gmc_v6_0_sw_init(void *handle)
887 if (r) 868 if (r)
888 return r; 869 return r;
889 870
890 if (!adev->vm_manager.enabled) { 871 /*
891 r = gmc_v6_0_vm_init(adev); 872 * number of VMs
892 if (r) { 873 * VMID 0 is reserved for System
893 dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); 874 * amdgpu graphics/compute will use VMIDs 1-7
894 return r; 875 * amdkfd will use VMIDs 8-15
895 } 876 */
896 adev->vm_manager.enabled = true; 877 adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
878 adev->vm_manager.num_level = 1;
879 amdgpu_vm_manager_init(adev);
880
881 /* base offset of vram pages */
882 if (adev->flags & AMD_IS_APU) {
883 u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
884
885 tmp <<= 22;
886 adev->vm_manager.vram_base_offset = tmp;
887 } else {
888 adev->vm_manager.vram_base_offset = 0;
897 } 889 }
898 890
899 return r; 891 return 0;
900} 892}
901 893
902static int gmc_v6_0_sw_fini(void *handle) 894static int gmc_v6_0_sw_fini(void *handle)
903{ 895{
904 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 896 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
905 897
906 if (adev->vm_manager.enabled) { 898 amdgpu_vm_manager_fini(adev);
907 gmc_v6_0_vm_fini(adev);
908 adev->vm_manager.enabled = false;
909 }
910 gmc_v6_0_gart_fini(adev); 899 gmc_v6_0_gart_fini(adev);
911 amdgpu_gem_force_release(adev); 900 amdgpu_gem_force_release(adev);
912 amdgpu_bo_fini(adev); 901 amdgpu_bo_fini(adev);
@@ -950,10 +939,6 @@ static int gmc_v6_0_suspend(void *handle)
950{ 939{
951 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 940 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
952 941
953 if (adev->vm_manager.enabled) {
954 gmc_v6_0_vm_fini(adev);
955 adev->vm_manager.enabled = false;
956 }
957 gmc_v6_0_hw_fini(adev); 942 gmc_v6_0_hw_fini(adev);
958 943
959 return 0; 944 return 0;
@@ -968,16 +953,9 @@ static int gmc_v6_0_resume(void *handle)
968 if (r) 953 if (r)
969 return r; 954 return r;
970 955
971 if (!adev->vm_manager.enabled) { 956 amdgpu_vm_reset_all_ids(adev);
972 r = gmc_v6_0_vm_init(adev);
973 if (r) {
974 dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
975 return r;
976 }
977 adev->vm_manager.enabled = true;
978 }
979 957
980 return r; 958 return 0;
981} 959}
982 960
983static bool gmc_v6_0_is_idle(void *handle) 961static bool gmc_v6_0_is_idle(void *handle)
@@ -995,16 +973,10 @@ static bool gmc_v6_0_is_idle(void *handle)
995static int gmc_v6_0_wait_for_idle(void *handle) 973static int gmc_v6_0_wait_for_idle(void *handle)
996{ 974{
997 unsigned i; 975 unsigned i;
998 u32 tmp;
999 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 976 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1000 977
1001 for (i = 0; i < adev->usec_timeout; i++) { 978 for (i = 0; i < adev->usec_timeout; i++) {
1002 tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK | 979 if (gmc_v6_0_is_idle(handle))
1003 SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
1004 SRBM_STATUS__MCC_BUSY_MASK |
1005 SRBM_STATUS__MCD_BUSY_MASK |
1006 SRBM_STATUS__VMC_BUSY_MASK);
1007 if (!tmp)
1008 return 0; 980 return 0;
1009 udelay(1); 981 udelay(1);
1010 } 982 }
@@ -1157,6 +1129,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
1157 .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, 1129 .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
1158 .set_pte_pde = gmc_v6_0_gart_set_pte_pde, 1130 .set_pte_pde = gmc_v6_0_gart_set_pte_pde,
1159 .set_prt = gmc_v6_0_set_prt, 1131 .set_prt = gmc_v6_0_set_prt,
1132 .get_vm_pde = gmc_v6_0_get_vm_pde,
1160 .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags 1133 .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
1161}; 1134};
1162 1135
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index a9083a16a250..7e9ea53edf8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -21,7 +21,7 @@
21 * 21 *
22 */ 22 */
23#include <linux/firmware.h> 23#include <linux/firmware.h>
24#include "drmP.h" 24#include <drm/drmP.h>
25#include "amdgpu.h" 25#include "amdgpu.h"
26#include "cikd.h" 26#include "cikd.h"
27#include "cik.h" 27#include "cik.h"
@@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev,
472 return pte_flag; 472 return pte_flag;
473} 473}
474 474
475static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
476{
477 BUG_ON(addr & 0xFFFFFF0000000FFFULL);
478 return addr;
479}
480
475/** 481/**
476 * gmc_v7_0_set_fault_enable_default - update VM fault handling 482 * gmc_v7_0_set_fault_enable_default - update VM fault handling
477 * 483 *
@@ -724,55 +730,6 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
724 amdgpu_gart_fini(adev); 730 amdgpu_gart_fini(adev);
725} 731}
726 732
727/*
728 * vm
729 * VMID 0 is the physical GPU addresses as used by the kernel.
730 * VMIDs 1-15 are used for userspace clients and are handled
731 * by the amdgpu vm/hsa code.
732 */
733/**
734 * gmc_v7_0_vm_init - cik vm init callback
735 *
736 * @adev: amdgpu_device pointer
737 *
738 * Inits cik specific vm parameters (number of VMs, base of vram for
739 * VMIDs 1-15) (CIK).
740 * Returns 0 for success.
741 */
742static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
743{
744 /*
745 * number of VMs
746 * VMID 0 is reserved for System
747 * amdgpu graphics/compute will use VMIDs 1-7
748 * amdkfd will use VMIDs 8-15
749 */
750 adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
751 adev->vm_manager.num_level = 1;
752 amdgpu_vm_manager_init(adev);
753
754 /* base offset of vram pages */
755 if (adev->flags & AMD_IS_APU) {
756 u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
757 tmp <<= 22;
758 adev->vm_manager.vram_base_offset = tmp;
759 } else
760 adev->vm_manager.vram_base_offset = 0;
761
762 return 0;
763}
764
765/**
766 * gmc_v7_0_vm_fini - cik vm fini callback
767 *
768 * @adev: amdgpu_device pointer
769 *
770 * Tear down any asic specific VM setup (CIK).
771 */
772static void gmc_v7_0_vm_fini(struct amdgpu_device *adev)
773{
774}
775
776/** 733/**
777 * gmc_v7_0_vm_decode_fault - print human readable fault info 734 * gmc_v7_0_vm_decode_fault - print human readable fault info
778 * 735 *
@@ -1013,6 +970,8 @@ static int gmc_v7_0_sw_init(void *handle)
1013 */ 970 */
1014 adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ 971 adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
1015 972
973 adev->mc.stolen_size = 256 * 1024;
974
1016 /* set DMA mask + need_dma32 flags. 975 /* set DMA mask + need_dma32 flags.
1017 * PCIE - can handle 40-bits. 976 * PCIE - can handle 40-bits.
1018 * IGP - can handle 40-bits 977 * IGP - can handle 40-bits
@@ -1051,27 +1010,34 @@ static int gmc_v7_0_sw_init(void *handle)
1051 if (r) 1010 if (r)
1052 return r; 1011 return r;
1053 1012
1054 if (!adev->vm_manager.enabled) { 1013 /*
1055 r = gmc_v7_0_vm_init(adev); 1014 * number of VMs
1056 if (r) { 1015 * VMID 0 is reserved for System
1057 dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); 1016 * amdgpu graphics/compute will use VMIDs 1-7
1058 return r; 1017 * amdkfd will use VMIDs 8-15
1059 } 1018 */
1060 adev->vm_manager.enabled = true; 1019 adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
1020 adev->vm_manager.num_level = 1;
1021 amdgpu_vm_manager_init(adev);
1022
1023 /* base offset of vram pages */
1024 if (adev->flags & AMD_IS_APU) {
1025 u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
1026
1027 tmp <<= 22;
1028 adev->vm_manager.vram_base_offset = tmp;
1029 } else {
1030 adev->vm_manager.vram_base_offset = 0;
1061 } 1031 }
1062 1032
1063 return r; 1033 return 0;
1064} 1034}
1065 1035
1066static int gmc_v7_0_sw_fini(void *handle) 1036static int gmc_v7_0_sw_fini(void *handle)
1067{ 1037{
1068 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1038 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1069 1039
1070 if (adev->vm_manager.enabled) { 1040 amdgpu_vm_manager_fini(adev);
1071 amdgpu_vm_manager_fini(adev);
1072 gmc_v7_0_vm_fini(adev);
1073 adev->vm_manager.enabled = false;
1074 }
1075 gmc_v7_0_gart_fini(adev); 1041 gmc_v7_0_gart_fini(adev);
1076 amdgpu_gem_force_release(adev); 1042 amdgpu_gem_force_release(adev);
1077 amdgpu_bo_fini(adev); 1043 amdgpu_bo_fini(adev);
@@ -1117,10 +1083,6 @@ static int gmc_v7_0_suspend(void *handle)
1117{ 1083{
1118 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1084 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1119 1085
1120 if (adev->vm_manager.enabled) {
1121 gmc_v7_0_vm_fini(adev);
1122 adev->vm_manager.enabled = false;
1123 }
1124 gmc_v7_0_hw_fini(adev); 1086 gmc_v7_0_hw_fini(adev);
1125 1087
1126 return 0; 1088 return 0;
@@ -1135,16 +1097,9 @@ static int gmc_v7_0_resume(void *handle)
1135 if (r) 1097 if (r)
1136 return r; 1098 return r;
1137 1099
1138 if (!adev->vm_manager.enabled) { 1100 amdgpu_vm_reset_all_ids(adev);
1139 r = gmc_v7_0_vm_init(adev);
1140 if (r) {
1141 dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
1142 return r;
1143 }
1144 adev->vm_manager.enabled = true;
1145 }
1146 1101
1147 return r; 1102 return 0;
1148} 1103}
1149 1104
1150static bool gmc_v7_0_is_idle(void *handle) 1105static bool gmc_v7_0_is_idle(void *handle)
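For reference, the consolidated resume path reconstructs to the following; rather than lazily re-running a vm_init step, it re-enables the hardware and then invalidates every VMID handed out before suspend:

    static int gmc_v7_0_resume(void *handle)
    {
            int r;
            struct amdgpu_device *adev = (struct amdgpu_device *)handle;

            r = gmc_v7_0_hw_init(adev);
            if (r)
                    return r;

            /* VMIDs handed out before suspend are stale; drop them all */
            amdgpu_vm_reset_all_ids(adev);

            return 0;
    }

The gmc_v8_0 and gmc_v9_0 resume paths below receive the identical treatment.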
@@ -1346,7 +1301,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
1346 .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, 1301 .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
1347 .set_pte_pde = gmc_v7_0_gart_set_pte_pde, 1302 .set_pte_pde = gmc_v7_0_gart_set_pte_pde,
1348 .set_prt = gmc_v7_0_set_prt, 1303 .set_prt = gmc_v7_0_set_prt,
1349 .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags 1304 .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
1305 .get_vm_pde = gmc_v7_0_get_vm_pde
1350}; 1306};
1351 1307
1352static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { 1308static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
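One detail worth spelling out from the consolidated sw_init: on APUs the VRAM base offset is read from MC_VM_FB_OFFSET, which reports the framebuffer location in 4 MB granules, hence the shift by 22. A small sketch with a made-up register value:

    u64 tmp = 0x300;                      /* hypothetical MC_VM_FB_OFFSET read */
    u64 base = tmp << 22;                 /* 0x300 * 4 MB = 0xC0000000 */
    adev->vm_manager.vram_base_offset = base;   /* dGPUs use 0 instead */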
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 4ac99784160a..cc9f88057cd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -21,7 +21,7 @@
21 * 21 *
22 */ 22 */
23#include <linux/firmware.h> 23#include <linux/firmware.h>
24#include "drmP.h" 24#include <drm/drmP.h>
25#include "amdgpu.h" 25#include "amdgpu.h"
26#include "gmc_v8_0.h" 26#include "gmc_v8_0.h"
27#include "amdgpu_ucode.h" 27#include "amdgpu_ucode.h"
@@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev,
656 return pte_flag; 656 return pte_flag;
657} 657}
658 658
659static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
660{
661 BUG_ON(addr & 0xFFFFFF0000000FFFULL);
662 return addr;
663}
664
659/** 665/**
660 * gmc_v8_0_set_fault_enable_default - update VM fault handling 666 * gmc_v8_0_set_fault_enable_default - update VM fault handling
661 * 667 *
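gmc_v8_0_get_vm_pde() is an identity transform plus a sanity check: the BUG_ON() mask rejects any page-directory address that is not 4 KB aligned (bits 0-11) or that falls outside the 40-bit MC space (bits 40-63), matching the 0xffffffffffULL mc_mask set in sw_init. A hypothetical helper spelling out the same test:

    /* illustration only; this helper does not exist in the driver */
    static bool gmc_v8_pde_addr_ok(uint64_t addr)
    {
            /* bits 0-11: 4 KB alignment; bits 40-63: beyond the 40-bit MC */
            return (addr & 0xFFFFFF0000000FFFULL) == 0;
    }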
@@ -927,55 +933,6 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
927 amdgpu_gart_fini(adev); 933 amdgpu_gart_fini(adev);
928} 934}
929 935
930/*
931 * vm
932 * VMID 0 is the physical GPU addresses as used by the kernel.
933 * VMIDs 1-15 are used for userspace clients and are handled
934 * by the amdgpu vm/hsa code.
935 */
936/**
937 * gmc_v8_0_vm_init - cik vm init callback
938 *
939 * @adev: amdgpu_device pointer
940 *
941 * Inits cik specific vm parameters (number of VMs, base of vram for
942 * VMIDs 1-15) (CIK).
943 * Returns 0 for success.
944 */
945static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
946{
947 /*
948 * number of VMs
949 * VMID 0 is reserved for System
950 * amdgpu graphics/compute will use VMIDs 1-7
951 * amdkfd will use VMIDs 8-15
952 */
953 adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
954 adev->vm_manager.num_level = 1;
955 amdgpu_vm_manager_init(adev);
956
957 /* base offset of vram pages */
958 if (adev->flags & AMD_IS_APU) {
959 u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
960 tmp <<= 22;
961 adev->vm_manager.vram_base_offset = tmp;
962 } else
963 adev->vm_manager.vram_base_offset = 0;
964
965 return 0;
966}
967
968/**
969 * gmc_v8_0_vm_fini - cik vm fini callback
970 *
971 * @adev: amdgpu_device pointer
972 *
973 * Tear down any asic specific VM setup (CIK).
974 */
975static void gmc_v8_0_vm_fini(struct amdgpu_device *adev)
976{
977}
978
979/** 936/**
980 * gmc_v8_0_vm_decode_fault - print human readable fault info 937 * gmc_v8_0_vm_decode_fault - print human readable fault info
981 * 938 *
@@ -1097,6 +1054,8 @@ static int gmc_v8_0_sw_init(void *handle)
1097 */ 1054 */
1098 adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ 1055 adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
1099 1056
1057 adev->mc.stolen_size = 256 * 1024;
1058
1100 /* set DMA mask + need_dma32 flags. 1059 /* set DMA mask + need_dma32 flags.
1101 * PCIE - can handle 40-bits. 1060 * PCIE - can handle 40-bits.
1102 * IGP - can handle 40-bits 1061 * IGP - can handle 40-bits
@@ -1135,27 +1094,34 @@ static int gmc_v8_0_sw_init(void *handle)
1135 if (r) 1094 if (r)
1136 return r; 1095 return r;
1137 1096
1138 if (!adev->vm_manager.enabled) { 1097 /*
1139 r = gmc_v8_0_vm_init(adev); 1098 * number of VMs
1140 if (r) { 1099 * VMID 0 is reserved for System
1141 dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); 1100 * amdgpu graphics/compute will use VMIDs 1-7
1142 return r; 1101 * amdkfd will use VMIDs 8-15
1143 } 1102 */
1144 adev->vm_manager.enabled = true; 1103 adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS;
1104 adev->vm_manager.num_level = 1;
1105 amdgpu_vm_manager_init(adev);
1106
1107 /* base offset of vram pages */
1108 if (adev->flags & AMD_IS_APU) {
1109 u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
1110
1111 tmp <<= 22;
1112 adev->vm_manager.vram_base_offset = tmp;
1113 } else {
1114 adev->vm_manager.vram_base_offset = 0;
1145 } 1115 }
1146 1116
1147 return r; 1117 return 0;
1148} 1118}
1149 1119
1150static int gmc_v8_0_sw_fini(void *handle) 1120static int gmc_v8_0_sw_fini(void *handle)
1151{ 1121{
1152 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1122 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1153 1123
1154 if (adev->vm_manager.enabled) { 1124 amdgpu_vm_manager_fini(adev);
1155 amdgpu_vm_manager_fini(adev);
1156 gmc_v8_0_vm_fini(adev);
1157 adev->vm_manager.enabled = false;
1158 }
1159 gmc_v8_0_gart_fini(adev); 1125 gmc_v8_0_gart_fini(adev);
1160 amdgpu_gem_force_release(adev); 1126 amdgpu_gem_force_release(adev);
1161 amdgpu_bo_fini(adev); 1127 amdgpu_bo_fini(adev);
@@ -1209,10 +1175,6 @@ static int gmc_v8_0_suspend(void *handle)
1209{ 1175{
1210 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1176 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1211 1177
1212 if (adev->vm_manager.enabled) {
1213 gmc_v8_0_vm_fini(adev);
1214 adev->vm_manager.enabled = false;
1215 }
1216 gmc_v8_0_hw_fini(adev); 1178 gmc_v8_0_hw_fini(adev);
1217 1179
1218 return 0; 1180 return 0;
@@ -1227,16 +1189,9 @@ static int gmc_v8_0_resume(void *handle)
1227 if (r) 1189 if (r)
1228 return r; 1190 return r;
1229 1191
1230 if (!adev->vm_manager.enabled) { 1192 amdgpu_vm_reset_all_ids(adev);
1231 r = gmc_v8_0_vm_init(adev);
1232 if (r) {
1233 dev_err(adev->dev, "vm manager initialization failed (%d).\n", r);
1234 return r;
1235 }
1236 adev->vm_manager.enabled = true;
1237 }
1238 1193
1239 return r; 1194 return 0;
1240} 1195}
1241 1196
1242static bool gmc_v8_0_is_idle(void *handle) 1197static bool gmc_v8_0_is_idle(void *handle)
@@ -1665,7 +1620,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
1665 .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, 1620 .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
1666 .set_pte_pde = gmc_v8_0_gart_set_pte_pde, 1621 .set_pte_pde = gmc_v8_0_gart_set_pte_pde,
1667 .set_prt = gmc_v8_0_set_prt, 1622 .set_prt = gmc_v8_0_set_prt,
1668 .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags 1623 .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
1624 .get_vm_pde = gmc_v8_0_get_vm_pde
1669}; 1625};
1670 1626
1671static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { 1627static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index dc1e1c1d6b24..175ba5f9691c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -33,6 +33,7 @@
33#include "soc15_common.h" 33#include "soc15_common.h"
34 34
35#include "nbio_v6_1.h" 35#include "nbio_v6_1.h"
36#include "nbio_v7_0.h"
36#include "gfxhub_v1_0.h" 37#include "gfxhub_v1_0.h"
37#include "mmhub_v1_0.h" 38#include "mmhub_v1_0.h"
38 39
@@ -215,7 +216,10 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
215 unsigned i, j; 216 unsigned i, j;
216 217
217 /* flush hdp cache */ 218 /* flush hdp cache */
218 nbio_v6_1_hdp_flush(adev); 219 if (adev->flags & AMD_IS_APU)
220 nbio_v7_0_hdp_flush(adev);
221 else
222 nbio_v6_1_hdp_flush(adev);
219 223
220 spin_lock(&adev->mc.invalidate_lock); 224 spin_lock(&adev->mc.invalidate_lock);
221 225
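Raven is the first APU on this IP base and carries NBIO 7.0 where Vega10 has NBIO 6.1, so every NBIO touch point in this file gains the same AMD_IS_APU dispatch. The memsize query later in the file follows the identical pattern; condensed:

    /* same dispatch as the hdp_flush hunk above, applied to memsize */
    u64 memsize_mb = (adev->flags & AMD_IS_APU) ?
                            nbio_v7_0_get_memsize(adev) :
                            nbio_v6_1_get_memsize(adev);

    adev->mc.mc_vram_size = memsize_mb * 1024ULL * 1024ULL;  /* MB -> bytes */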
@@ -354,17 +358,19 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
354 return pte_flag; 358 return pte_flag;
355} 359}
356 360
357static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) 361static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
358{ 362{
359 return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start; 363 addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
364 BUG_ON(addr & 0xFFFF00000000003FULL);
365 return addr;
360} 366}
361 367
362static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { 368static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
363 .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, 369 .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
364 .set_pte_pde = gmc_v9_0_gart_set_pte_pde, 370 .set_pte_pde = gmc_v9_0_gart_set_pte_pde,
365 .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
366 .adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
367 .get_invalidate_req = gmc_v9_0_get_invalidate_req, 371 .get_invalidate_req = gmc_v9_0_get_invalidate_req,
372 .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
373 .get_vm_pde = gmc_v9_0_get_vm_pde
368}; 374};
369 375
370static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) 376static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
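The new get_vm_pde() also folds in what adjust_mc_addr() used to do: the MC-aperture address of a page table is rebased to a physical VRAM address before being written into a PDE, and the BUG_ON() then insists the result is 64-byte aligned and fits the 48-bit address space. A worked example with made-up aperture values:

    u64 vram_start = 0x0000008000000000ULL;  /* hypothetical MC VRAM aperture */
    u64 addr = vram_start + 0x100000;        /* page table 1 MB into VRAM */

    addr = adev->vm_manager.vram_base_offset + addr - vram_start;
    /* with vram_base_offset == 0 (dGPU), addr is now physical 0x100000 */
    BUG_ON(addr & 0xFFFF00000000003FULL);    /* bits 48-63 and 0-5 must be 0 */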
@@ -415,6 +421,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
415 amdgpu_vram_location(adev, &adev->mc, base); 421 amdgpu_vram_location(adev, &adev->mc, base);
416 adev->mc.gtt_base_align = 0; 422 adev->mc.gtt_base_align = 0;
417 amdgpu_gtt_location(adev, mc); 423 amdgpu_gtt_location(adev, mc);
424 /* base offset of vram pages */
425 if (adev->flags & AMD_IS_APU)
426 adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
427 else
428 adev->vm_manager.vram_base_offset = 0;
418} 429}
419 430
420/** 431/**
@@ -434,7 +445,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
434 /* hbm memory channel size */ 445 /* hbm memory channel size */
435 chansize = 128; 446 chansize = 128;
436 447
437 tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0)); 448 tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
438 tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; 449 tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
439 tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; 450 tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
440 switch (tmp) { 451 switch (tmp) {
@@ -474,7 +485,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
474 adev->mc.aper_size = pci_resource_len(adev->pdev, 0); 485 adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
475 /* size in MB on si */ 486 /* size in MB on si */
476 adev->mc.mc_vram_size = 487 adev->mc.mc_vram_size =
477 nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL; 488 ((adev->flags & AMD_IS_APU) ? nbio_v7_0_get_memsize(adev) :
489 nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL;
478 adev->mc.real_vram_size = adev->mc.mc_vram_size; 490 adev->mc.real_vram_size = adev->mc.mc_vram_size;
479 adev->mc.visible_vram_size = adev->mc.aper_size; 491 adev->mc.visible_vram_size = adev->mc.aper_size;
480 492
@@ -514,64 +526,15 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
514 return amdgpu_gart_table_vram_alloc(adev); 526 return amdgpu_gart_table_vram_alloc(adev);
515} 527}
516 528
517/*
518 * vm
519 * VMID 0 is the physical GPU addresses as used by the kernel.
520 * VMIDs 1-15 are used for userspace clients and are handled
521 * by the amdgpu vm/hsa code.
522 */
523/**
524 * gmc_v9_0_vm_init - vm init callback
525 *
526 * @adev: amdgpu_device pointer
527 *
528 * Inits vega10 specific vm parameters (number of VMs, base of vram for
529 * VMIDs 1-15) (vega10).
530 * Returns 0 for success.
531 */
532static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
533{
534 /*
535 * number of VMs
536 * VMID 0 is reserved for System
537 * amdgpu graphics/compute will use VMIDs 1-7
538 * amdkfd will use VMIDs 8-15
539 */
540 adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
541 adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
542
543 /* TODO: fix num_level for APU when updating vm size and block size */
544 if (adev->flags & AMD_IS_APU)
545 adev->vm_manager.num_level = 1;
546 else
547 adev->vm_manager.num_level = 3;
548 amdgpu_vm_manager_init(adev);
549
550 /* base offset of vram pages */
551 /*XXX This value is not zero for APU*/
552 adev->vm_manager.vram_base_offset = 0;
553
554 return 0;
555}
556
557/**
558 * gmc_v9_0_vm_fini - vm fini callback
559 *
560 * @adev: amdgpu_device pointer
561 *
562 * Tear down any asic specific VM setup.
563 */
564static void gmc_v9_0_vm_fini(struct amdgpu_device *adev)
565{
566 return;
567}
568
569static int gmc_v9_0_sw_init(void *handle) 529static int gmc_v9_0_sw_init(void *handle)
570{ 530{
571 int r; 531 int r;
572 int dma_bits; 532 int dma_bits;
573 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 533 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
574 534
535 gfxhub_v1_0_init(adev);
536 mmhub_v1_0_init(adev);
537
575 spin_lock_init(&adev->mc.invalidate_lock); 538 spin_lock_init(&adev->mc.invalidate_lock);
576 539
577 if (adev->flags & AMD_IS_APU) { 540 if (adev->flags & AMD_IS_APU) {
@@ -609,6 +572,12 @@ static int gmc_v9_0_sw_init(void *handle)
609 */ 572 */
610 adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ 573 adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
611 574
575 /*
576 * We need to reserve 8 MB of stolen memory for vega10
577 * TODO: Figure out how to avoid that...
578 */
579 adev->mc.stolen_size = 8 * 1024 * 1024;
580
612 /* set DMA mask + need_dma32 flags. 581 /* set DMA mask + need_dma32 flags.
613 * PCIE - can handle 44-bits. 582 * PCIE - can handle 44-bits.
614 * IGP - can handle 44-bits 583 * IGP - can handle 44-bits
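The hunk is cut off just before the actual mask programming, but the comment describes the usual GMC pattern: try the wide (here 44-bit) DMA mask first and fall back to 32 bits, recording the fallback in need_dma32. A hedged sketch of that pattern:

    int dma_bits = adev->need_dma32 ? 32 : 44;
    int r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
    if (r) {
            /* remember the narrow mask; GART placement depends on it */
            adev->need_dma32 = true;
            pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(32));
    }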
@@ -641,15 +610,23 @@ static int gmc_v9_0_sw_init(void *handle)
641 if (r) 610 if (r)
642 return r; 611 return r;
643 612
644 if (!adev->vm_manager.enabled) { 613 /*
645 r = gmc_v9_0_vm_init(adev); 614 * number of VMs
646 if (r) { 615 * VMID 0 is reserved for System
647 dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); 616 * amdgpu graphics/compute will use VMIDs 1-7
648 return r; 617 * amdkfd will use VMIDs 8-15
649 } 618 */
650 adev->vm_manager.enabled = true; 619 adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
651 } 620 adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
652 return r; 621
622 /* TODO: fix num_level for APU when updating vm size and block size */
623 if (adev->flags & AMD_IS_APU)
624 adev->vm_manager.num_level = 1;
625 else
626 adev->vm_manager.num_level = 3;
627 amdgpu_vm_manager_init(adev);
628
629 return 0;
653} 630}
654 631
655/** 632/**
@@ -669,11 +646,7 @@ static int gmc_v9_0_sw_fini(void *handle)
669{ 646{
670 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 647 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
671 648
672 if (adev->vm_manager.enabled) { 649 amdgpu_vm_manager_fini(adev);
673 amdgpu_vm_manager_fini(adev);
674 gmc_v9_0_vm_fini(adev);
675 adev->vm_manager.enabled = false;
676 }
677 gmc_v9_0_gart_fini(adev); 650 gmc_v9_0_gart_fini(adev);
678 amdgpu_gem_force_release(adev); 651 amdgpu_gem_force_release(adev);
679 amdgpu_bo_fini(adev); 652 amdgpu_bo_fini(adev);
@@ -686,6 +659,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
686 switch (adev->asic_type) { 659 switch (adev->asic_type) {
687 case CHIP_VEGA10: 660 case CHIP_VEGA10:
688 break; 661 break;
662 case CHIP_RAVEN:
663 break;
689 default: 664 default:
690 break; 665 break;
691 } 666 }
@@ -715,7 +690,19 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
715 return r; 690 return r;
716 691
717 /* After HDP is initialized, flush HDP.*/ 692 /* After HDP is initialized, flush HDP.*/
718 nbio_v6_1_hdp_flush(adev); 693 if (adev->flags & AMD_IS_APU)
694 nbio_v7_0_hdp_flush(adev);
695 else
696 nbio_v6_1_hdp_flush(adev);
697
698 switch (adev->asic_type) {
699 case CHIP_RAVEN:
700 mmhub_v1_0_initialize_power_gating(adev);
701 mmhub_v1_0_update_power_gating(adev, true);
702 break;
703 default:
704 break;
705 }
719 706
720 r = gfxhub_v1_0_gart_enable(adev); 707 r = gfxhub_v1_0_gart_enable(adev);
721 if (r) 708 if (r)
@@ -725,12 +712,12 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
725 if (r) 712 if (r)
726 return r; 713 return r;
727 714
728 tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL)); 715 tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
729 tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; 716 tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
730 WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp); 717 WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);
731 718
732 tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL)); 719 tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
733 WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp); 720 WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
734 721
735 722
736 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) 723 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
@@ -781,6 +768,12 @@ static int gmc_v9_0_hw_fini(void *handle)
781{ 768{
782 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 769 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
783 770
771 if (amdgpu_sriov_vf(adev)) {
772 /* full access mode, so don't touch any GMC register */
773 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
774 return 0;
775 }
776
784 amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); 777 amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
785 gmc_v9_0_gart_disable(adev); 778 gmc_v9_0_gart_disable(adev);
786 779
@@ -791,10 +784,6 @@ static int gmc_v9_0_suspend(void *handle)
791{ 784{
792 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 785 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
793 786
794 if (adev->vm_manager.enabled) {
795 gmc_v9_0_vm_fini(adev);
796 adev->vm_manager.enabled = false;
797 }
798 gmc_v9_0_hw_fini(adev); 787 gmc_v9_0_hw_fini(adev);
799 788
800 return 0; 789 return 0;
@@ -809,17 +798,9 @@ static int gmc_v9_0_resume(void *handle)
809 if (r) 798 if (r)
810 return r; 799 return r;
811 800
812 if (!adev->vm_manager.enabled) { 801 amdgpu_vm_reset_all_ids(adev);
813 r = gmc_v9_0_vm_init(adev);
814 if (r) {
815 dev_err(adev->dev,
816 "vm manager initialization failed (%d).\n", r);
817 return r;
818 }
819 adev->vm_manager.enabled = true;
820 }
821 802
822 return r; 803 return 0;
823} 804}
824 805
825static bool gmc_v9_0_is_idle(void *handle) 806static bool gmc_v9_0_is_idle(void *handle)
@@ -843,7 +824,16 @@ static int gmc_v9_0_soft_reset(void *handle)
843static int gmc_v9_0_set_clockgating_state(void *handle, 824static int gmc_v9_0_set_clockgating_state(void *handle,
844 enum amd_clockgating_state state) 825 enum amd_clockgating_state state)
845{ 826{
846 return 0; 827 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
828
829 return mmhub_v1_0_set_clockgating(adev, state);
830}
831
832static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
833{
834 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
835
836 mmhub_v1_0_get_clockgating(adev, flags);
847} 837}
848 838
849static int gmc_v9_0_set_powergating_state(void *handle, 839static int gmc_v9_0_set_powergating_state(void *handle,
@@ -867,6 +857,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
867 .soft_reset = gmc_v9_0_soft_reset, 857 .soft_reset = gmc_v9_0_soft_reset,
868 .set_clockgating_state = gmc_v9_0_set_clockgating_state, 858 .set_clockgating_state = gmc_v9_0_set_clockgating_state,
869 .set_powergating_state = gmc_v9_0_set_powergating_state, 859 .set_powergating_state = gmc_v9_0_set_powergating_state,
860 .get_clockgating_state = gmc_v9_0_get_clockgating_state,
870}; 861};
871 862
872const struct amdgpu_ip_block_version gmc_v9_0_ip_block = 863const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index cb622add99a7..7a0ea27ac429 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -20,7 +20,7 @@
20 * OTHER DEALINGS IN THE SOFTWARE. 20 * OTHER DEALINGS IN THE SOFTWARE.
21 * 21 *
22 */ 22 */
23#include "drmP.h" 23#include <drm/drmP.h>
24#include "amdgpu.h" 24#include "amdgpu.h"
25#include "amdgpu_ih.h" 25#include "amdgpu_ih.h"
26#include "vid.h" 26#include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 79a52ad2c80d..3bbf2ccfca89 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -21,7 +21,7 @@
21 * 21 *
22 */ 22 */
23 23
24#include "drmP.h" 24#include <drm/drmP.h>
25#include "amdgpu.h" 25#include "amdgpu.h"
26#include "amdgpu_pm.h" 26#include "amdgpu_pm.h"
27#include "cikd.h" 27#include "cikd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_smc.c b/drivers/gpu/drm/amd/amdgpu/kv_smc.c
index e6b7b42acfe1..b82e33c01571 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_smc.c
@@ -22,7 +22,7 @@
22 * Authors: Alex Deucher 22 * Authors: Alex Deucher
23 */ 23 */
24 24
25#include "drmP.h" 25#include <drm/drmP.h>
26#include "amdgpu.h" 26#include "amdgpu.h"
27#include "cikd.h" 27#include "cikd.h"
28#include "kv_dpm.h" 28#include "kv_dpm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index dbfe48d1207a..9804318f3488 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -34,9 +34,12 @@
34 34
35#include "soc15_common.h" 35#include "soc15_common.h"
36 36
37#define mmDAGB0_CNTL_MISC2_RV 0x008f
38#define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0
39
37u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) 40u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
38{ 41{
39 u64 base = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE)); 42 u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
40 43
41 base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; 44 base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
42 base <<= 24; 45 base <<= 24;
@@ -44,184 +47,160 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
44 return base; 47 return base;
45} 48}
46 49
47int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) 50static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
48{ 51{
49 u32 tmp; 52 uint64_t value;
50 u64 value;
51 uint64_t addr;
52 u32 i;
53 53
54 /* Program MC. */ 54 BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
55 /* Update configuration */ 55 value = adev->gart.table_addr - adev->mc.vram_start +
56 DRM_INFO("%s -- in\n", __func__);
57 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR),
58 adev->mc.vram_start >> 18);
59 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR),
60 adev->mc.vram_end >> 18);
61 value = adev->vram_scratch.gpu_addr - adev->mc.vram_start +
62 adev->vm_manager.vram_base_offset; 56 adev->vm_manager.vram_base_offset;
63 WREG32(SOC15_REG_OFFSET(MMHUB, 0, 57 value &= 0x0000FFFFFFFFF000ULL;
64 mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), 58 value |= 0x1; /* valid bit */
65 (u32)(value >> 12));
66 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
67 mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
68 (u32)(value >> 44));
69 59
70 if (amdgpu_sriov_vf(adev)) { 60 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
71 /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so 61 lower_32_bits(value));
72 vbios post doesn't program them, for SRIOV driver need to program them */ 62
73 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE), 63 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
74 adev->mc.vram_start >> 24); 64 upper_32_bits(value));
75 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP), 65}
76 adev->mc.vram_end >> 24); 66
77 } 67static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
68{
69 mmhub_v1_0_init_gart_pt_regs(adev);
70
71 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
72 (u32)(adev->mc.gtt_start >> 12));
73 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
74 (u32)(adev->mc.gtt_start >> 44));
75
76 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
77 (u32)(adev->mc.gtt_end >> 12));
78 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
79 (u32)(adev->mc.gtt_end >> 44));
80}
81
82static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
83{
84 uint64_t value;
85 uint32_t tmp;
78 86
79 /* Disable AGP. */ 87 /* Disable AGP. */
80 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE), 0); 88 WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0);
81 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0); 89 WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0);
82 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BOT), 0x00FFFFFF); 90 WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF);
83 91
84 /* GART Enable. */ 92 /* Program the system aperture low logical page number. */
93 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
94 adev->mc.vram_start >> 18);
95 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
96 adev->mc.vram_end >> 18);
97
98 /* Set default page address. */
99 value = adev->vram_scratch.gpu_addr - adev->mc.vram_start +
100 adev->vm_manager.vram_base_offset;
101 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
102 (u32)(value >> 12));
103 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
104 (u32)(value >> 44));
105
106 /* Program "protection fault". */
107 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
108 (u32)(adev->dummy_page.addr >> 12));
109 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
110 (u32)((u64)adev->dummy_page.addr >> 44));
111
112 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2);
113 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
114 ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
115 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2, tmp);
116}
117
118static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
119{
120 uint32_t tmp;
85 121
86 /* Setup TLB control */ 122 /* Setup TLB control */
87 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); 123 tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
124
88 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); 125 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
89 tmp = REG_SET_FIELD(tmp, 126 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
90 MC_VM_MX_L1_TLB_CNTL, 127 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
91 SYSTEM_ACCESS_MODE, 128 ENABLE_ADVANCED_DRIVER_MODEL, 1);
92 3); 129 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
93 tmp = REG_SET_FIELD(tmp, 130 SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
94 MC_VM_MX_L1_TLB_CNTL, 131 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
95 ENABLE_ADVANCED_DRIVER_MODEL, 132 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
96 1); 133 MTYPE, MTYPE_UC);/* XXX for emulation. */
97 tmp = REG_SET_FIELD(tmp, 134 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
98 MC_VM_MX_L1_TLB_CNTL, 135
99 SYSTEM_APERTURE_UNMAPPED_ACCESS, 136 WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
100 0); 137}
101 tmp = REG_SET_FIELD(tmp, 138
102 MC_VM_MX_L1_TLB_CNTL, 139static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
103 ECO_BITS, 140{
104 0); 141 uint32_t tmp;
105 tmp = REG_SET_FIELD(tmp,
106 MC_VM_MX_L1_TLB_CNTL,
107 MTYPE,
108 MTYPE_UC);/* XXX for emulation. */
109 tmp = REG_SET_FIELD(tmp,
110 MC_VM_MX_L1_TLB_CNTL,
111 ATC_EN,
112 1);
113 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
114 142
115 /* Setup L2 cache */ 143 /* Setup L2 cache */
116 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); 144 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
117 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); 145 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
118 tmp = REG_SET_FIELD(tmp, 146 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
119 VM_L2_CNTL, 147 /* XXX for emulation, Refer to closed source code.*/
120 ENABLE_L2_FRAGMENT_PROCESSING, 148 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
121 0); 149 0);
122 tmp = REG_SET_FIELD(tmp,
123 VM_L2_CNTL,
124 L2_PDE0_CACHE_TAG_GENERATION_MODE,
125 0);/* XXX for emulation, Refer to closed source code.*/
126 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); 150 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
127 tmp = REG_SET_FIELD(tmp, 151 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
128 VM_L2_CNTL, 152 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
129 CONTEXT1_IDENTITY_ACCESS_MODE, 153 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
130 1);
131 tmp = REG_SET_FIELD(tmp,
132 VM_L2_CNTL,
133 IDENTITY_MODE_FRAGMENT_SIZE,
134 0);
135 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp);
136 154
137 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2)); 155 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2);
138 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); 156 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
139 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); 157 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
140 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2), tmp); 158 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
141 159
142 tmp = mmVM_L2_CNTL3_DEFAULT; 160 tmp = mmVM_L2_CNTL3_DEFAULT;
143 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), tmp); 161 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
144
145 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4));
146 tmp = REG_SET_FIELD(tmp,
147 VM_L2_CNTL4,
148 VMC_TAP_PDE_REQUEST_PHYSICAL,
149 0);
150 tmp = REG_SET_FIELD(tmp,
151 VM_L2_CNTL4,
152 VMC_TAP_PTE_REQUEST_PHYSICAL,
153 0);
154 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4), tmp);
155
156 /* setup context0 */
157 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
158 mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32),
159 (u32)(adev->mc.gtt_start >> 12));
160 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
161 mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32),
162 (u32)(adev->mc.gtt_start >> 44));
163
164 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
165 mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32),
166 (u32)(adev->mc.gtt_end >> 12));
167 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
168 mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32),
169 (u32)(adev->mc.gtt_end >> 44));
170
171 BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
172 value = adev->gart.table_addr - adev->mc.vram_start +
173 adev->vm_manager.vram_base_offset;
174 value &= 0x0000FFFFFFFFF000ULL;
175 value |= 0x1; /* valid bit */
176 162
177 WREG32(SOC15_REG_OFFSET(MMHUB, 0, 163 tmp = mmVM_L2_CNTL4_DEFAULT;
178 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), 164 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
179 (u32)value); 165 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
180 WREG32(SOC15_REG_OFFSET(MMHUB, 0, 166 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL4, tmp);
181 mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), 167}
182 (u32)(value >> 32));
183
184 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
185 mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32),
186 (u32)(adev->dummy_page.addr >> 12));
187 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
188 mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32),
189 (u32)((u64)adev->dummy_page.addr >> 44));
190
191 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2));
192 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
193 ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY,
194 1);
195 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp);
196 168
197 addr = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL); 169static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
198 tmp = RREG32(addr); 170{
171 uint32_t tmp;
199 172
173 tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL);
200 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); 174 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
201 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); 175 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
202 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL), tmp); 176 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp);
203 177}
204 tmp = RREG32(addr);
205
206 /* Disable identity aperture.*/
207 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
208 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF);
209 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
210 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F);
211 178
212 WREG32(SOC15_REG_OFFSET(MMHUB, 0, 179static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
213 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); 180{
214 WREG32(SOC15_REG_OFFSET(MMHUB, 0, 181 WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
215 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); 182 0XFFFFFFFF);
183 WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
184 0x0000000F);
185
186 WREG32_SOC15(MMHUB, 0,
187 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
188 WREG32_SOC15(MMHUB, 0,
189 mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
190
191 WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
192 0);
193 WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
194 0);
195}
216 196
217 WREG32(SOC15_REG_OFFSET(MMHUB, 0, 197static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
218 mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); 198{
219 WREG32(SOC15_REG_OFFSET(MMHUB, 0, 199 int i;
220 mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); 200 uint32_t tmp;
221 201
222 for (i = 0; i <= 14; i++) { 202 for (i = 0; i <= 14; i++) {
223 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) 203 tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
224 + i);
225 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 204 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
226 ENABLE_CONTEXT, 1); 205 ENABLE_CONTEXT, 1);
227 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 206 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
@@ -243,14 +222,270 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
243 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, 222 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
244 PAGE_TABLE_BLOCK_SIZE, 223 PAGE_TABLE_BLOCK_SIZE,
245 adev->vm_manager.block_size - 9); 224 adev->vm_manager.block_size - 9);
246 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp); 225 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp);
247 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); 226 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
248 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); 227 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
249 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, 228 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
250 lower_32_bits(adev->vm_manager.max_pfn - 1)); 229 lower_32_bits(adev->vm_manager.max_pfn - 1));
251 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, 230 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
252 upper_32_bits(adev->vm_manager.max_pfn - 1)); 231 upper_32_bits(adev->vm_manager.max_pfn - 1));
253 } 232 }
233}
234
235static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
236{
237 unsigned i;
238
239 for (i = 0; i < 18; ++i) {
240 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
241 2 * i, 0xffffffff);
242 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
243 2 * i, 0x1f);
244 }
245}
246
247struct pctl_data {
248 uint32_t index;
249 uint32_t data;
250};
251
252static const struct pctl_data pctl0_data[] = {
253 {0x0, 0x7a640},
254 {0x9, 0x2a64a},
255 {0xd, 0x2a680},
256 {0x11, 0x6a684},
257 {0x19, 0xea68e},
258 {0x29, 0xa69e},
259 {0x2b, 0x34a6c0},
260 {0x61, 0x83a707},
261 {0xe6, 0x8a7a4},
262 {0xf0, 0x1a7b8},
263 {0xf3, 0xfa7cc},
264 {0x104, 0x17a7dd},
265 {0x11d, 0xa7dc},
266 {0x11f, 0x12a7f5},
267 {0x133, 0xa808},
268 {0x135, 0x12a810},
269 {0x149, 0x7a82c}
270};
271#define PCTL0_DATA_LEN ARRAY_SIZE(pctl0_data)
272
273#define PCTL0_RENG_EXEC_END_PTR 0x151
274#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640
275#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
276
277static const struct pctl_data pctl1_data[] = {
278 {0x0, 0x39a000},
279 {0x3b, 0x44a040},
280 {0x81, 0x2a08d},
281 {0x85, 0x6ba094},
282 {0xf2, 0x18a100},
283 {0x10c, 0x4a132},
284 {0x112, 0xca141},
285 {0x120, 0x2fa158},
286 {0x151, 0x17a1d0},
287 {0x16a, 0x1a1e9},
288 {0x16d, 0x13a1ec},
289 {0x182, 0x7a201},
290 {0x18b, 0x3a20a},
291 {0x190, 0x7a580},
292 {0x199, 0xa590},
293 {0x19b, 0x4a594},
294 {0x1a1, 0x1a59c},
295 {0x1a4, 0x7a82c},
296 {0x1ad, 0xfa7cc},
297 {0x1be, 0x17a7dd},
298 {0x1d7, 0x12a810}
299};
300#define PCTL1_DATA_LEN ARRAY_SIZE(pctl1_data)
301
302#define PCTL1_RENG_EXEC_END_PTR 0x1ea
303#define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000
304#define PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa20d
305#define PCTL1_STCTRL_REG_SAVE_RANGE1_BASE 0xa580
306#define PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT 0xa59d
307#define PCTL1_STCTRL_REG_SAVE_RANGE2_BASE 0xa82c
308#define PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT 0xa833
309
310static void mmhub_v1_0_power_gating_write_save_ranges(struct amdgpu_device *adev)
311{
312 uint32_t tmp = 0;
313
314 /* PCTL0_STCTRL_REGISTER_SAVE_RANGE0 */
315 tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0,
316 STCTRL_REGISTER_SAVE_BASE,
317 PCTL0_STCTRL_REG_SAVE_RANGE0_BASE);
318 tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0,
319 STCTRL_REGISTER_SAVE_LIMIT,
320 PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT);
321 WREG32_SOC15(MMHUB, 0, mmPCTL0_STCTRL_REGISTER_SAVE_RANGE0, tmp);
322
323 /* PCTL1_STCTRL_REGISTER_SAVE_RANGE0 */
324 tmp = 0;
325 tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0,
326 STCTRL_REGISTER_SAVE_BASE,
327 PCTL1_STCTRL_REG_SAVE_RANGE0_BASE);
328 tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0,
329 STCTRL_REGISTER_SAVE_LIMIT,
330 PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT);
331 WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE0, tmp);
332
333 /* PCTL1_STCTRL_REGISTER_SAVE_RANGE1 */
334 tmp = 0;
335 tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1,
336 STCTRL_REGISTER_SAVE_BASE,
337 PCTL1_STCTRL_REG_SAVE_RANGE1_BASE);
338 tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1,
339 STCTRL_REGISTER_SAVE_LIMIT,
340 PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT);
341 WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE1, tmp);
342
343 /* PCTL1_STCTRL_REGISTER_SAVE_RANGE2 */
344 tmp = 0;
345 tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2,
346 STCTRL_REGISTER_SAVE_BASE,
347 PCTL1_STCTRL_REG_SAVE_RANGE2_BASE);
348 tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2,
349 STCTRL_REGISTER_SAVE_LIMIT,
350 PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT);
351 WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE2, tmp);
352}
353
354void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
355{
356 uint32_t pctl0_misc = 0;
357 uint32_t pctl0_reng_execute = 0;
358 uint32_t pctl1_misc = 0;
359 uint32_t pctl1_reng_execute = 0;
360 int i = 0;
361
362 if (amdgpu_sriov_vf(adev))
363 return;
364
365 pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
366 pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
367 pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
368 pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
369
370 /* Light sleep must be disabled before writing to pctl0 registers */
371 pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
372 WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
373
374 /* Load the register engine's RAM through the index/data register pair */
375 for (i = 0; i < PCTL0_DATA_LEN; i++) {
376 WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_INDEX,
377 pctl0_data[i].index);
378 WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_DATA,
379 pctl0_data[i].data);
380 }
381
382 /* Set the reng execute end ptr for pctl0 */
383 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
384 PCTL0_RENG_EXECUTE,
385 RENG_EXECUTE_END_PTR,
386 PCTL0_RENG_EXEC_END_PTR);
387 WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
388
389 /* Light sleep must be disabled before writing to pctl1 registers */
390 pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
391 WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
392
393 /* Load the register engine's RAM through the index/data register pair */
394 for (i = 0; i < PCTL1_DATA_LEN; i++) {
395 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_INDEX,
396 pctl1_data[i].index);
397 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_DATA,
398 pctl1_data[i].data);
399 }
400
401 /* Set the reng execute end ptr for pctl1 */
402 pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
403 PCTL1_RENG_EXECUTE,
404 RENG_EXECUTE_END_PTR,
405 PCTL1_RENG_EXEC_END_PTR);
406 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
407
408 mmhub_v1_0_power_gating_write_save_ranges(adev);
409
410 /* Re-enable light sleep */
411 pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
412 WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
413 pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
414 WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
415}
416
417void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
418 bool enable)
419{
420 uint32_t pctl0_reng_execute = 0;
421 uint32_t pctl1_reng_execute = 0;
422
423 if (amdgpu_sriov_vf(adev))
424 return;
425
426 pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
427 pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
428
429 if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) {
430 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
431 PCTL0_RENG_EXECUTE,
432 RENG_EXECUTE_ON_PWR_UP, 1);
433 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
434 PCTL0_RENG_EXECUTE,
435 RENG_EXECUTE_ON_REG_UPDATE, 1);
436 WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
437
438 pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
439 PCTL1_RENG_EXECUTE,
440 RENG_EXECUTE_ON_PWR_UP, 1);
441 pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
442 PCTL1_RENG_EXECUTE,
443 RENG_EXECUTE_ON_REG_UPDATE, 1);
444 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
445
446 } else {
447 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
448 PCTL0_RENG_EXECUTE,
449 RENG_EXECUTE_ON_PWR_UP, 0);
450 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
451 PCTL0_RENG_EXECUTE,
452 RENG_EXECUTE_ON_REG_UPDATE, 0);
453 WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
454
455 pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
456 PCTL1_RENG_EXECUTE,
457 RENG_EXECUTE_ON_PWR_UP, 0);
458 pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
459 PCTL1_RENG_EXECUTE,
460 RENG_EXECUTE_ON_REG_UPDATE, 0);
461 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
462 }
463}
464
465int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
466{
467 if (amdgpu_sriov_vf(adev)) {
468 /*
469 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF because they are
470 * VF copy registers, so the VBIOS post doesn't program them;
471 * the SRIOV driver needs to program them itself.
472 */
473 WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE,
474 adev->mc.vram_start >> 24);
475 WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP,
476 adev->mc.vram_end >> 24);
477 }
478
479 /* GART Enable. */
480 mmhub_v1_0_init_gart_aperture_regs(adev);
481 mmhub_v1_0_init_system_aperture_regs(adev);
482 mmhub_v1_0_init_tlb_regs(adev);
483 mmhub_v1_0_init_cache_regs(adev);
484
485 mmhub_v1_0_enable_system_domain(adev);
486 mmhub_v1_0_disable_identity_aperture(adev);
487 mmhub_v1_0_setup_vmid_config(adev);
488 mmhub_v1_0_program_invalidation(adev);
254 489
255 return 0; 490 return 0;
256} 491}
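The net effect of this hunk: the old monolithic gart_enable() is split into single-purpose helpers (page-table pointer, GART and system apertures, TLB, L2 cache, system domain, identity aperture, per-VMID config, invalidation engines). The page-table pointer packing at the top is the subtle part; spelled out:

    /* from mmhub_v1_0_init_gart_pt_regs(): rebase the GART table's MC
     * address to a physical address, keep only the 4 KB-aligned bits of
     * a 48-bit address, tag bit 0 as "valid", then split the value
     * across the LO32/HI32 register pair. */
    uint64_t value = adev->gart.table_addr - adev->mc.vram_start +
                     adev->vm_manager.vram_base_offset;
    value &= 0x0000FFFFFFFFF000ULL;
    value |= 0x1;

    uint32_t lo = lower_32_bits(value);  /* -> ..._BASE_ADDR_LO32 */
    uint32_t hi = upper_32_bits(value);  /* -> ..._BASE_ADDR_HI32 */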
@@ -262,22 +497,22 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
262 497
263 /* Disable all tables */ 498 /* Disable all tables */
264 for (i = 0; i < 16; i++) 499 for (i = 0; i < 16; i++)
265 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL) + i, 0); 500 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0);
266 501
267 /* Setup TLB control */ 502 /* Setup TLB control */
268 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); 503 tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
269 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); 504 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
270 tmp = REG_SET_FIELD(tmp, 505 tmp = REG_SET_FIELD(tmp,
271 MC_VM_MX_L1_TLB_CNTL, 506 MC_VM_MX_L1_TLB_CNTL,
272 ENABLE_ADVANCED_DRIVER_MODEL, 507 ENABLE_ADVANCED_DRIVER_MODEL,
273 0); 508 0);
274 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); 509 WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
275 510
276 /* Setup L2 cache */ 511 /* Setup L2 cache */
277 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); 512 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
278 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); 513 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
279 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); 514 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
280 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), 0); 515 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
281} 516}
282 517
283/** 518/**
@@ -289,7 +524,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
289void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) 524void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
290{ 525{
291 u32 tmp; 526 u32 tmp;
292 tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); 527 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
293 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 528 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
294 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 529 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
295 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 530 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
@@ -314,22 +549,11 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
314 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 549 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
315 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 550 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
316 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 551 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
317 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); 552 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
318} 553}
319 554
320static int mmhub_v1_0_early_init(void *handle) 555void mmhub_v1_0_init(struct amdgpu_device *adev)
321{ 556{
322 return 0;
323}
324
325static int mmhub_v1_0_late_init(void *handle)
326{
327 return 0;
328}
329
330static int mmhub_v1_0_sw_init(void *handle)
331{
332 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
333 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; 557 struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
334 558
335 hub->ctx0_ptb_addr_lo32 = 559 hub->ctx0_ptb_addr_lo32 =
@@ -349,69 +573,20 @@ static int mmhub_v1_0_sw_init(void *handle)
349 hub->vm_l2_pro_fault_cntl = 573 hub->vm_l2_pro_fault_cntl =
350 SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); 574 SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
351 575
352 return 0;
353}
354
355static int mmhub_v1_0_sw_fini(void *handle)
356{
357 return 0;
358}
359
360static int mmhub_v1_0_hw_init(void *handle)
361{
362 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
363 unsigned i;
364
365 for (i = 0; i < 18; ++i) {
366 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
367 mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) +
368 2 * i, 0xffffffff);
369 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
370 mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) +
371 2 * i, 0x1f);
372 }
373
374 return 0;
375}
376
377static int mmhub_v1_0_hw_fini(void *handle)
378{
379 return 0;
380}
381
382static int mmhub_v1_0_suspend(void *handle)
383{
384 return 0;
385}
386
387static int mmhub_v1_0_resume(void *handle)
388{
389 return 0;
390}
391
392static bool mmhub_v1_0_is_idle(void *handle)
393{
394 return true;
395}
396
397static int mmhub_v1_0_wait_for_idle(void *handle)
398{
399 return 0;
400}
401
402static int mmhub_v1_0_soft_reset(void *handle)
403{
404 return 0;
405} 576}
406 577
407static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 578static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
408 bool enable) 579 bool enable)
409{ 580{
410 uint32_t def, data, def1, data1, def2, data2; 581 uint32_t def, data, def1, data1, def2 = 0, data2 = 0;
411 582
412 def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); 583 def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
413 def1 = data1 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2)); 584
414 def2 = data2 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2)); 585 if (adev->asic_type != CHIP_RAVEN) {
586 def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
587 def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2);
588 } else
589 def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV);
415 590
416 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { 591 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
417 data |= ATC_L2_MISC_CG__ENABLE_MASK; 592 data |= ATC_L2_MISC_CG__ENABLE_MASK;
@@ -423,12 +598,13 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
423 DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | 598 DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
424 DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); 599 DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
425 600
426 data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | 601 if (adev->asic_type != CHIP_RAVEN)
427 DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | 602 data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
428 DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | 603 DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
429 DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | 604 DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
430 DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | 605 DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
431 DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); 606 DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
607 DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
432 } else { 608 } else {
433 data &= ~ATC_L2_MISC_CG__ENABLE_MASK; 609 data &= ~ATC_L2_MISC_CG__ENABLE_MASK;
434 610
@@ -439,22 +615,27 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
439 DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | 615 DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
440 DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); 616 DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
441 617
442 data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | 618 if (adev->asic_type != CHIP_RAVEN)
443 DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | 619 data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
444 DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | 620 DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
445 DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | 621 DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
446 DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | 622 DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
447 DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); 623 DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
624 DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
448 } 625 }
449 626
450 if (def != data) 627 if (def != data)
451 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); 628 WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
452 629
453 if (def1 != data1) 630 if (def1 != data1) {
454 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2), data1); 631 if (adev->asic_type != CHIP_RAVEN)
632 WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
633 else
634 WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV, data1);
635 }
455 636
456 if (def2 != data2) 637 if (adev->asic_type != CHIP_RAVEN && def2 != data2)
457 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2), data2); 638 WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2);
458} 639}
459 640
460static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, 641static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -462,7 +643,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
462{ 643{
463 uint32_t def, data; 644 uint32_t def, data;
464 645
465 def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); 646 def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
466 647
467 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) 648 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
468 data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; 649 data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -470,7 +651,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
470 data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; 651 data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
471 652
472 if (def != data) 653 if (def != data)
473 WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); 654 WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
474} 655}
475 656
476static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, 657static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
@@ -478,7 +659,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
478{ 659{
479 uint32_t def, data; 660 uint32_t def, data;
480 661
481 def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); 662 def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
482 663
483 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) 664 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
484 data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; 665 data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
@@ -486,7 +667,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
486 data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; 667 data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
487 668
488 if (def != data) 669 if (def != data)
489 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); 670 WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);
490} 671}
491 672
492static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, 673static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
@@ -494,7 +675,7 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
494{ 675{
495 uint32_t def, data; 676 uint32_t def, data;
496 677
497 def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); 678 def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
498 679
499 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && 680 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
500 (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) 681 (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
@@ -503,19 +684,18 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
503 data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; 684 data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
504 685
505 if(def != data) 686 if(def != data)
506 WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); 687 WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
507} 688}
508 689
509static int mmhub_v1_0_set_clockgating_state(void *handle, 690int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
510 enum amd_clockgating_state state) 691 enum amd_clockgating_state state)
511{ 692{
512 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
513
514 if (amdgpu_sriov_vf(adev)) 693 if (amdgpu_sriov_vf(adev))
515 return 0; 694 return 0;
516 695
517 switch (adev->asic_type) { 696 switch (adev->asic_type) {
518 case CHIP_VEGA10: 697 case CHIP_VEGA10:
698 case CHIP_RAVEN:
519 mmhub_v1_0_update_medium_grain_clock_gating(adev, 699 mmhub_v1_0_update_medium_grain_clock_gating(adev,
520 state == AMD_CG_STATE_GATE ? true : false); 700 state == AMD_CG_STATE_GATE ? true : false);
521 athub_update_medium_grain_clock_gating(adev, 701 athub_update_medium_grain_clock_gating(adev,
@@ -532,54 +712,20 @@ static int mmhub_v1_0_set_clockgating_state(void *handle,
532 return 0; 712 return 0;
533} 713}
534 714
535static void mmhub_v1_0_get_clockgating_state(void *handle, u32 *flags) 715void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
536{ 716{
537 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
538 int data; 717 int data;
539 718
540 if (amdgpu_sriov_vf(adev)) 719 if (amdgpu_sriov_vf(adev))
541 *flags = 0; 720 *flags = 0;
542 721
543 /* AMD_CG_SUPPORT_MC_MGCG */ 722 /* AMD_CG_SUPPORT_MC_MGCG */
544 data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); 723 data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
545 if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) 724 if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
546 *flags |= AMD_CG_SUPPORT_MC_MGCG; 725 *flags |= AMD_CG_SUPPORT_MC_MGCG;
547 726
548 /* AMD_CG_SUPPORT_MC_LS */ 727 /* AMD_CG_SUPPORT_MC_LS */
549 data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); 728 data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);
550 if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) 729 if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
551 *flags |= AMD_CG_SUPPORT_MC_LS; 730 *flags |= AMD_CG_SUPPORT_MC_LS;
552} 731}
553
554static int mmhub_v1_0_set_powergating_state(void *handle,
555 enum amd_powergating_state state)
556{
557 return 0;
558}
559
560const struct amd_ip_funcs mmhub_v1_0_ip_funcs = {
561 .name = "mmhub_v1_0",
562 .early_init = mmhub_v1_0_early_init,
563 .late_init = mmhub_v1_0_late_init,
564 .sw_init = mmhub_v1_0_sw_init,
565 .sw_fini = mmhub_v1_0_sw_fini,
566 .hw_init = mmhub_v1_0_hw_init,
567 .hw_fini = mmhub_v1_0_hw_fini,
568 .suspend = mmhub_v1_0_suspend,
569 .resume = mmhub_v1_0_resume,
570 .is_idle = mmhub_v1_0_is_idle,
571 .wait_for_idle = mmhub_v1_0_wait_for_idle,
572 .soft_reset = mmhub_v1_0_soft_reset,
573 .set_clockgating_state = mmhub_v1_0_set_clockgating_state,
574 .set_powergating_state = mmhub_v1_0_set_powergating_state,
575 .get_clockgating_state = mmhub_v1_0_get_clockgating_state,
576};
577
578const struct amdgpu_ip_block_version mmhub_v1_0_ip_block =
579{
580 .type = AMD_IP_BLOCK_TYPE_MMHUB,
581 .major = 1,
582 .minor = 0,
583 .rev = 0,
584 .funcs = &mmhub_v1_0_ip_funcs,
585};
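Most of the churn in this file is mechanical: WREG32(SOC15_REG_OFFSET(ip, inst, reg), v) becomes the shorter WREG32_SOC15(ip, inst, reg, v), layered over the read-modify-write idiom that only touches the bus when a bit actually changed. Below is a minimal user-space sketch of both, not the kernel's real definitions: mmio[], soc15_reg_offset() and CG_ENABLE_MASK are illustrative stand-ins.

#include <stdint.h>
#include <stdio.h>

static uint32_t mmio[16];			/* stand-in register file */

/* Stand-in for SOC15_REG_OFFSET(); the real helper adds a per-IP,
 * per-instance base pulled from the soc15ip tables. */
static uint32_t soc15_reg_offset(int ip, int inst, uint32_t reg)
{
	(void)ip;
	(void)inst;
	return reg;
}

#define RREG32(off)		(mmio[(off)])
#define WREG32(off, v)		(mmio[(off)] = (v))
#define RREG32_SOC15(ip, inst, reg)	RREG32(soc15_reg_offset(ip, inst, reg))
#define WREG32_SOC15(ip, inst, reg, v)	WREG32(soc15_reg_offset(ip, inst, reg), (v))

#define CG_ENABLE_MASK	0x00000001u	/* illustrative field mask */

int main(void)
{
	uint32_t def, data;

	/* Read once, edit the local copy, write back only on change:
	 * the pattern every *_update_medium_grain_* helper follows. */
	def = data = RREG32_SOC15(0, 0, 3);
	data |= CG_ENABLE_MASK;
	if (def != data)
		WREG32_SOC15(0, 0, 3, data);

	printf("reg[3] = 0x%08x\n", mmio[3]);
	return 0;
}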
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
index aadedf99c028..57bb940c0ecd 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
@@ -28,6 +28,13 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev);
28void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); 28void mmhub_v1_0_gart_disable(struct amdgpu_device *adev);
29void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, 29void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
30 bool value); 30 bool value);
31void mmhub_v1_0_init(struct amdgpu_device *adev);
32int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
33 enum amd_clockgating_state state);
34void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
35void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev);
36void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
37 bool enable);
31 38
32extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs; 39extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs;
33extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block; 40extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 1493301b6a94..bde3ca3c21c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
124 r = -ETIME; 124 r = -ETIME;
125 break; 125 break;
126 } 126 }
127 msleep(1); 127 mdelay(5);
128 timeout -= 1; 128 timeout -= 5;
129 129
130 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 130 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
131 mmBIF_BX_PF0_MAILBOX_CONTROL)); 131 mmBIF_BX_PF0_MAILBOX_CONTROL));
@@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
141 r = xgpu_ai_mailbox_rcv_msg(adev, event); 141 r = xgpu_ai_mailbox_rcv_msg(adev, event);
142 while (r) { 142 while (r) {
143 if (timeout <= 0) { 143 if (timeout <= 0) {
144 pr_err("Doesn't get ack from pf.\n"); 144 pr_err("Doesn't get msg:%d from pf.\n", event);
145 r = -ETIME; 145 r = -ETIME;
146 break; 146 break;
147 } 147 }
148 msleep(1); 148 mdelay(5);
149 timeout -= 1; 149 timeout -= 5;
150 150
151 r = xgpu_ai_mailbox_rcv_msg(adev, event); 151 r = xgpu_ai_mailbox_rcv_msg(adev, event);
152 } 152 }
@@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
165 /* start to poll ack */ 165 /* start to poll ack */
166 r = xgpu_ai_poll_ack(adev); 166 r = xgpu_ai_poll_ack(adev);
167 if (r) 167 if (r)
168 return r; 168 pr_err("Doesn't get ack from pf, continue\n");
169 169
170 xgpu_ai_mailbox_set_valid(adev, false); 170 xgpu_ai_mailbox_set_valid(adev, false);
171 171
@@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
174 req == IDH_REQ_GPU_FINI_ACCESS || 174 req == IDH_REQ_GPU_FINI_ACCESS ||
175 req == IDH_REQ_GPU_RESET_ACCESS) { 175 req == IDH_REQ_GPU_RESET_ACCESS) {
176 r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); 176 r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
177 if (r) 177 if (r) {
178 pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
178 return r; 179 return r;
180 }
179 } 181 }
180 182
181 return 0; 183 return 0;
@@ -241,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
241 } 243 }
242 244
243 /* Trigger recovery due to world switch failure */ 245 /* Trigger recovery due to world switch failure */
244 amdgpu_sriov_gpu_reset(adev, false); 246 amdgpu_sriov_gpu_reset(adev, NULL);
245} 247}
246 248
247static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, 249static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -264,12 +266,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
264{ 266{
265 int r; 267 int r;
266 268
267 /* see what event we get */ 269 /* trigger gpu-reset by hypervisor only if TDR disabled */
268 r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); 270 if (amdgpu_lockup_timeout == 0) {
271 /* see what event we get */
272 r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
269 273
270 /* only handle FLR_NOTIFY now */ 274 /* only handle FLR_NOTIFY now */
271 if (!r) 275 if (!r)
272 schedule_work(&adev->virt.flr_work); 276 schedule_work(&adev->virt.flr_work);
277 }
273 278
274 return 0; 279 return 0;
275} 280}
@@ -296,11 +301,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
296{ 301{
297 int r; 302 int r;
298 303
299 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); 304 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
300 if (r) 305 if (r)
301 return r; 306 return r;
302 307
303 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); 308 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
304 if (r) { 309 if (r) {
305 amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); 310 amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
306 return r; 311 return r;
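The msleep(1) to mdelay(5) switch above also rescales the timeout decrement, so the loop still accounts for the same millisecond budget it spends. A self-contained sketch of the pattern; check_ack() and the 2000 ms budget are hypothetical stand-ins, not the driver's symbols.

#include <stdbool.h>
#include <stdio.h>

static int polls;

static bool check_ack(void)
{
	return ++polls >= 4;	/* pretend the PF acks on the 4th poll */
}

static void mdelay(int ms)
{
	(void)ms;		/* stub for the kernel's busy-wait */
}

static int poll_ack(void)
{
	int timeout = 2000;	/* total budget in ms, illustrative */

	while (!check_ack()) {
		if (timeout <= 0)
			return -1;	/* the driver returns -ETIME here */
		mdelay(5);
		timeout -= 5;	/* keep the decrement in sync with the delay */
	}
	return 0;
}

int main(void)
{
	printf("poll_ack() = %d after %d polls\n", poll_ack(), polls);
	return 0;
}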
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 7bdc51b02326..171a658135b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
398 r = -ETIME; 398 r = -ETIME;
399 break; 399 break;
400 } 400 }
401 msleep(1); 401 mdelay(5);
402 timeout -= 1; 402 timeout -= 5;
403 403
404 reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); 404 reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
405 } 405 }
@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
418 r = -ETIME; 418 r = -ETIME;
419 break; 419 break;
420 } 420 }
421 msleep(1); 421 mdelay(5);
422 timeout -= 1; 422 timeout -= 5;
423 423
424 r = xgpu_vi_mailbox_rcv_msg(adev, event); 424 r = xgpu_vi_mailbox_rcv_msg(adev, event);
425 } 425 }
@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
447 request == IDH_REQ_GPU_RESET_ACCESS) { 447 request == IDH_REQ_GPU_RESET_ACCESS) {
448 r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); 448 r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
449 if (r) 449 if (r)
450 return r; 450 pr_err("Doesn't get ack from pf, continue\n");
451 } 451 }
452 452
453 return 0; 453 return 0;
@@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
514 } 514 }
515 515
516 /* Trigger recovery due to world switch failure */ 516 /* Trigger recovery due to world switch failure */
517 amdgpu_sriov_gpu_reset(adev, false); 517 amdgpu_sriov_gpu_reset(adev, NULL);
518} 518}
519 519
520static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, 520static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -537,12 +537,15 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
537{ 537{
538 int r; 538 int r;
539 539
540 /* see what event we get */ 540 /* trigger gpu-reset by hypervisor only if TDR disabled */
541 r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); 541 if (amdgpu_lockup_timeout == 0) {
542 /* see what event we get */
543 r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
542 544
543 /* only handle FLR_NOTIFY now */ 545 /* only handle FLR_NOTIFY now */
544 if (!r) 546 if (!r)
545 schedule_work(&adev->virt.flr_work); 547 schedule_work(&adev->virt.flr_work);
548 }
546 549
547 return 0; 550 return 0;
548} 551}
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 97057f4a10de..1e272f785def 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -35,7 +35,7 @@
35 35
36u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) 36u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)
37{ 37{
38 u32 tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0)); 38 u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
39 39
40 tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; 40 tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
41 tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; 41 tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@@ -46,32 +46,33 @@ u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)
46u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, 46u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev,
47 uint32_t idx) 47 uint32_t idx)
48{ 48{
49 return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx); 49 return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx);
50} 50}
51 51
52void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, 52void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev,
53 uint32_t idx, uint32_t val) 53 uint32_t idx, uint32_t val)
54{ 54{
55 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx, val); 55 WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val);
56} 56}
57 57
58void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) 58void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
59{ 59{
60 if (enable) 60 if (enable)
61 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), 61 WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
62 BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); 62 BIF_FB_EN__FB_READ_EN_MASK |
63 BIF_FB_EN__FB_WRITE_EN_MASK);
63 else 64 else
64 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), 0); 65 WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
65} 66}
66 67
67void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) 68void nbio_v6_1_hdp_flush(struct amdgpu_device *adev)
68{ 69{
69 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); 70 WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
70} 71}
71 72
72u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) 73u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
73{ 74{
74 return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE)); 75 return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE);
75} 76}
76 77
77static const u32 nbio_sdma_doorbell_range_reg[] = 78static const u32 nbio_sdma_doorbell_range_reg[] =
@@ -97,15 +98,7 @@ void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
97void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, 98void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev,
98 bool enable) 99 bool enable)
99{ 100{
100 u32 tmp; 101 WREG32_FIELD15(NBIO, 0, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
101
102 tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN));
103 if (enable)
104 tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 1);
105 else
106 tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 0);
107
108 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN), tmp);
109} 102}
110 103
111void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, 104void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
@@ -115,23 +108,23 @@ void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
115 108
116 if (enable) { 109 if (enable) {
117 tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | 110 tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) |
118 REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | 111 REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) |
119 REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); 112 REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0);
120 113
121 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW), 114 WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
122 lower_32_bits(adev->doorbell.base)); 115 lower_32_bits(adev->doorbell.base));
123 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH), 116 WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
124 upper_32_bits(adev->doorbell.base)); 117 upper_32_bits(adev->doorbell.base));
125 } 118 }
126 119
127 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL), tmp); 120 WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp);
128} 121}
129 122
130 123
131void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, 124void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev,
132 bool use_doorbell, int doorbell_index) 125 bool use_doorbell, int doorbell_index)
133{ 126{
134 u32 ih_doorbell_range = RREG32(SOC15_REG_OFFSET(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE)); 127 u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE);
135 128
136 if (use_doorbell) { 129 if (use_doorbell) {
137 ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); 130 ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
@@ -139,7 +132,7 @@ void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev,
139 } else 132 } else
140 ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); 133 ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
141 134
142 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_IH_DOORBELL_RANGE), ih_doorbell_range); 135 WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
143} 136}
144 137
145void nbio_v6_1_ih_control(struct amdgpu_device *adev) 138void nbio_v6_1_ih_control(struct amdgpu_device *adev)
@@ -147,15 +140,15 @@ void nbio_v6_1_ih_control(struct amdgpu_device *adev)
147 u32 interrupt_cntl; 140 u32 interrupt_cntl;
148 141
149 /* setup interrupt control */ 142 /* setup interrupt control */
150 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL2), adev->dummy_page.addr >> 8); 143 WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
151 interrupt_cntl = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL)); 144 interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
152 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi 145 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
153 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN 146 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
154 */ 147 */
155 interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); 148 interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
156 /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ 149 /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
157 interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); 150 interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
158 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL), interrupt_cntl); 151 WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
159} 152}
160 153
161void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, 154void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -251,8 +244,7 @@ void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev)
251{ 244{
252 uint32_t reg; 245 uint32_t reg;
253 246
254 reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, 247 reg = RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER);
255 mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER));
256 if (reg & 1) 248 if (reg & 1)
257 adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; 249 adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
258 250
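The doorbell-aperture hunk collapses a read, REG_SET_FIELD(), write sequence into one WREG32_FIELD15() call. The field update itself is plain mask-and-shift arithmetic; a sketch with illustrative mask and shift values (the APER_EN_* constants are stand-ins):

#include <stdint.h>
#include <stdio.h>

#define APER_EN_MASK	0x00000001u	/* illustrative field mask */
#define APER_EN_SHIFT	0		/* illustrative field shift */

/* Same arithmetic as REG_SET_FIELD(): clear the field, then OR in
 * the shifted value. */
static uint32_t reg_set_field(uint32_t reg, uint32_t mask,
			      unsigned int shift, uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

int main(void)
{
	uint32_t doorbell_aper = 0xdeadbee0u;

	/* One field update replaces the read/modify/branch/write chain
	 * that the hunk deletes. */
	doorbell_aper = reg_set_field(doorbell_aper, APER_EN_MASK,
				      APER_EN_SHIFT, 1);
	printf("0x%08x\n", doorbell_aper);
	return 0;
}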
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
new file mode 100644
index 000000000000..aa04632523fa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -0,0 +1,212 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include "amdgpu.h"
24#include "amdgpu_atombios.h"
25#include "nbio_v7_0.h"
26
27#include "vega10/soc15ip.h"
28#include "raven1/NBIO/nbio_7_0_default.h"
29#include "raven1/NBIO/nbio_7_0_offset.h"
30#include "raven1/NBIO/nbio_7_0_sh_mask.h"
31#include "vega10/vega10_enum.h"
32
33#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
34
35u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
36{
37 u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
38
39 tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
40 tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
41
42 return tmp;
43}
44
45u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev,
46 uint32_t idx)
47{
48 return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx);
49}
50
51void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev,
52 uint32_t idx, uint32_t val)
53{
54 WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val);
55}
56
57void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable)
58{
59 if (enable)
60 WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
61 BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
62 else
63 WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
64}
65
66void nbio_v7_0_hdp_flush(struct amdgpu_device *adev)
67{
68 WREG32_SOC15(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
69}
70
71u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
72{
73 return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE);
74}
75
76static const u32 nbio_sdma_doorbell_range_reg[] =
77{
78 SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE),
79 SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE)
80};
81
82void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
83 bool use_doorbell, int doorbell_index)
84{
85 u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]);
86
87 if (use_doorbell) {
88 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
89 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
90 } else
91 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
92
93 WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range);
94}
95
96void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
97 bool enable)
98{
99 WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
100}
101
102void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev,
103 bool use_doorbell, int doorbell_index)
104{
105 u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE);
106
107 if (use_doorbell) {
108 ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
109 ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2);
110 } else
111 ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
112
113 WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
114}
115
116static uint32_t nbio_7_0_read_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset)
117{
118 uint32_t data;
119
120 WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset);
121 data = RREG32_SOC15(NBIO, 0, mmSYSHUB_DATA);
122
123 return data;
124}
125
126static void nbio_7_0_write_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset,
127 uint32_t data)
128{
129 WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset);
130 WREG32_SOC15(NBIO, 0, mmSYSHUB_DATA, data);
131}
132
133void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
134 bool enable)
135{
136 uint32_t def, data;
137
138 /* NBIF_MGCG_CTRL_LCLK */
139 def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
140
141 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
142 data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK;
143 else
144 data &= ~NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK;
145
146 if (def != data)
147 WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data);
148
149 /* SYSHUB_MGCG_CTRL_SOCCLK */
150 def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK);
151
152 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
153 data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK;
154 else
155 data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK;
156
157 if (def != data)
158 nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK, data);
159
160 /* SYSHUB_MGCG_CTRL_SHUBCLK */
161 def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK);
162
163 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
164 data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK;
165 else
166 data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK;
167
168 if (def != data)
169 nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK, data);
170}
171
172void nbio_v7_0_ih_control(struct amdgpu_device *adev)
173{
174 u32 interrupt_cntl;
175
176 /* setup interrupt control */
177 WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
178 interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
179 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
180 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
181 */
182 interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
183 /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
184 interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
185 WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
186}
187
188struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
189struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
190
191int nbio_v7_0_init(struct amdgpu_device *adev)
192{
193 nbio_v7_0_hdp_flush_reg.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ);
194 nbio_v7_0_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE);
195 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK;
196 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK;
197 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK;
198 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK;
199 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK;
200 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK;
201 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK;
202 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK;
203 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK;
204 nbio_v7_0_hdp_flush_reg.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK;
205 nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK;
206 nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK;
207
208 nbio_v7_0_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
209 nbio_v7_0_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
210
211 return 0;
212}
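nbio_7_0_read_syshub_ind_mmr() and its write counterpart use a classic index/data window: one register selects an internal offset, a second carries the payload. A sketch of the mechanism against a fake window; the backing array and helper names are illustrative.

#include <stdint.h>
#include <stdio.h>

static uint32_t syshub[256];	/* hidden register block behind the window */
static uint32_t index_reg;	/* stand-in for mmSYSHUB_INDEX */

static uint32_t read_ind(uint32_t offset)
{
	index_reg = offset;		/* WREG32_SOC15(..., mmSYSHUB_INDEX, offset) */
	return syshub[index_reg];	/* RREG32_SOC15(..., mmSYSHUB_DATA) */
}

static void write_ind(uint32_t offset, uint32_t data)
{
	index_reg = offset;
	syshub[index_reg] = data;
}

int main(void)
{
	write_ind(0x40, 0x12345678u);
	printf("0x%08x\n", read_ind(0x40));
	return 0;
}

Because the selection lives in a shared register, concurrent users of such a pair normally need a lock around the index write and the data access; these helpers leave that to their callers.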
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
new file mode 100644
index 000000000000..054ff49427e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h
@@ -0,0 +1,49 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef __NBIO_V7_0_H__
25#define __NBIO_V7_0_H__
26
27#include "soc15_common.h"
28
29extern struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg;
30extern struct nbio_pcie_index_data nbio_v7_0_pcie_index_data;
31int nbio_v7_0_init(struct amdgpu_device *adev);
32u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev,
33 uint32_t idx);
34void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev,
35 uint32_t idx, uint32_t val);
36void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable);
37void nbio_v7_0_hdp_flush(struct amdgpu_device *adev);
38u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev);
39void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
40 bool use_doorbell, int doorbell_index);
41void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
42 bool enable);
43void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev,
44 bool use_doorbell, int doorbell_index);
45void nbio_v7_0_ih_control(struct amdgpu_device *adev);
46u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev);
47void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
48 bool enable);
49#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
new file mode 100644
index 000000000000..2258323a3c26
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -0,0 +1,308 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Author: Huang Rui
23 *
24 */
25
26#include <linux/firmware.h>
27#include "amdgpu.h"
28#include "amdgpu_psp.h"
29#include "amdgpu_ucode.h"
30#include "soc15_common.h"
31#include "psp_v10_0.h"
32
33#include "vega10/soc15ip.h"
34#include "raven1/MP/mp_10_0_offset.h"
35#include "raven1/GC/gc_9_1_offset.h"
36#include "raven1/SDMA0/sdma0_4_1_offset.h"
37
38static int
39psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
40{
41 switch(ucode->ucode_id) {
42 case AMDGPU_UCODE_ID_SDMA0:
43 *type = GFX_FW_TYPE_SDMA0;
44 break;
45 case AMDGPU_UCODE_ID_SDMA1:
46 *type = GFX_FW_TYPE_SDMA1;
47 break;
48 case AMDGPU_UCODE_ID_CP_CE:
49 *type = GFX_FW_TYPE_CP_CE;
50 break;
51 case AMDGPU_UCODE_ID_CP_PFP:
52 *type = GFX_FW_TYPE_CP_PFP;
53 break;
54 case AMDGPU_UCODE_ID_CP_ME:
55 *type = GFX_FW_TYPE_CP_ME;
56 break;
57 case AMDGPU_UCODE_ID_CP_MEC1:
58 *type = GFX_FW_TYPE_CP_MEC;
59 break;
60 case AMDGPU_UCODE_ID_CP_MEC1_JT:
61 *type = GFX_FW_TYPE_CP_MEC_ME1;
62 break;
63 case AMDGPU_UCODE_ID_CP_MEC2:
64 *type = GFX_FW_TYPE_CP_MEC;
65 break;
66 case AMDGPU_UCODE_ID_CP_MEC2_JT:
67 *type = GFX_FW_TYPE_CP_MEC_ME2;
68 break;
69 case AMDGPU_UCODE_ID_RLC_G:
70 *type = GFX_FW_TYPE_RLC_G;
71 break;
72 case AMDGPU_UCODE_ID_SMC:
73 *type = GFX_FW_TYPE_SMU;
74 break;
75 case AMDGPU_UCODE_ID_UVD:
76 *type = GFX_FW_TYPE_UVD;
77 break;
78 case AMDGPU_UCODE_ID_VCE:
79 *type = GFX_FW_TYPE_VCE;
80 break;
81 case AMDGPU_UCODE_ID_MAXIMUM:
82 default:
83 return -EINVAL;
84 }
85
86 return 0;
87}
88
89int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd)
90{
91 int ret;
92 uint64_t fw_mem_mc_addr = ucode->mc_addr;
93 struct common_firmware_header *header;
94
95 memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
96 header = (struct common_firmware_header *)ucode->fw;
97
98 cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
99 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
100 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
101 cmd->cmd.cmd_load_ip_fw.fw_size = le32_to_cpu(header->ucode_size_bytes);
102
103 ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
104 if (ret)
105 DRM_ERROR("Unknown firmware type\n");
106
107 return ret;
108}
109
110int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
111{
112 int ret = 0;
113 unsigned int psp_ring_reg = 0;
114 struct psp_ring *ring;
115 struct amdgpu_device *adev = psp->adev;
116
117 ring = &psp->km_ring;
118
119 ring->ring_type = ring_type;
120
121 /* allocate 4k Page of Local Frame Buffer memory for ring */
122 ring->ring_size = 0x1000;
123 ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
124 AMDGPU_GEM_DOMAIN_VRAM,
125 &adev->firmware.rbuf,
126 &ring->ring_mem_mc_addr,
127 (void **)&ring->ring_mem);
128 if (ret) {
129 ring->ring_size = 0;
130 return ret;
131 }
132
133 /* Write low address of the ring to C2PMSG_69 */
134 psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
135 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
136 /* Write high address of the ring to C2PMSG_70 */
137 psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
138 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
139 /* Write size of ring to C2PMSG_71 */
140 psp_ring_reg = ring->ring_size;
141 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
142 /* Write the ring initialization command to C2PMSG_64 */
143 psp_ring_reg = ring_type;
144 psp_ring_reg = psp_ring_reg << 16;
145 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
146 /* Wait for response flag (bit 31) in C2PMSG_64 */
147 psp_ring_reg = 0;
148 while ((psp_ring_reg & 0x80000000) == 0) {
149 psp_ring_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64);
150 }
151
152 return 0;
153}
154
155int psp_v10_0_cmd_submit(struct psp_context *psp,
156 struct amdgpu_firmware_info *ucode,
157 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
158 int index)
159{
160 unsigned int psp_write_ptr_reg = 0;
161 struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
162 struct psp_ring *ring = &psp->km_ring;
163 struct amdgpu_device *adev = psp->adev;
164
165 /* KM (GPCOM) prepare write pointer */
166 psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
167
168 /* Update KM RB frame pointer to new frame */
169 if ((psp_write_ptr_reg % ring->ring_size) == 0)
170 write_frame = ring->ring_mem;
171 else
172 write_frame = ring->ring_mem + (psp_write_ptr_reg / (sizeof(struct psp_gfx_rb_frame) / 4));
173
174 /* Update KM RB frame */
175 write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
176 write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
177 write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
178 write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
179 write_frame->fence_value = index;
180
181 /* Update the write Pointer in DWORDs */
182 psp_write_ptr_reg += sizeof(struct psp_gfx_rb_frame) / 4;
183 psp_write_ptr_reg = (psp_write_ptr_reg >= ring->ring_size) ? 0 : psp_write_ptr_reg;
184 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
185
186 return 0;
187}
188
189static int
190psp_v10_0_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
191 unsigned int *sram_data_reg_offset,
192 enum AMDGPU_UCODE_ID ucode_id)
193{
194 int ret = 0;
195
196 switch(ucode_id) {
197/* TODO: needs to be confirmed */
198#if 0
199 case AMDGPU_UCODE_ID_SMC:
200 *sram_offset = 0;
201 *sram_addr_reg_offset = 0;
202 *sram_data_reg_offset = 0;
203 break;
204#endif
205
206 case AMDGPU_UCODE_ID_CP_CE:
207 *sram_offset = 0x0;
208 *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
209 *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
210 break;
211
212 case AMDGPU_UCODE_ID_CP_PFP:
213 *sram_offset = 0x0;
214 *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
215 *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
216 break;
217
218 case AMDGPU_UCODE_ID_CP_ME:
219 *sram_offset = 0x0;
220 *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
221 *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
222 break;
223
224 case AMDGPU_UCODE_ID_CP_MEC1:
225 *sram_offset = 0x10000;
226 *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
227 *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
228 break;
229
230 case AMDGPU_UCODE_ID_CP_MEC2:
231 *sram_offset = 0x10000;
232 *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
233 *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
234 break;
235
236 case AMDGPU_UCODE_ID_RLC_G:
237 *sram_offset = 0x2000;
238 *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
239 *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
240 break;
241
242 case AMDGPU_UCODE_ID_SDMA0:
243 *sram_offset = 0x0;
244 *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
245 *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
246 break;
247
248/* TODO: needs to be confirmed */
249#if 0
250 case AMDGPU_UCODE_ID_SDMA1:
251 *sram_offset = ;
252 *sram_addr_reg_offset = ;
253 break;
254
255 case AMDGPU_UCODE_ID_UVD:
256 *sram_offset = ;
257 *sram_addr_reg_offset = ;
258 break;
259
260 case AMDGPU_UCODE_ID_VCE:
261 *sram_offset = ;
262 *sram_addr_reg_offset = ;
263 break;
264#endif
265
266 case AMDGPU_UCODE_ID_MAXIMUM:
267 default:
268 ret = -EINVAL;
269 break;
270 }
271
272 return ret;
273}
274
275bool psp_v10_0_compare_sram_data(struct psp_context *psp,
276 struct amdgpu_firmware_info *ucode,
277 enum AMDGPU_UCODE_ID ucode_type)
278{
279 int err = 0;
280 unsigned int fw_sram_reg_val = 0;
281 unsigned int fw_sram_addr_reg_offset = 0;
282 unsigned int fw_sram_data_reg_offset = 0;
283 unsigned int ucode_size;
284 uint32_t *ucode_mem = NULL;
285 struct amdgpu_device *adev = psp->adev;
286
287 err = psp_v10_0_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset,
288 &fw_sram_data_reg_offset, ucode_type);
289 if (err)
290 return false;
291
292 WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
293
294 ucode_size = ucode->ucode_size;
295 ucode_mem = (uint32_t *)ucode->kaddr;
296 while (ucode_size) {
297 fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
298
299 if (*ucode_mem != fw_sram_reg_val)
300 return false;
301
302 ucode_mem++;
303 /* 4 bytes */
304 ucode_size -= 4;
305 }
306
307 return true;
308}
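psp_v10_0_cmd_submit() keeps the KM ring write pointer in dwords and advances it by one frame, sizeof(struct psp_gfx_rb_frame) / 4, per submission. A sketch of the wrap arithmetic, in the modulo form psp_v3_1_cmd_submit() uses further down; the frame and ring sizes here are illustrative.

#include <stdint.h>
#include <stdio.h>

#define FRAME_SIZE_DW	16u	/* illustrative sizeof(frame) / 4 */
#define RING_SIZE_DW	1024u	/* illustrative ring size in dwords */

/* Advance a dword write pointer by one frame, wrapping at the end
 * of the ring. */
static uint32_t advance_wptr(uint32_t wptr)
{
	return (wptr + FRAME_SIZE_DW) % RING_SIZE_DW;
}

int main(void)
{
	uint32_t wptr = RING_SIZE_DW - FRAME_SIZE_DW;

	printf("before wrap: %u\n", wptr);
	wptr = advance_wptr(wptr);	/* wraps back to slot 0 */
	printf("after  wrap: %u\n", wptr);
	return 0;
}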
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
new file mode 100644
index 000000000000..2022b7b7151e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
@@ -0,0 +1,41 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Author: Huang Rui
23 *
24 */
25#ifndef __PSP_V10_0_H__
26#define __PSP_V10_0_H__
27
28#include "amdgpu_psp.h"
29
30extern int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
31 struct psp_gfx_cmd_resp *cmd);
32extern int psp_v10_0_ring_init(struct psp_context *psp,
33 enum psp_ring_type ring_type);
34extern int psp_v10_0_cmd_submit(struct psp_context *psp,
35 struct amdgpu_firmware_info *ucode,
36 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
37 int index);
38extern bool psp_v10_0_compare_sram_data(struct psp_context *psp,
39 struct amdgpu_firmware_info *ucode,
40 enum AMDGPU_UCODE_ID ucode_type);
41#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 60a6407ba267..c98d77d0c8f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -24,7 +24,7 @@
24 */ 24 */
25 25
26#include <linux/firmware.h> 26#include <linux/firmware.h>
27#include "drmP.h" 27#include <drm/drmP.h>
28#include "amdgpu.h" 28#include "amdgpu.h"
29#include "amdgpu_psp.h" 29#include "amdgpu_psp.h"
30#include "amdgpu_ucode.h" 30#include "amdgpu_ucode.h"
@@ -172,7 +172,7 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
172 /* Check sOS sign of life register to confirm sys driver and sOS 172 /* Check sOS sign of life register to confirm sys driver and sOS
173 * have already been loaded. 173 * have already been loaded.
174 */ 174 */
175 sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)); 175 sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
176 if (sol_reg) 176 if (sol_reg)
177 return 0; 177 return 0;
178 178
@@ -188,10 +188,10 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
188 memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); 188 memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
189 189
190 /* Provide the sys driver to bootrom */ 190 /* Provide the sys driver to bootrom */
191 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), 191 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
192 (uint32_t)(psp->fw_pri_mc_addr >> 20)); 192 (uint32_t)(psp->fw_pri_mc_addr >> 20));
193 psp_gfxdrv_command_reg = 1 << 16; 193 psp_gfxdrv_command_reg = 1 << 16;
194 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 194 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
195 psp_gfxdrv_command_reg); 195 psp_gfxdrv_command_reg);
196 196
197 /* there might be handshake issue with hardware which needs delay */ 197 /* there might be handshake issue with hardware which needs delay */
@@ -213,7 +213,7 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
213 /* Check sOS sign of life register to confirm sys driver and sOS 213 /* Check sOS sign of life register to confirm sys driver and sOS
214 * have already been loaded. 214 * have already been loaded.
215 */ 215 */
216 sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)); 216 sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
217 if (sol_reg) 217 if (sol_reg)
218 return 0; 218 return 0;
219 219
@@ -229,17 +229,17 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
229 memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); 229 memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
230 230
231 /* Provide the PSP secure OS to bootrom */ 231 /* Provide the PSP secure OS to bootrom */
232 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), 232 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
233 (uint32_t)(psp->fw_pri_mc_addr >> 20)); 233 (uint32_t)(psp->fw_pri_mc_addr >> 20));
234 psp_gfxdrv_command_reg = 2 << 16; 234 psp_gfxdrv_command_reg = 2 << 16;
235 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 235 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
236 psp_gfxdrv_command_reg); 236 psp_gfxdrv_command_reg);
237 237
238 /* there might be handshake issue with hardware which needs delay */ 238 /* there might be handshake issue with hardware which needs delay */
239 mdelay(20); 239 mdelay(20);
240#if 0 240#if 0
241 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), 241 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
242 RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)), 242 RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
243 0, true); 243 0, true);
244#endif 244#endif
245 245
@@ -254,8 +254,8 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd
254 memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); 254 memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
255 255
256 cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; 256 cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
257 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = (uint32_t)fw_mem_mc_addr; 257 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
258 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = (uint32_t)((uint64_t)fw_mem_mc_addr >> 32); 258 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
259 cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; 259 cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
260 260
261 ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); 261 ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
@@ -299,17 +299,17 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
299 299
300 /* Write low address of the ring to C2PMSG_69 */ 300 /* Write low address of the ring to C2PMSG_69 */
301 psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); 301 psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
302 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg); 302 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
303 /* Write high address of the ring to C2PMSG_70 */ 303 /* Write high address of the ring to C2PMSG_70 */
304 psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); 304 psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
305 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_70), psp_ring_reg); 305 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
306 /* Write size of ring to C2PMSG_71 */ 306 /* Write size of ring to C2PMSG_71 */
307 psp_ring_reg = ring->ring_size; 307 psp_ring_reg = ring->ring_size;
308 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_71), psp_ring_reg); 308 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
309 /* Write the ring initialization command to C2PMSG_64 */ 309 /* Write the ring initialization command to C2PMSG_64 */
310 psp_ring_reg = ring_type; 310 psp_ring_reg = ring_type;
311 psp_ring_reg = psp_ring_reg << 16; 311 psp_ring_reg = psp_ring_reg << 16;
312 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); 312 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
313 313
314 /* there might be handshake issue with hardware which needs delay */ 314 /* there might be handshake issue with hardware which needs delay */
315 mdelay(20); 315 mdelay(20);
@@ -332,7 +332,7 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
332 332
333 /* Write the ring destroy command to C2PMSG_64 */ 333 /* Write the ring destroy command to C2PMSG_64 */
334 psp_ring_reg = 3 << 16; 334 psp_ring_reg = 3 << 16;
335 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); 335 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
336 336
337 /* there might be handshake issue with hardware which needs delay */ 337 /* there might be handshake issue with hardware which needs delay */
338 mdelay(20); 338 mdelay(20);
@@ -361,7 +361,7 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
361 uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; 361 uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
362 362
363 /* KM (GPCOM) prepare write pointer */ 363 /* KM (GPCOM) prepare write pointer */
364 psp_write_ptr_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67)); 364 psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
365 365
366 /* Update KM RB frame pointer to new frame */ 366 /* Update KM RB frame pointer to new frame */
367 /* write_frame ptr increments by size of rb_frame in bytes */ 367 /* write_frame ptr increments by size of rb_frame in bytes */
@@ -375,15 +375,15 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
375 memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); 375 memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
376 376
377 /* Update KM RB frame */ 377 /* Update KM RB frame */
378 write_frame->cmd_buf_addr_hi = (unsigned int)(cmd_buf_mc_addr >> 32); 378 write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
379 write_frame->cmd_buf_addr_lo = (unsigned int)(cmd_buf_mc_addr); 379 write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
380 write_frame->fence_addr_hi = (unsigned int)(fence_mc_addr >> 32); 380 write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
381 write_frame->fence_addr_lo = (unsigned int)(fence_mc_addr); 381 write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
382 write_frame->fence_value = index; 382 write_frame->fence_value = index;
383 383
384 /* Update the write Pointer in DWORDs */ 384 /* Update the write Pointer in DWORDs */
385 psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; 385 psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
386 WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67), psp_write_ptr_reg); 386 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
387 387
388 return 0; 388 return 0;
389} 389}
@@ -515,7 +515,7 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
515 uint32_t reg; 515 uint32_t reg;
516 516
517 reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000; 517 reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000;
518 WREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), reg); 518 WREG32_SOC15(NBIO, 0, mmPCIE_INDEX2, reg);
519 reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)); 519 reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2);
520 return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; 520 return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false;
521} 521}
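The cmd_submit hunk above swaps open-coded casts for lower_32_bits()/upper_32_bits(), the usual way to split a 64-bit MC address across a lo/hi register pair. Equivalent helpers, sketched for illustration; the kernel defines these as macros, and the doubled 16-bit shift in upper_32_bits() avoids a full-width shift that would be undefined on a 32-bit operand.

#include <stdint.h>
#include <stdio.h>

#define lower_32_bits(n)	((uint32_t)((n) & 0xffffffffu))
#define upper_32_bits(n)	((uint32_t)(((n) >> 16) >> 16))

int main(void)
{
	uint64_t fw_mem_mc_addr = 0x0000000123456789ull;

	printf("lo = 0x%08x, hi = 0x%08x\n",
	       lower_32_bits(fw_mem_mc_addr),
	       upper_32_bits(fw_mem_mc_addr));
	return 0;
}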
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index a69e5d4e1d2a..1d766ae98dc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -643,8 +643,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
643 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 643 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
644 644
645 /* Initialize the ring buffer's read and write pointers */ 645 /* Initialize the ring buffer's read and write pointers */
646 ring->wptr = 0;
646 WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); 647 WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
647 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); 648 sdma_v3_0_ring_set_wptr(ring);
648 WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); 649 WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
649 WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); 650 WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
650 651
@@ -659,9 +660,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
659 WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); 660 WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
660 WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); 661 WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
661 662
662 ring->wptr = 0;
663 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
664
665 doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); 663 doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]);
666 664
667 if (ring->use_doorbell) { 665 if (ring->use_doorbell) {
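The sdma_v3_0 change zeroes the software copy ring->wptr before publishing it through the ring's set-wptr helper, instead of poking the RB_WPTR register directly later; the helper writes whatever ring->wptr holds (via MMIO or doorbell), so resetting the register without the soft copy would leave a stale value to be replayed. A toy sketch of the ordering; the struct and helper are stand-ins, not the driver's types.

#include <stdint.h>
#include <stdio.h>

struct ring {
	uint64_t wptr;		/* software copy, in dwords */
	uint32_t hw_wptr;	/* stand-in for the RB_WPTR register */
};

/* Publishes the software copy, as the driver's set-wptr hook does
 * for both the MMIO and the doorbell path. */
static void ring_set_wptr(struct ring *r)
{
	r->hw_wptr = (uint32_t)(r->wptr << 2);	/* dwords to bytes */
}

int main(void)
{
	struct ring r = { .wptr = 42 };	/* stale state from before the reset */

	r.wptr = 0;		/* reset the soft copy first ... */
	ring_set_wptr(&r);	/* ... so the publish writes 0, not 42 << 2 */
	printf("hw_wptr = %u\n", r.hw_wptr);
	return 0;
}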
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index ecc70a730a54..4a65697ccc94 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -35,6 +35,7 @@
35#include "vega10/MMHUB/mmhub_1_0_offset.h" 35#include "vega10/MMHUB/mmhub_1_0_offset.h"
36#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" 36#include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
37#include "vega10/HDP/hdp_4_0_offset.h" 37#include "vega10/HDP/hdp_4_0_offset.h"
38#include "raven1/SDMA0/sdma0_4_1_default.h"
38 39
39#include "soc15_common.h" 40#include "soc15_common.h"
40#include "soc15.h" 41#include "soc15.h"
@@ -42,6 +43,10 @@
42 43
43MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); 44MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
44MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); 45MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
46MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
47
48#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
49#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
45 50
46static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); 51static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
47static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); 52static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
@@ -82,6 +87,26 @@ static const u32 golden_settings_sdma_vg10[] = {
 	SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002
 };
 
+static const u32 golden_settings_sdma_4_1[] =
+{
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0xfc3fffff, 0x40000051,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0111, 0x00000100,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0111, 0x00000100,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0
+};
+
+static const u32 golden_settings_sdma_rv1[] =
+{
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00000002,
+	SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00000002
+};
+
 static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset)
 {
 	u32 base = 0;
@@ -112,25 +137,19 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
 						golden_settings_sdma_vg10,
 						(const u32)ARRAY_SIZE(golden_settings_sdma_vg10));
 		break;
+	case CHIP_RAVEN:
+		amdgpu_program_register_sequence(adev,
+						 golden_settings_sdma_4_1,
+						 (const u32)ARRAY_SIZE(golden_settings_sdma_4_1));
+		amdgpu_program_register_sequence(adev,
+						 golden_settings_sdma_rv1,
+						 (const u32)ARRAY_SIZE(golden_settings_sdma_rv1));
+		break;
 	default:
 		break;
 	}
 }
 
-static void sdma_v4_0_print_ucode_regs(void *handle)
-{
-	int i;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	dev_info(adev->dev, "VEGA10 SDMA ucode registers\n");
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		dev_info(adev->dev, "  SDMA%d_UCODE_ADDR=0x%08X\n",
-			 i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR)));
-		dev_info(adev->dev, "  SDMA%d_UCODE_CHECKSUM=0x%08X\n",
-			 i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_CHECKSUM)));
-	}
-}
-
 /**
  * sdma_v4_0_init_microcode - load ucode images from disk
  *
@@ -158,6 +177,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 		chip_name = "vega10";
 		break;
+	case CHIP_RAVEN:
+		chip_name = "raven";
+		break;
 	default:
 		BUG();
 	}
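
The chip_name chosen above is spliced into the firmware path that the loader resolves under /lib/firmware, matching the MODULE_FIRMWARE("amdgpu/raven_sdma.bin") declaration added at the top of this file. A minimal sketch of how sdma_v4_0_init_microcode is expected to consume it (paraphrased from context, not quoted from this patch):

	char fw_name[30];
	int i, err;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (i == 1)	/* second engine ships as its own image, e.g. vega10_sdma1.bin */
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
		if (err)
			return err;	/* file missing under /lib/firmware */
	}

Raven only registers one SDMA instance (see the early_init hunk further down), so only the first image is requested there.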
@@ -350,7 +372,9 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	u32 ref_and_mask = 0;
 	struct nbio_hdp_flush_reg *nbio_hf_reg;
 
-	if (ring->adev->asic_type == CHIP_VEGA10)
+	if (ring->adev->flags & AMD_IS_APU)
+		nbio_hf_reg = &nbio_v7_0_hdp_flush_reg;
+	else
 		nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;
 
 	if (ring == &ring->adev->sdma.instance[0].ring)
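
This hunk is the first of several in the series that replace a hard-coded Vega10 assumption with an APU test: Raven's integrated north bridge is NBIO v7.0, while the discrete Vega10 keeps NBIO v6.1. The selection idiom, pulled out into a hypothetical helper for illustration (not part of the patch):

	static struct nbio_hdp_flush_reg *pick_hdp_flush_reg(struct amdgpu_device *adev)
	{
		if (adev->flags & AMD_IS_APU)		/* Raven */
			return &nbio_v7_0_hdp_flush_reg;
		return &nbio_v6_1_hdp_flush_reg;	/* Vega10 dGPU */
	}

The same AMD_IS_APU branch reappears below for SDMA doorbell ranges, and throughout soc15.c for PCIe index/data, memsize, rev id and doorbell apertures.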
@@ -581,7 +605,10 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 		}
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell);
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
-		nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index);
+		if (adev->flags & AMD_IS_APU)
+			nbio_v7_0_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index);
+		else
+			nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index);
 
 		if (amdgpu_sriov_vf(adev))
 			sdma_v4_0_ring_set_wptr(ring);
@@ -633,6 +660,69 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static void
+sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
+{
+	uint32_t def, data;
+
+	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
+		/* disable idle interrupt */
+		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
+		data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+
+		if (data != def)
+			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
+	} else {
+		/* disable idle interrupt */
+		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
+		data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+		if (data != def)
+			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
+	}
+}
+
+static void sdma_v4_1_init_power_gating(struct amdgpu_device *adev)
+{
+	uint32_t def, data;
+
+	/* Enable HW based PG. */
+	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
+	data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK;
+	if (data != def)
+		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
+
+	/* enable interrupt */
+	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
+	data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+	if (data != def)
+		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data);
+
+	/* Configure hold time to filter in-valid power on/off request. Use default right now */
+	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
+	data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK;
+	data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK);
+	/* Configure switch time for hysteresis purpose. Use default right now */
+	data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK;
+	data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK);
+	if(data != def)
+		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
+}
+
+static void sdma_v4_0_init_pg(struct amdgpu_device *adev)
+{
+	if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA))
+		return;
+
+	switch (adev->asic_type) {
+	case CHIP_RAVEN:
+		sdma_v4_1_init_power_gating(adev);
+		sdma_v4_1_update_power_gating(adev, true);
+		break;
+	default:
+		break;
+	}
+}
+
 /**
  * sdma_v4_0_rlc_resume - setup and start the async dma engines
  *
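
Both helpers added above stick to the driver's usual def/data discipline for mixed-field registers: read once, modify the field of interest, and write back only when the value actually changed, sparing a posted write on the slow register path. The skeleton of the pattern (illustrative reduction, not new patch code):

	uint32_t def, data;

	def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL));
	data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK;	/* touch one field */
	if (data != def)				/* skip redundant writes */
		WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);

sdma_v4_0_init_pg wires this into rlc_resume in the next hunk, so power gating is programmed whenever the engines are brought up.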
@@ -643,7 +733,8 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
  */
 static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev)
 {
-	/* XXX todo */
+	sdma_v4_0_init_pg(adev);
+
 	return 0;
 }
 
@@ -699,8 +790,6 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
 	}
 
-	sdma_v4_0_print_ucode_regs(adev);
-
 	return 0;
 }
 
@@ -726,7 +815,6 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
 	}
 
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
-		DRM_INFO("Loading via direct write\n");
 		r = sdma_v4_0_load_microcode(adev);
 		if (r)
 			return r;
@@ -764,8 +852,6 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
 	u32 tmp;
 	u64 gpu_addr;
 
-	DRM_INFO("In Ring test func\n");
-
 	r = amdgpu_wb_get(adev, &index);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
@@ -1038,9 +1124,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
@@ -1074,7 +1159,10 @@ static int sdma_v4_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	adev->sdma.num_instances = 2;
+	if (adev->asic_type == CHIP_RAVEN)
+		adev->sdma.num_instances = 1;
+	else
+		adev->sdma.num_instances = 2;
 
 	sdma_v4_0_set_ring_funcs(adev);
 	sdma_v4_0_set_buffer_funcs(adev);
@@ -1406,6 +1494,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		sdma_v4_0_update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		sdma_v4_0_update_medium_grain_light_sleep(adev,
@@ -1420,6 +1509,17 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
 static int sdma_v4_0_set_powergating_state(void *handle,
 					  enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	switch (adev->asic_type) {
+	case CHIP_RAVEN:
+		sdma_v4_1_update_power_gating(adev,
+				state == AMD_PG_STATE_GATE ? true : false);
+		break;
+	default:
+		break;
+	}
+
 	return 0;
 }
 
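The emit_vm_flush hunk above (mirrored later in uvd_v7_0.c) stops open-coding the PDE valid bit and policing the address layout with a BUG_ON; the GART backend now formats the page-directory entry for the ASIC. Roughly, the new call shape is (sketch; pd_addr is whatever the scheduler handed the callback):

	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);	/* ASIC-specific PDE encoding */
	pd_addr |= AMDGPU_PTE_VALID;				/* mark the entry present */

The encoded value is then written to the VM hub's page-table base registers exactly as before.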
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index c0b1aabf282f..f45fb0f022b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -24,7 +24,7 @@
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_ih.h"
@@ -971,44 +971,44 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 }
 
 static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
-	{GRBM_STATUS, false},
-	{GB_ADDR_CONFIG, false},
-	{MC_ARB_RAMCFG, false},
-	{GB_TILE_MODE0, false},
-	{GB_TILE_MODE1, false},
-	{GB_TILE_MODE2, false},
-	{GB_TILE_MODE3, false},
-	{GB_TILE_MODE4, false},
-	{GB_TILE_MODE5, false},
-	{GB_TILE_MODE6, false},
-	{GB_TILE_MODE7, false},
-	{GB_TILE_MODE8, false},
-	{GB_TILE_MODE9, false},
-	{GB_TILE_MODE10, false},
-	{GB_TILE_MODE11, false},
-	{GB_TILE_MODE12, false},
-	{GB_TILE_MODE13, false},
-	{GB_TILE_MODE14, false},
-	{GB_TILE_MODE15, false},
-	{GB_TILE_MODE16, false},
-	{GB_TILE_MODE17, false},
-	{GB_TILE_MODE18, false},
-	{GB_TILE_MODE19, false},
-	{GB_TILE_MODE20, false},
-	{GB_TILE_MODE21, false},
-	{GB_TILE_MODE22, false},
-	{GB_TILE_MODE23, false},
-	{GB_TILE_MODE24, false},
-	{GB_TILE_MODE25, false},
-	{GB_TILE_MODE26, false},
-	{GB_TILE_MODE27, false},
-	{GB_TILE_MODE28, false},
-	{GB_TILE_MODE29, false},
-	{GB_TILE_MODE30, false},
-	{GB_TILE_MODE31, false},
-	{CC_RB_BACKEND_DISABLE, false, true},
-	{GC_USER_RB_BACKEND_DISABLE, false, true},
-	{PA_SC_RASTER_CONFIG, false, true},
+	{GRBM_STATUS},
+	{GB_ADDR_CONFIG},
+	{MC_ARB_RAMCFG},
+	{GB_TILE_MODE0},
+	{GB_TILE_MODE1},
+	{GB_TILE_MODE2},
+	{GB_TILE_MODE3},
+	{GB_TILE_MODE4},
+	{GB_TILE_MODE5},
+	{GB_TILE_MODE6},
+	{GB_TILE_MODE7},
+	{GB_TILE_MODE8},
+	{GB_TILE_MODE9},
+	{GB_TILE_MODE10},
+	{GB_TILE_MODE11},
+	{GB_TILE_MODE12},
+	{GB_TILE_MODE13},
+	{GB_TILE_MODE14},
+	{GB_TILE_MODE15},
+	{GB_TILE_MODE16},
+	{GB_TILE_MODE17},
+	{GB_TILE_MODE18},
+	{GB_TILE_MODE19},
+	{GB_TILE_MODE20},
+	{GB_TILE_MODE21},
+	{GB_TILE_MODE22},
+	{GB_TILE_MODE23},
+	{GB_TILE_MODE24},
+	{GB_TILE_MODE25},
+	{GB_TILE_MODE26},
+	{GB_TILE_MODE27},
+	{GB_TILE_MODE28},
+	{GB_TILE_MODE29},
+	{GB_TILE_MODE30},
+	{GB_TILE_MODE31},
+	{CC_RB_BACKEND_DISABLE, true},
+	{GC_USER_RB_BACKEND_DISABLE, true},
+	{PA_SC_RASTER_CONFIG, true},
 };
 
 static uint32_t si_get_register_value(struct amdgpu_device *adev,
@@ -1093,13 +1093,13 @@ static int si_read_register(struct amdgpu_device *adev, u32 se_num,
 
 	*value = 0;
 	for (i = 0; i < ARRAY_SIZE(si_allowed_read_registers); i++) {
+		bool indexed = si_allowed_read_registers[i].grbm_indexed;
+
 		if (reg_offset != si_allowed_read_registers[i].reg_offset)
 			continue;
 
-		if (!si_allowed_read_registers[i].untouched)
-			*value = si_get_register_value(adev,
-				si_allowed_read_registers[i].grbm_indexed,
-				se_num, sh_num, reg_offset);
+		*value = si_get_register_value(adev, indexed, se_num, sh_num,
+					       reg_offset);
 		return 0;
 	}
 	return -EINVAL;
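With the untouched flag gone, every row that remains in si_allowed_read_registers is readable, so the lookup reduces to an offset match plus the grbm_indexed steering hint. Reconstructed from usage here (see amdgpu.h for the authoritative layout after this series):

	struct amdgpu_allowed_register_entry {
		uint32_t reg_offset;
		bool grbm_indexed;	/* read under GRBM_GFX_INDEX se/sh steering */
	};

Dropping the middle member is what turns {GRBM_STATUS, false} into {GRBM_STATUS} and {CC_RB_BACKEND_DISABLE, false, true} into {CC_RB_BACKEND_DISABLE, true} throughout the table.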
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 7c1c5d127281..a7ad8390981c 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -21,7 +21,7 @@
  *
  */
 
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_dpm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index e66084211c74..ce25e03a077d 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "sid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/si_smc.c b/drivers/gpu/drm/amd/amdgpu/si_smc.c
index 0726bc3b6f90..4a2fd8b61940 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_smc.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "sid.h"
 #include "ppsmc.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 6b55d451ae7f..a7341d88a320 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -23,7 +23,7 @@
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_atomfirmware.h"
 #include "amdgpu_ih.h"
@@ -57,6 +57,7 @@
 #include "sdma_v4_0.h"
 #include "uvd_v7_0.h"
 #include "vce_v4_0.h"
+#include "vcn_v1_0.h"
 #include "amdgpu_powerplay.h"
 #include "dce_virtual.h"
 #include "mxgpu_ai.h"
@@ -104,10 +105,10 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 	u32 r;
 	struct nbio_pcie_index_data *nbio_pcie_id;
 
-	if (adev->asic_type == CHIP_VEGA10)
-		nbio_pcie_id = &nbio_v6_1_pcie_index_data;
+	if (adev->flags & AMD_IS_APU)
+		nbio_pcie_id = &nbio_v7_0_pcie_index_data;
 	else
-		BUG();
+		nbio_pcie_id = &nbio_v6_1_pcie_index_data;
 
 	address = nbio_pcie_id->index_offset;
 	data = nbio_pcie_id->data_offset;
@@ -125,10 +126,10 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 	unsigned long flags, address, data;
 	struct nbio_pcie_index_data *nbio_pcie_id;
 
-	if (adev->asic_type == CHIP_VEGA10)
-		nbio_pcie_id = &nbio_v6_1_pcie_index_data;
+	if (adev->flags & AMD_IS_APU)
+		nbio_pcie_id = &nbio_v7_0_pcie_index_data;
 	else
-		BUG();
+		nbio_pcie_id = &nbio_v6_1_pcie_index_data;
 
 	address = nbio_pcie_id->index_offset;
 	data = nbio_pcie_id->data_offset;
@@ -199,13 +200,20 @@ static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 
 static u32 soc15_get_config_memsize(struct amdgpu_device *adev)
 {
-	return nbio_v6_1_get_memsize(adev);
+	if (adev->flags & AMD_IS_APU)
+		return nbio_v7_0_get_memsize(adev);
+	else
+		return nbio_v6_1_get_memsize(adev);
 }
 
 static const u32 vega10_golden_init[] =
 {
 };
 
+static const u32 raven_golden_init[] =
+{
+};
+
 static void soc15_init_golden_registers(struct amdgpu_device *adev)
 {
 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
@@ -217,6 +225,11 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev)
 						vega10_golden_init,
 						(const u32)ARRAY_SIZE(vega10_golden_init));
 		break;
+	case CHIP_RAVEN:
+		amdgpu_program_register_sequence(adev,
+						 raven_golden_init,
+						 (const u32)ARRAY_SIZE(raven_golden_init));
+		break;
 	default:
 		break;
 	}
@@ -280,29 +293,25 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
-static struct amdgpu_allowed_register_entry vega10_allowed_read_registers[] = {
-	/* todo */
-};
-
 static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = {
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3), false},
-	{ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG), false},
-	{ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_STAT), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS), false},
-	{ SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), false},
+	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)},
+	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)},
+	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)},
+	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)},
+	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)},
+	{ SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)},
+	{ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG)},
+	{ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_STAT)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)},
+	{ SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)},
+	{ SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)},
 };
 
 static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
@@ -341,41 +350,16 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev,
 static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
 			u32 sh_num, u32 reg_offset, u32 *value)
 {
-	struct amdgpu_allowed_register_entry *asic_register_table = NULL;
-	struct amdgpu_allowed_register_entry *asic_register_entry;
-	uint32_t size, i;
+	uint32_t i;
 
 	*value = 0;
-	switch (adev->asic_type) {
-	case CHIP_VEGA10:
-		asic_register_table = vega10_allowed_read_registers;
-		size = ARRAY_SIZE(vega10_allowed_read_registers);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (asic_register_table) {
-		for (i = 0; i < size; i++) {
-			asic_register_entry = asic_register_table + i;
-			if (reg_offset != asic_register_entry->reg_offset)
-				continue;
-			if (!asic_register_entry->untouched)
-				*value = soc15_get_register_value(adev,
-						asic_register_entry->grbm_indexed,
-						se_num, sh_num, reg_offset);
-			return 0;
-		}
-	}
-
 	for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) {
 		if (reg_offset != soc15_allowed_read_registers[i].reg_offset)
 			continue;
 
-		if (!soc15_allowed_read_registers[i].untouched)
-			*value = soc15_get_register_value(adev,
-					soc15_allowed_read_registers[i].grbm_indexed,
-					se_num, sh_num, reg_offset);
+		*value = soc15_get_register_value(adev,
+					soc15_allowed_read_registers[i].grbm_indexed,
+					se_num, sh_num, reg_offset);
 		return 0;
 	}
 	return -EINVAL;
@@ -396,7 +380,10 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (nbio_v6_1_get_memsize(adev) != 0xffffffff)
+		u32 memsize = (adev->flags & AMD_IS_APU) ?
+			nbio_v7_0_get_memsize(adev) :
+			nbio_v6_1_get_memsize(adev);
+		if (memsize != 0xffffffff)
 			break;
 		udelay(1);
 	}
@@ -470,8 +457,12 @@ static void soc15_program_aspm(struct amdgpu_device *adev)
 static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev,
 					   bool enable)
 {
-	nbio_v6_1_enable_doorbell_aperture(adev, enable);
-	nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable);
+	if (adev->flags & AMD_IS_APU) {
+		nbio_v7_0_enable_doorbell_aperture(adev, enable);
+	} else {
+		nbio_v6_1_enable_doorbell_aperture(adev, enable);
+		nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable);
+	}
 }
 
 static const struct amdgpu_ip_block_version vega10_common_ip_block =
@@ -493,8 +484,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		amdgpu_ip_block_add(adev, &vega10_common_ip_block);
-		amdgpu_ip_block_add(adev, &gfxhub_v1_0_ip_block);
-		amdgpu_ip_block_add(adev, &mmhub_v1_0_ip_block);
 		amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block);
 		amdgpu_ip_block_add(adev, &vega10_ih_ip_block);
 		if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1)
@@ -508,6 +497,18 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block);
 		amdgpu_ip_block_add(adev, &vce_v4_0_ip_block);
 		break;
+	case CHIP_RAVEN:
+		amdgpu_ip_block_add(adev, &vega10_common_ip_block);
+		amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block);
+		amdgpu_ip_block_add(adev, &vega10_ih_ip_block);
+		amdgpu_ip_block_add(adev, &psp_v10_0_ip_block);
+		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+		amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block);
+		amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block);
+		amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -517,7 +518,10 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 
 static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
 {
-	return nbio_v6_1_get_rev_id(adev);
+	if (adev->flags & AMD_IS_APU)
+		return nbio_v7_0_get_rev_id(adev);
+	else
+		return nbio_v6_1_get_rev_id(adev);
 }
 
 
@@ -560,11 +564,6 @@ static int soc15_common_early_init(void *handle)
 	    (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
 		psp_enabled = true;
 
-	if (amdgpu_sriov_vf(adev)) {
-		amdgpu_virt_init_setting(adev);
-		xgpu_ai_mailbox_set_irq_funcs(adev);
-	}
-
 	/*
 	 * nbio need be used for both sdma and gfx9, but only
 	 * initializes once
@@ -573,6 +572,9 @@ static int soc15_common_early_init(void *handle)
 	case CHIP_VEGA10:
 		nbio_v6_1_init(adev);
 		break;
+	case CHIP_RAVEN:
+		nbio_v7_0_init(adev);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -603,11 +605,40 @@ static int soc15_common_early_init(void *handle)
 		adev->pg_flags = 0;
 		adev->external_rev_id = 0x1;
 		break;
+	case CHIP_RAVEN:
+		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			AMD_CG_SUPPORT_GFX_RLC_LS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_GFX_3D_CGCG |
+			AMD_CG_SUPPORT_GFX_3D_CGLS |
+			AMD_CG_SUPPORT_GFX_CGCG |
+			AMD_CG_SUPPORT_GFX_CGLS |
+			AMD_CG_SUPPORT_BIF_MGCG |
+			AMD_CG_SUPPORT_BIF_LS |
+			AMD_CG_SUPPORT_HDP_MGCG |
+			AMD_CG_SUPPORT_HDP_LS |
+			AMD_CG_SUPPORT_DRM_MGCG |
+			AMD_CG_SUPPORT_DRM_LS |
+			AMD_CG_SUPPORT_ROM_MGCG |
+			AMD_CG_SUPPORT_MC_MGCG |
+			AMD_CG_SUPPORT_MC_LS |
+			AMD_CG_SUPPORT_SDMA_MGCG |
+			AMD_CG_SUPPORT_SDMA_LS;
+		adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+				 AMD_PG_SUPPORT_MMHUB;
+		adev->external_rev_id = 0x1;
+		break;
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
 	}
 
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_setting(adev);
+		xgpu_ai_mailbox_set_irq_funcs(adev);
+	}
+
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
 	amdgpu_get_pcie_info(adev);
@@ -825,6 +856,20 @@ static int soc15_common_set_clockgating_state(void *handle,
 		soc15_update_df_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		break;
+	case CHIP_RAVEN:
+		nbio_v7_0_update_medium_grain_clock_gating(adev,
+				state == AMD_CG_STATE_GATE ? true : false);
+		nbio_v6_1_update_medium_grain_light_sleep(adev,
+				state == AMD_CG_STATE_GATE ? true : false);
+		soc15_update_hdp_light_sleep(adev,
+				state == AMD_CG_STATE_GATE ? true : false);
+		soc15_update_drm_clock_gating(adev,
+				state == AMD_CG_STATE_GATE ? true : false);
+		soc15_update_drm_light_sleep(adev,
+				state == AMD_CG_STATE_GATE ? true : false);
+		soc15_update_rom_medium_grain_clock_gating(adev,
+				state == AMD_CG_STATE_GATE ? true : false);
+		break;
 	default:
 		break;
 	}
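The cg_flags/pg_flags words filled in for CHIP_RAVEN are plain bitmasks; gating toggles elsewhere in the driver test them before touching hardware, so advertising a bit here is what arms the corresponding code paths. A sketch of the consumer side (conditions illustrative, not new patch code):

	if (adev->pg_flags & AMD_PG_SUPPORT_SDMA)
		sdma_v4_0_init_pg(adev);	/* see the sdma_v4_0.c hunks above */

	if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)
		sdma_v4_0_update_medium_grain_light_sleep(adev, true);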
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 378a46da585a..acb3cdb119f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -25,6 +25,7 @@
 #define __SOC15_H__
 
 #include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
 
 extern const struct amd_ip_funcs soc15_common_ip_funcs;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index e8df6d820dbe..e2d330eed952 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -63,6 +63,13 @@ struct nbio_pcie_index_data {
 		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
 		(ip##_BASE__INST##inst##_SEG4 + reg))))))
 
+#define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \
+	RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
+		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
+		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
+		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
+		(ip##_BASE__INST##inst##_SEG4 + reg))))) + offset)
+
 #define WREG32_SOC15(ip, inst, reg, value) \
 	WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
 		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
@@ -70,6 +77,13 @@ struct nbio_pcie_index_data {
 		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
 		(ip##_BASE__INST##inst##_SEG4 + reg))))), value)
 
+#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
+	WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \
+		(1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \
+		(2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \
+		(3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \
+		(ip##_BASE__INST##inst##_SEG4 + reg))))) + offset, value)
+
 #endif
 
 
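The new *_SOC15_OFFSET variants behave exactly like RREG32_SOC15/WREG32_SOC15 — the base segment is still resolved from reg##_BASE_IDX at compile time — but add a runtime dword offset, which is what callers indexing a contiguous register bank need. A hypothetical use (register name illustrative):

	/* read/write the i-th register of a bank starting at mmSCRATCH_REG0 */
	u32 v = RREG32_SOC15_OFFSET(GC, 0, mmSCRATCH_REG0, i);
	WREG32_SOC15_OFFSET(GC, 0, mmSCRATCH_REG0, i, v);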
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 75403c7c8c9e..e79befd80eed 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -132,6 +132,7 @@
  * 1 - pfp
  */
 #define	PACKET3_INDIRECT_BUFFER			0x3F
+#define		INDIRECT_BUFFER_VALID			(1 << 23)
 #define		INDIRECT_BUFFER_CACHE_POLICY(x)		((x) << 28)
 		/* 0 - LRU
 		 * 1 - Stream
@@ -259,8 +260,97 @@
 #define	PACKET3_WAIT_ON_CE_COUNTER			0x86
 #define	PACKET3_WAIT_ON_DE_COUNTER_DIFF			0x88
 #define	PACKET3_SWITCH_BUFFER				0x8B
+#define	PACKET3_FRAME_CONTROL				0x90
+#	define	FRAME_CMD(x) ((x) << 28)
+			/*
+			 * x=0: tmz_begin
+			 * x=1: tmz_end
+			 */
+
 #define	PACKET3_SET_RESOURCES				0xA0
+/* 1. header
+ * 2. CONTROL
+ * 3. QUEUE_MASK_LO [31:0]
+ * 4. QUEUE_MASK_HI [31:0]
+ * 5. GWS_MASK_LO [31:0]
+ * 6. GWS_MASK_HI [31:0]
+ * 7. OAC_MASK [15:0]
+ * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
+ */
+#	define PACKET3_SET_RESOURCES_VMID_MASK(x)	((x) << 0)
+#	define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x)	((x) << 16)
+#	define PACKET3_SET_RESOURCES_QUEUE_TYPE(x)	((x) << 29)
 #define	PACKET3_MAP_QUEUES				0xA2
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. MQD_ADDR_LO [31:0]
+ * 5. MQD_ADDR_HI [31:0]
+ * 6. WPTR_ADDR_LO [31:0]
+ * 7. WPTR_ADDR_HI [31:0]
+ */
+/* CONTROL */
+#	define PACKET3_MAP_QUEUES_QUEUE_SEL(x)		((x) << 4)
+#	define PACKET3_MAP_QUEUES_VMID(x)		((x) << 8)
+#	define PACKET3_MAP_QUEUES_QUEUE(x)		((x) << 13)
+#	define PACKET3_MAP_QUEUES_PIPE(x)		((x) << 16)
+#	define PACKET3_MAP_QUEUES_ME(x)			((x) << 18)
+#	define PACKET3_MAP_QUEUES_QUEUE_TYPE(x)		((x) << 21)
+#	define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x)	((x) << 24)
+#	define PACKET3_MAP_QUEUES_ENGINE_SEL(x)		((x) << 26)
+#	define PACKET3_MAP_QUEUES_NUM_QUEUES(x)		((x) << 29)
+/* CONTROL2 */
+#	define PACKET3_MAP_QUEUES_CHECK_DISABLE(x)	((x) << 1)
+#	define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x)	((x) << 2)
+#define	PACKET3_UNMAP_QUEUES				0xA3
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. CONTROL3
+ * 5. CONTROL4
+ * 6. CONTROL5
+ */
+/* CONTROL */
+#	define PACKET3_UNMAP_QUEUES_ACTION(x)		((x) << 0)
+		/* 0 - PREEMPT_QUEUES
+		 * 1 - RESET_QUEUES
+		 * 2 - DISABLE_PROCESS_QUEUES
+		 * 3 - PREEMPT_QUEUES_NO_UNMAP
+		 */
+#	define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x)	((x) << 4)
+#	define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x)	((x) << 26)
+#	define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x)	((x) << 29)
+/* CONTROL2a */
+#	define PACKET3_UNMAP_QUEUES_PASID(x)		((x) << 0)
+/* CONTROL2b */
+#	define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x)	((x) << 2)
+/* CONTROL3a */
+#	define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x)	((x) << 2)
+/* CONTROL3b */
+#	define PACKET3_UNMAP_QUEUES_RB_WPTR(x)		((x) << 0)
+/* CONTROL4 */
+#	define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x)	((x) << 2)
+/* CONTROL5 */
+#	define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x)	((x) << 2)
+#define	PACKET3_QUERY_STATUS				0xA4
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. ADDR_LO [31:0]
+ * 5. ADDR_HI [31:0]
+ * 6. DATA_LO [31:0]
+ * 7. DATA_HI [31:0]
+ */
+/* CONTROL */
+#	define PACKET3_QUERY_STATUS_CONTEXT_ID(x)	((x) << 0)
+#	define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x)	((x) << 28)
+#	define PACKET3_QUERY_STATUS_COMMAND(x)		((x) << 30)
+/* CONTROL2a */
+#	define PACKET3_QUERY_STATUS_PASID(x)		((x) << 0)
+/* CONTROL2b */
+#	define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x)	((x) << 2)
+#	define PACKET3_QUERY_STATUS_ENG_SEL(x)		((x) << 25)
+
 
 #define	VCE_CMD_NO_OP					0x00000000
 #define	VCE_CMD_END					0x00000001
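
The PACKET3_MAP_QUEUES/UNMAP_QUEUES/QUERY_STATUS additions are shift macros meant to be OR-ed together into the packet's CONTROL dwords, following the dword layouts spelled out in the comments above. A hedged sketch of emitting a compute-queue map with them (field values illustrative; the real emitter lives in the gfx9 KIQ code):

	u32 ctrl  = PACKET3_MAP_QUEUES_QUEUE_SEL(0) |	/* map by queue slot */
		    PACKET3_MAP_QUEUES_VMID(0) |
		    PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
		    PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
		    PACKET3_MAP_QUEUES_ME(1) |		/* compute MEC */
		    PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |	/* normal compute queue */
		    PACKET3_MAP_QUEUES_ENGINE_SEL(0) |
		    PACKET3_MAP_QUEUES_NUM_QUEUES(1);
	u32 ctrl2 = PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, ctrl);
	amdgpu_ring_write(kiq_ring, ctrl2);
	/* MQD and WPTR addresses follow, per the 7-dword layout above */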
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 3a5097ac2bb4..923df2c0e535 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index eca8f6e01e97..987b958368ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -58,7 +58,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR));
+	return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
 }
 
 /**
@@ -73,9 +73,9 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 
 	if (ring == &adev->uvd.ring_enc[0])
-		return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR));
+		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
 	else
-		return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2));
+		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
 }
 
 /**
@@ -89,7 +89,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR));
+	return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
 }
 
 /**
@@ -107,9 +107,9 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
 		return adev->wb.wb[ring->wptr_offs];
 
 	if (ring == &adev->uvd.ring_enc[0])
-		return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR));
+		return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
 	else
-		return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2));
+		return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
 }
 
 /**
114 114
115/** 115/**
@@ -123,7 +123,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
123{ 123{
124 struct amdgpu_device *adev = ring->adev; 124 struct amdgpu_device *adev = ring->adev;
125 125
126 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), lower_32_bits(ring->wptr)); 126 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
127} 127}
128 128
129/** 129/**
@@ -145,10 +145,10 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
145 } 145 }
146 146
147 if (ring == &adev->uvd.ring_enc[0]) 147 if (ring == &adev->uvd.ring_enc[0])
148 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), 148 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
149 lower_32_bits(ring->wptr)); 149 lower_32_bits(ring->wptr));
150 else 150 else
151 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), 151 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
152 lower_32_bits(ring->wptr)); 152 lower_32_bits(ring->wptr));
153} 153}
154 154
@@ -562,7 +562,13 @@ static int uvd_v7_0_hw_fini(void *handle)
562 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 562 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
563 struct amdgpu_ring *ring = &adev->uvd.ring; 563 struct amdgpu_ring *ring = &adev->uvd.ring;
564 564
565 uvd_v7_0_stop(adev); 565 if (!amdgpu_sriov_vf(adev))
566 uvd_v7_0_stop(adev);
567 else {
568 /* full access mode, so don't touch any UVD register */
569 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
570 }
571
566 ring->ready = false; 572 ring->ready = false;
567 573
568 return 0; 574 return 0;
@@ -611,46 +617,46 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
 	uint32_t offset;
 
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-		WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
 			lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-		WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
			upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
 		offset = 0;
 	} else {
-		WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
 			lower_32_bits(adev->uvd.gpu_addr));
-		WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
 			upper_32_bits(adev->uvd.gpu_addr));
 		offset = size;
 	}
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
 		AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
 		lower_32_bits(adev->uvd.gpu_addr + offset));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
 		upper_32_bits(adev->uvd.gpu_addr + offset));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
 		lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
 		upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
 		AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG),
+	WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
 		adev->gfx.config.gb_addr_config);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG),
+	WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
 		adev->gfx.config.gb_addr_config);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG),
+	WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
 		adev->gfx.config.gb_addr_config);
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+	WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
 }
 
 static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
@@ -664,29 +670,29 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
 
 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
-	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
-	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
+	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
 
 	/* 2, update vmid of descriptor */
-	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
+	data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID);
 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
-	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
+	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID, data);
 
 	/* 3, notify mmsch about the size of this descriptor */
-	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
+	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE, size);
 
 	/* 4, set resp to zero */
-	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
+	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
 
 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
-	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
+	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
 
-	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+	data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP);
 	loop = 1000;
 	while ((data & 0x10000002) != 0x10000002) {
 		udelay(10);
-		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
+		data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP);
 		loop--;
 		if (!loop)
 			break;
@@ -696,6 +702,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
 		return -EBUSY;
 	}
+	WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0);
 
 	return 0;
 }
@@ -928,7 +935,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
 	mdelay(1);
 
 	/* put LMI, VCPU, RBC etc... into reset */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
 		UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
 		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
 		UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
@@ -940,7 +947,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
 	mdelay(5);
 
 	/* initialize UVD memory controller */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
 		(0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
 		UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
 		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
@@ -953,23 +960,23 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
 	lmi_swap_cntl = 0xa;
 	mp_swap_cntl = 0;
 #endif
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), lmi_swap_cntl);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), mp_swap_cntl);
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+	WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
 
 	/* take all subblocks out of reset, except VCPU */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
 		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
 	mdelay(5);
 
 	/* enable VCPU clock */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
 		UVD_VCPU_CNTL__CLK_EN_MASK);
 
 	/* enable UMC */
@@ -977,14 +984,14 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
 		~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
 	/* boot up the VCPU */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
+	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
 	mdelay(10);
 
 	for (i = 0; i < 10; ++i) {
 		uint32_t status;
 
 		for (j = 0; j < 100; ++j) {
-			status = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS));
+			status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
 			if (status & 2)
 				break;
 			mdelay(10);
@@ -1025,44 +1032,44 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
 
 	/* set the write pointer delay */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
 
 	/* set the wb address */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR),
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
 		(upper_32_bits(ring->gpu_addr) >> 2));
 
 	/* programm the RB_BASE for ring buffer */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
 		lower_32_bits(ring->gpu_addr));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
 		upper_32_bits(ring->gpu_addr));
 
 	/* Initialize the ring buffer's read and write pointers */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR), 0);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
 
-	ring->wptr = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR),
+	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
 		lower_32_bits(ring->wptr));
 
 	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
 		~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
 
 	ring = &adev->uvd.ring_enc[0];
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR), lower_32_bits(ring->wptr));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), lower_32_bits(ring->wptr));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
+	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
 
 	ring = &adev->uvd.ring_enc[1];
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2), lower_32_bits(ring->wptr));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), lower_32_bits(ring->wptr));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO2), ring->gpu_addr);
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE2), ring->ring_size / 4);
+	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
 
 	return 0;
 }
@@ -1077,7 +1084,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
 static void uvd_v7_0_stop(struct amdgpu_device *adev)
 {
 	/* force RBC into idle state */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0x11010101);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
 
 	/* Stall UMC and register bus before resetting VCPU */
 	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
@@ -1086,12 +1093,12 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev)
 	mdelay(1);
 
 	/* put VCPU into reset */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
 		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
 	mdelay(5);
 
 	/* disable VCPU clock */
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0x0);
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
 
 	/* Unstall UMC and register bus */
 	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
@@ -1196,7 +1203,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 	unsigned i;
 	int r;
 
-	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
+	WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
@@ -1208,7 +1215,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0xDEADBEEF);
 	amdgpu_ring_commit(ring);
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
+		tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
 			break;
 		DRM_UDELAY(1);
@@ -1309,9 +1316,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
 	data1 = upper_32_bits(pd_addr);
@@ -1350,9 +1356,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->vm_inv_eng;
 
-	pd_addr = pd_addr | 0x1; /* valid bit */
-	/* now only use physical base address of PDE and valid */
-	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
 
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
 	amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
@@ -1408,8 +1413,8 @@ static bool uvd_v7_0_check_soft_reset(void *handle)
1408 1413
1409 if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || 1414 if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
1410 REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || 1415 REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
1411 (RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS) & 1416 (RREG32_SOC15(UVD, 0, mmUVD_STATUS) &
1412 AMDGPU_UVD_STATUS_BUSY_MASK))) 1417 AMDGPU_UVD_STATUS_BUSY_MASK))
1413 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 1418 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
1414 SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); 1419 SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
1415 1420
@@ -1516,9 +1521,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
1516{ 1521{
1517 uint32_t data, data1, data2, suvd_flags; 1522 uint32_t data, data1, data2, suvd_flags;
1518 1523
1519 data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL)); 1524 data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL);
1520 data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); 1525 data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
1521 data2 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL)); 1526 data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL);
1522 1527
1523 data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | 1528 data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
1524 UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); 1529 UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
@@ -1562,18 +1567,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
1562 UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); 1567 UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
1563 data1 |= suvd_flags; 1568 data1 |= suvd_flags;
1564 1569
1565 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), data); 1570 WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data);
1566 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), 0); 1571 WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0);
1567 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); 1572 WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
1568 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL), data2); 1573 WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2);
1569} 1574}
1570 1575
1571static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) 1576static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
1572{ 1577{
1573 uint32_t data, data1, cgc_flags, suvd_flags; 1578 uint32_t data, data1, cgc_flags, suvd_flags;
1574 1579
1575 data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE)); 1580 data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE);
1576 data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); 1581 data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
1577 1582
1578 cgc_flags = UVD_CGC_GATE__SYS_MASK | 1583 cgc_flags = UVD_CGC_GATE__SYS_MASK |
1579 UVD_CGC_GATE__UDEC_MASK | 1584 UVD_CGC_GATE__UDEC_MASK |
@@ -1605,8 +1610,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
1605 data |= cgc_flags; 1610 data |= cgc_flags;
1606 data1 |= suvd_flags; 1611 data1 |= suvd_flags;
1607 1612
1608 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), data); 1613 WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data);
1609 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); 1614 WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
1610} 1615}
1611 1616
1612static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) 1617static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
@@ -1665,7 +1670,7 @@ static int uvd_v7_0_set_powergating_state(void *handle,
1665 if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) 1670 if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
1666 return 0; 1671 return 0;
1667 1672
1668 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), UVD_POWER_STATUS__UVD_PG_EN_MASK); 1673 WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
1669 1674
1670 if (state == AMD_PG_STATE_GATE) { 1675 if (state == AMD_PG_STATE_GATE) {
1671 uvd_v7_0_stop(adev); 1676 uvd_v7_0_stop(adev);
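The uvd_v7_0.c hunks above are a mechanical conversion from open-coded WREG32(SOC15_REG_OFFSET(...)) / RREG32(SOC15_REG_OFFSET(...)) pairs to the WREG32_SOC15 / RREG32_SOC15 helpers. A minimal sketch of how such wrappers can be defined, assuming they simply fold the offset lookup into the accessor (the authoritative definitions live in soc15_common.h):

    /* Sketch: resolve the per-IP, per-instance register offset once,
     * then go through the ordinary MMIO accessors. */
    #define RREG32_SOC15(ip, inst, reg) \
            RREG32(SOC15_REG_OFFSET(ip, inst, reg))
    #define WREG32_SOC15(ip, inst, reg, value) \
            WREG32(SOC15_REG_OFFSET(ip, inst, reg), (value))

The conversion is behavior-neutral; it only shortens the call sites, which is why these hunks touch many lines without changing any register values.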
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index fb0819359909..90332f55cfba 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -77,13 +77,26 @@ static int vce_v3_0_set_clockgating_state(void *handle,
77static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) 77static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
78{ 78{
79 struct amdgpu_device *adev = ring->adev; 79 struct amdgpu_device *adev = ring->adev;
80 u32 v;
81
82 mutex_lock(&adev->grbm_idx_mutex);
83 if (adev->vce.harvest_config == 0 ||
84 adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
85 WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
86 else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
87 WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
80 88
81 if (ring == &adev->vce.ring[0]) 89 if (ring == &adev->vce.ring[0])
82 return RREG32(mmVCE_RB_RPTR); 90 v = RREG32(mmVCE_RB_RPTR);
83 else if (ring == &adev->vce.ring[1]) 91 else if (ring == &adev->vce.ring[1])
84 return RREG32(mmVCE_RB_RPTR2); 92 v = RREG32(mmVCE_RB_RPTR2);
85 else 93 else
86 return RREG32(mmVCE_RB_RPTR3); 94 v = RREG32(mmVCE_RB_RPTR3);
95
96 WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
97 mutex_unlock(&adev->grbm_idx_mutex);
98
99 return v;
87} 100}
88 101
89/** 102/**
@@ -96,13 +109,26 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
96static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) 109static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
97{ 110{
98 struct amdgpu_device *adev = ring->adev; 111 struct amdgpu_device *adev = ring->adev;
112 u32 v;
113
114 mutex_lock(&adev->grbm_idx_mutex);
115 if (adev->vce.harvest_config == 0 ||
116 adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
117 WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
118 else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
119 WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
99 120
100 if (ring == &adev->vce.ring[0]) 121 if (ring == &adev->vce.ring[0])
101 return RREG32(mmVCE_RB_WPTR); 122 v = RREG32(mmVCE_RB_WPTR);
102 else if (ring == &adev->vce.ring[1]) 123 else if (ring == &adev->vce.ring[1])
103 return RREG32(mmVCE_RB_WPTR2); 124 v = RREG32(mmVCE_RB_WPTR2);
104 else 125 else
105 return RREG32(mmVCE_RB_WPTR3); 126 v = RREG32(mmVCE_RB_WPTR3);
127
128 WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
129 mutex_unlock(&adev->grbm_idx_mutex);
130
131 return v;
106} 132}
107 133
108/** 134/**
@@ -116,12 +142,22 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
116{ 142{
117 struct amdgpu_device *adev = ring->adev; 143 struct amdgpu_device *adev = ring->adev;
118 144
145 mutex_lock(&adev->grbm_idx_mutex);
146 if (adev->vce.harvest_config == 0 ||
147 adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
148 WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
149 else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
150 WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
151
119 if (ring == &adev->vce.ring[0]) 152 if (ring == &adev->vce.ring[0])
120 WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); 153 WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
121 else if (ring == &adev->vce.ring[1]) 154 else if (ring == &adev->vce.ring[1])
122 WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); 155 WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
123 else 156 else
124 WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); 157 WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
158
159 WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
160 mutex_unlock(&adev->grbm_idx_mutex);
125} 161}
126 162
127static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) 163static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
@@ -231,33 +267,38 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
231 struct amdgpu_ring *ring; 267 struct amdgpu_ring *ring;
232 int idx, r; 268 int idx, r;
233 269
234 ring = &adev->vce.ring[0];
235 WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
236 WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
237 WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
238 WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
239 WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
240
241 ring = &adev->vce.ring[1];
242 WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
243 WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
244 WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
245 WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
246 WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
247
248 ring = &adev->vce.ring[2];
249 WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
250 WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
251 WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
252 WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
253 WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
254
255 mutex_lock(&adev->grbm_idx_mutex); 270 mutex_lock(&adev->grbm_idx_mutex);
256 for (idx = 0; idx < 2; ++idx) { 271 for (idx = 0; idx < 2; ++idx) {
257 if (adev->vce.harvest_config & (1 << idx)) 272 if (adev->vce.harvest_config & (1 << idx))
258 continue; 273 continue;
259 274
260 WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx)); 275 WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
276
 277 /* Program the instance 0 reg space when both instances or only instance 0
 278 are available; program the instance 1 reg space when only instance 1 is available */
279 if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
280 ring = &adev->vce.ring[0];
281 WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
282 WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
283 WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
284 WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
285 WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
286
287 ring = &adev->vce.ring[1];
288 WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
289 WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
290 WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
291 WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
292 WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
293
294 ring = &adev->vce.ring[2];
295 WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
296 WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
297 WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
298 WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
299 WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
300 }
301
261 vce_v3_0_mc_resume(adev, idx); 302 vce_v3_0_mc_resume(adev, idx);
262 WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1); 303 WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
263 304
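The vce_v3_0.c changes bracket every ring-buffer register access with an explicit GRBM instance selection, because on harvested parts only one of the two VCE instances is usable and its registers must be addressed through GRBM_GFX_INDEX. A condensed sketch of the select/access/restore pattern the patch applies (the helper name here is hypothetical; the patch open-codes this in each get/set function):

    /* Sketch: select the usable VCE instance, read a register, then
     * restore the default index. Mirrors the patch's locking rule:
     * grbm_idx_mutex is held across the whole sequence. */
    static u32 vce_read_on_usable_instance(struct amdgpu_device *adev, u32 reg)
    {
            u32 v;

            mutex_lock(&adev->grbm_idx_mutex);
            if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
                    WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
            else
                    WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
            v = RREG32(reg);
            WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
            mutex_unlock(&adev->grbm_idx_mutex);
            return v;
    }

Note the sketch collapses the patch's three-way check into a two-way one for brevity; the patch leaves GRBM_GFX_INDEX untouched when both instances are harvested away.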
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 139f964196b4..1ecd6bb90c1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -190,6 +190,7 @@ static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
190 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); 190 dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
191 return -EBUSY; 191 return -EBUSY;
192 } 192 }
193 WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
193 194
194 return 0; 195 return 0;
195} 196}
@@ -418,15 +419,19 @@ static int vce_v4_0_sw_init(void *handle)
418 419
419 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 420 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
420 const struct common_firmware_header *hdr; 421 const struct common_firmware_header *hdr;
422 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
423
424 adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
425 if (!adev->vce.saved_bo)
426 return -ENOMEM;
427
421 hdr = (const struct common_firmware_header *)adev->vce.fw->data; 428 hdr = (const struct common_firmware_header *)adev->vce.fw->data;
422 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; 429 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
423 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; 430 adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
424 adev->firmware.fw_size += 431 adev->firmware.fw_size +=
425 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); 432 ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
426 DRM_INFO("PSP loading VCE firmware\n"); 433 DRM_INFO("PSP loading VCE firmware\n");
427 } 434 } else {
428
429 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
430 r = amdgpu_vce_resume(adev); 435 r = amdgpu_vce_resume(adev);
431 if (r) 436 if (r)
432 return r; 437 return r;
@@ -465,6 +470,11 @@ static int vce_v4_0_sw_fini(void *handle)
465 /* free MM table */ 470 /* free MM table */
466 amdgpu_virt_free_mm_table(adev); 471 amdgpu_virt_free_mm_table(adev);
467 472
473 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
474 kfree(adev->vce.saved_bo);
475 adev->vce.saved_bo = NULL;
476 }
477
468 r = amdgpu_vce_suspend(adev); 478 r = amdgpu_vce_suspend(adev);
469 if (r) 479 if (r)
470 return r; 480 return r;
@@ -505,8 +515,14 @@ static int vce_v4_0_hw_fini(void *handle)
505 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 515 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
506 int i; 516 int i;
507 517
508 /* vce_v4_0_wait_for_idle(handle); */ 518 if (!amdgpu_sriov_vf(adev)) {
509 vce_v4_0_stop(adev); 519 /* vce_v4_0_wait_for_idle(handle); */
520 vce_v4_0_stop(adev);
521 } else {
522 /* full access mode, so don't touch any VCE register */
523 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
524 }
525
510 for (i = 0; i < adev->vce.num_rings; i++) 526 for (i = 0; i < adev->vce.num_rings; i++)
511 adev->vce.ring[i].ready = false; 527 adev->vce.ring[i].ready = false;
512 528
@@ -515,8 +531,18 @@ static int vce_v4_0_hw_fini(void *handle)
515 531
516static int vce_v4_0_suspend(void *handle) 532static int vce_v4_0_suspend(void *handle)
517{ 533{
518 int r;
519 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 534 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
535 int r;
536
537 if (adev->vce.vcpu_bo == NULL)
538 return 0;
539
540 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
541 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
542 void *ptr = adev->vce.cpu_addr;
543
544 memcpy_fromio(adev->vce.saved_bo, ptr, size);
545 }
520 546
521 r = vce_v4_0_hw_fini(adev); 547 r = vce_v4_0_hw_fini(adev);
522 if (r) 548 if (r)
@@ -527,12 +553,22 @@ static int vce_v4_0_suspend(void *handle)
527 553
528static int vce_v4_0_resume(void *handle) 554static int vce_v4_0_resume(void *handle)
529{ 555{
530 int r;
531 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 556 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
557 int r;
532 558
533 r = amdgpu_vce_resume(adev); 559 if (adev->vce.vcpu_bo == NULL)
534 if (r) 560 return -EINVAL;
535 return r; 561
562 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
563 unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
564 void *ptr = adev->vce.cpu_addr;
565
566 memcpy_toio(ptr, adev->vce.saved_bo, size);
567 } else {
568 r = amdgpu_vce_resume(adev);
569 if (r)
570 return r;
571 }
536 572
537 return vce_v4_0_hw_init(adev); 573 return vce_v4_0_hw_init(adev);
538} 574}
@@ -919,9 +955,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
919 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); 955 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
920 unsigned eng = ring->vm_inv_eng; 956 unsigned eng = ring->vm_inv_eng;
921 957
922 pd_addr = pd_addr | 0x1; /* valid bit */ 958 pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
923 /* now only use physical base address of PDE and valid */ 959 pd_addr |= AMDGPU_PTE_VALID;
924 BUG_ON(pd_addr & 0xFFFF00000000003EULL);
925 960
926 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 961 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
927 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); 962 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
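For PSP-loaded firmware, vce_v4_0 now snapshots the VCPU BO into a kernel buffer on suspend and copies it back on resume, since in that mode there is no amdgpu_vce_resume() pass to repopulate the image. Reduced to its essentials (field names as in the patch):

    /* Suspend: copy the firmware image out of the mapped VCPU BO. */
    unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
    memcpy_fromio(adev->vce.saved_bo, adev->vce.cpu_addr, size);

    /* Resume: copy it back before re-running hw init. */
    memcpy_toio(adev->vce.cpu_addr, adev->vce.saved_bo, size);

The _fromio/_toio variants matter here: cpu_addr maps device memory, so a plain memcpy() would be incorrect on architectures with strict MMIO access rules.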
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
new file mode 100644
index 000000000000..21e7b88401e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -0,0 +1,1189 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include <linux/firmware.h>
25#include <drm/drmP.h>
26#include "amdgpu.h"
27#include "amdgpu_vcn.h"
28#include "soc15d.h"
29#include "soc15_common.h"
30
31#include "vega10/soc15ip.h"
32#include "raven1/VCN/vcn_1_0_offset.h"
33#include "raven1/VCN/vcn_1_0_sh_mask.h"
34#include "vega10/HDP/hdp_4_0_offset.h"
35#include "raven1/MMHUB/mmhub_9_1_offset.h"
36#include "raven1/MMHUB/mmhub_9_1_sh_mask.h"
37
38static int vcn_v1_0_start(struct amdgpu_device *adev);
39static int vcn_v1_0_stop(struct amdgpu_device *adev);
40static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
41static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
42static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
43
44/**
45 * vcn_v1_0_early_init - set function pointers
46 *
47 * @handle: amdgpu_device pointer
48 *
49 * Set ring and irq function pointers
50 */
51static int vcn_v1_0_early_init(void *handle)
52{
53 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
54
55 adev->vcn.num_enc_rings = 2;
56
57 vcn_v1_0_set_dec_ring_funcs(adev);
58 vcn_v1_0_set_enc_ring_funcs(adev);
59 vcn_v1_0_set_irq_funcs(adev);
60
61 return 0;
62}
63
64/**
65 * vcn_v1_0_sw_init - sw init for VCN block
66 *
67 * @handle: amdgpu_device pointer
68 *
69 * Load firmware and sw initialization
70 */
71static int vcn_v1_0_sw_init(void *handle)
72{
73 struct amdgpu_ring *ring;
74 int i, r;
75 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
76
77 /* VCN DEC TRAP */
78 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
79 if (r)
80 return r;
81
82 /* VCN ENC TRAP */
83 for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
84 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, i + 119,
85 &adev->vcn.irq);
86 if (r)
87 return r;
88 }
89
90 r = amdgpu_vcn_sw_init(adev);
91 if (r)
92 return r;
93
94 r = amdgpu_vcn_resume(adev);
95 if (r)
96 return r;
97
98 ring = &adev->vcn.ring_dec;
99 sprintf(ring->name, "vcn_dec");
100 r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
101 if (r)
102 return r;
103
104 for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
105 ring = &adev->vcn.ring_enc[i];
106 sprintf(ring->name, "vcn_enc%d", i);
107 r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
108 if (r)
109 return r;
110 }
111
112 return r;
113}
114
115/**
116 * vcn_v1_0_sw_fini - sw fini for VCN block
117 *
118 * @handle: amdgpu_device pointer
119 *
120 * VCN suspend and free up sw allocation
121 */
122static int vcn_v1_0_sw_fini(void *handle)
123{
124 int r;
125 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
126
127 r = amdgpu_vcn_suspend(adev);
128 if (r)
129 return r;
130
131 r = amdgpu_vcn_sw_fini(adev);
132
133 return r;
134}
135
136/**
137 * vcn_v1_0_hw_init - start and test VCN block
138 *
139 * @handle: amdgpu_device pointer
140 *
141 * Initialize the hardware, boot up the VCPU and do some testing
142 */
143static int vcn_v1_0_hw_init(void *handle)
144{
145 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
146 struct amdgpu_ring *ring = &adev->vcn.ring_dec;
147 int i, r;
148
149 r = vcn_v1_0_start(adev);
150 if (r)
151 goto done;
152
153 ring->ready = true;
154 r = amdgpu_ring_test_ring(ring);
155 if (r) {
156 ring->ready = false;
157 goto done;
158 }
159
160 for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
161 ring = &adev->vcn.ring_enc[i];
162 ring->ready = true;
163 r = amdgpu_ring_test_ring(ring);
164 if (r) {
165 ring->ready = false;
166 goto done;
167 }
168 }
169
170done:
171 if (!r)
172 DRM_INFO("VCN decode and encode initialized successfully.\n");
173
174 return r;
175}
176
177/**
178 * vcn_v1_0_hw_fini - stop the hardware block
179 *
180 * @handle: amdgpu_device pointer
181 *
182 * Stop the VCN block and mark the ring as not ready any more
183 */
184static int vcn_v1_0_hw_fini(void *handle)
185{
186 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
187 struct amdgpu_ring *ring = &adev->vcn.ring_dec;
188 int r;
189
190 r = vcn_v1_0_stop(adev);
191 if (r)
192 return r;
193
194 ring->ready = false;
195
196 return 0;
197}
198
199/**
200 * vcn_v1_0_suspend - suspend VCN block
201 *
202 * @handle: amdgpu_device pointer
203 *
204 * HW fini and suspend VCN block
205 */
206static int vcn_v1_0_suspend(void *handle)
207{
208 int r;
209 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
210
211 r = vcn_v1_0_hw_fini(adev);
212 if (r)
213 return r;
214
215 r = amdgpu_vcn_suspend(adev);
216
217 return r;
218}
219
220/**
221 * vcn_v1_0_resume - resume VCN block
222 *
223 * @handle: amdgpu_device pointer
224 *
225 * Resume firmware and hw init VCN block
226 */
227static int vcn_v1_0_resume(void *handle)
228{
229 int r;
230 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
231
232 r = amdgpu_vcn_resume(adev);
233 if (r)
234 return r;
235
236 r = vcn_v1_0_hw_init(adev);
237
238 return r;
239}
240
241/**
242 * vcn_v1_0_mc_resume - memory controller programming
243 *
244 * @adev: amdgpu_device pointer
245 *
246 * Let the VCN memory controller know its offsets
247 */
248static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
249{
250 uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
251
252 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
253 lower_32_bits(adev->vcn.gpu_addr));
254 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
255 upper_32_bits(adev->vcn.gpu_addr));
256 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
257 AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
258 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
259
260 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
261 lower_32_bits(adev->vcn.gpu_addr + size));
262 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
263 upper_32_bits(adev->vcn.gpu_addr + size));
264 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
265 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE);
266
267 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
268 lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
269 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
270 upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
271 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
272 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
273 AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40));
274
275 WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
276 adev->gfx.config.gb_addr_config);
277 WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
278 adev->gfx.config.gb_addr_config);
279 WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
280 adev->gfx.config.gb_addr_config);
281}
282
283/**
284 * vcn_v1_0_disable_clock_gating - disable VCN clock gating
285 *
286 * @adev: amdgpu_device pointer
287 * @sw: enable SW clock gating
288 *
289 * Disable clock gating for VCN block
290 */
291static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
292{
293 uint32_t data;
294
295 /* JPEG disable CGC */
296 data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
297
298 if (sw)
299 data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
300 else
301 data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
302
303 data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
304 data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
305 WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data);
306
307 data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
308 data &= ~(JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK);
309 WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data);
310
311 /* UVD disable CGC */
312 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
313 if (sw)
314 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
315 else
316 data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
317
318 data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
319 data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
320 WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
321
322 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE);
323 data &= ~(UVD_CGC_GATE__SYS_MASK
324 | UVD_CGC_GATE__UDEC_MASK
325 | UVD_CGC_GATE__MPEG2_MASK
326 | UVD_CGC_GATE__REGS_MASK
327 | UVD_CGC_GATE__RBC_MASK
328 | UVD_CGC_GATE__LMI_MC_MASK
329 | UVD_CGC_GATE__LMI_UMC_MASK
330 | UVD_CGC_GATE__IDCT_MASK
331 | UVD_CGC_GATE__MPRD_MASK
332 | UVD_CGC_GATE__MPC_MASK
333 | UVD_CGC_GATE__LBSI_MASK
334 | UVD_CGC_GATE__LRBBM_MASK
335 | UVD_CGC_GATE__UDEC_RE_MASK
336 | UVD_CGC_GATE__UDEC_CM_MASK
337 | UVD_CGC_GATE__UDEC_IT_MASK
338 | UVD_CGC_GATE__UDEC_DB_MASK
339 | UVD_CGC_GATE__UDEC_MP_MASK
340 | UVD_CGC_GATE__WCB_MASK
341 | UVD_CGC_GATE__VCPU_MASK
342 | UVD_CGC_GATE__SCPU_MASK);
343 WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data);
344
345 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
346 data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
347 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
348 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
349 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
350 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
351 | UVD_CGC_CTRL__SYS_MODE_MASK
352 | UVD_CGC_CTRL__UDEC_MODE_MASK
353 | UVD_CGC_CTRL__MPEG2_MODE_MASK
354 | UVD_CGC_CTRL__REGS_MODE_MASK
355 | UVD_CGC_CTRL__RBC_MODE_MASK
356 | UVD_CGC_CTRL__LMI_MC_MODE_MASK
357 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
358 | UVD_CGC_CTRL__IDCT_MODE_MASK
359 | UVD_CGC_CTRL__MPRD_MODE_MASK
360 | UVD_CGC_CTRL__MPC_MODE_MASK
361 | UVD_CGC_CTRL__LBSI_MODE_MASK
362 | UVD_CGC_CTRL__LRBBM_MODE_MASK
363 | UVD_CGC_CTRL__WCB_MODE_MASK
364 | UVD_CGC_CTRL__VCPU_MODE_MASK
365 | UVD_CGC_CTRL__SCPU_MODE_MASK);
366 WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
367
368 /* turn on */
369 data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE);
370 data |= (UVD_SUVD_CGC_GATE__SRE_MASK
371 | UVD_SUVD_CGC_GATE__SIT_MASK
372 | UVD_SUVD_CGC_GATE__SMP_MASK
373 | UVD_SUVD_CGC_GATE__SCM_MASK
374 | UVD_SUVD_CGC_GATE__SDB_MASK
375 | UVD_SUVD_CGC_GATE__SRE_H264_MASK
376 | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
377 | UVD_SUVD_CGC_GATE__SIT_H264_MASK
378 | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
379 | UVD_SUVD_CGC_GATE__SCM_H264_MASK
380 | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
381 | UVD_SUVD_CGC_GATE__SDB_H264_MASK
382 | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
383 | UVD_SUVD_CGC_GATE__SCLR_MASK
384 | UVD_SUVD_CGC_GATE__UVD_SC_MASK
385 | UVD_SUVD_CGC_GATE__ENT_MASK
386 | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
387 | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
388 | UVD_SUVD_CGC_GATE__SITE_MASK
389 | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
390 | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
391 | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
392 | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
393 | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
394 WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data);
395
396 data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
397 data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
398 | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
399 | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
400 | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
401 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
402 | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
403 | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
404 | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
405 | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
406 | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
407 WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
408}
409
410/**
411 * vcn_v1_0_enable_clock_gating - enable VCN clock gating
412 *
413 * @adev: amdgpu_device pointer
414 * @sw: enable SW clock gating
415 *
416 * Enable clock gating for VCN block
417 */
418static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
419{
420 uint32_t data = 0;
421
422 /* enable JPEG CGC */
423 data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
424 if (sw)
425 data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
426 else
427 data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
428 data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
429 data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
430 WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data);
431
432 data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
433 data |= (JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK);
434 WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data);
435
436 /* enable UVD CGC */
437 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
438 if (sw)
439 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
440 else
441 data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
442 data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
443 data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
444 WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
445
446 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
447 data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
448 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
449 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
450 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
451 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
452 | UVD_CGC_CTRL__SYS_MODE_MASK
453 | UVD_CGC_CTRL__UDEC_MODE_MASK
454 | UVD_CGC_CTRL__MPEG2_MODE_MASK
455 | UVD_CGC_CTRL__REGS_MODE_MASK
456 | UVD_CGC_CTRL__RBC_MODE_MASK
457 | UVD_CGC_CTRL__LMI_MC_MODE_MASK
458 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
459 | UVD_CGC_CTRL__IDCT_MODE_MASK
460 | UVD_CGC_CTRL__MPRD_MODE_MASK
461 | UVD_CGC_CTRL__MPC_MODE_MASK
462 | UVD_CGC_CTRL__LBSI_MODE_MASK
463 | UVD_CGC_CTRL__LRBBM_MODE_MASK
464 | UVD_CGC_CTRL__WCB_MODE_MASK
465 | UVD_CGC_CTRL__VCPU_MODE_MASK
466 | UVD_CGC_CTRL__SCPU_MODE_MASK);
467 WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
468
469 data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
470 data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
471 | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
472 | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
473 | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
474 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
475 | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
476 | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
477 | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
478 | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
479 | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
480 WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
481}
482
483/**
484 * vcn_v1_0_start - start VCN block
485 *
486 * @adev: amdgpu_device pointer
487 *
488 * Set up and start the VCN block
489 */
490static int vcn_v1_0_start(struct amdgpu_device *adev)
491{
492 struct amdgpu_ring *ring = &adev->vcn.ring_dec;
493 uint32_t rb_bufsz, tmp;
494 uint32_t lmi_swap_cntl;
495 int i, j, r;
496
497 /* disable byte swapping */
498 lmi_swap_cntl = 0;
499
500 vcn_v1_0_mc_resume(adev);
501
502 /* disable clock gating */
503 vcn_v1_0_disable_clock_gating(adev, true);
504
505 /* disable interrupt */
506 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
507 ~UVD_MASTINT_EN__VCPU_EN_MASK);
508
509 /* stall UMC and register bus before resetting VCPU */
510 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
511 UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
512 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
513 mdelay(1);
514
515 /* put LMI, VCPU, RBC etc... into reset */
516 WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
517 UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
518 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
519 UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
520 UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
521 UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
522 UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
523 UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
524 UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
525 mdelay(5);
526
527 /* initialize VCN memory controller */
528 WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
529 (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
530 UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
531 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
532 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
533 UVD_LMI_CTRL__REQ_MODE_MASK |
534 0x00100000L);
535
536#ifdef __BIG_ENDIAN
537 /* swap (8 in 32) RB and IB */
538 lmi_swap_cntl = 0xa;
539#endif
540 WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
541
542 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
543 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
544 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
545 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
546 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
547 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
548
549 /* take all subblocks out of reset, except VCPU */
550 WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
551 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
552 mdelay(5);
553
554 /* enable VCPU clock */
555 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
556 UVD_VCPU_CNTL__CLK_EN_MASK);
557
558 /* enable UMC */
559 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
560 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
561
562 /* boot up the VCPU */
563 WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
564 mdelay(10);
565
566 for (i = 0; i < 10; ++i) {
567 uint32_t status;
568
569 for (j = 0; j < 100; ++j) {
570 status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
571 if (status & 2)
572 break;
573 mdelay(10);
574 }
575 r = 0;
576 if (status & 2)
577 break;
578
579 DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
580 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
581 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
582 ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
583 mdelay(10);
584 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
585 ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
586 mdelay(10);
587 r = -1;
588 }
589
590 if (r) {
591 DRM_ERROR("VCN decode not responding, giving up!!!\n");
592 return r;
593 }
594 /* enable master interrupt */
595 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
596 (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
597 ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
598
599 /* clear bit 4 of VCN_STATUS */
600 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
601 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
602
603 /* force RBC into idle state */
604 rb_bufsz = order_base_2(ring->ring_size);
605 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
606 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
607 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
608 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
609 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
610 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
611 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
612
613 /* set the write pointer delay */
614 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
615
616 /* set the wb address */
617 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
618 (upper_32_bits(ring->gpu_addr) >> 2));
619
620 /* program the RB_BASE for the ring buffer */
621 WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
622 lower_32_bits(ring->gpu_addr));
623 WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
624 upper_32_bits(ring->gpu_addr));
625
626 /* Initialize the ring buffer's read and write pointers */
627 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
628
629 ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
630 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
631 lower_32_bits(ring->wptr));
632
633 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
634 ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
635
636 ring = &adev->vcn.ring_enc[0];
637 WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
638 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
639 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
640 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
641 WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
642
643 ring = &adev->vcn.ring_enc[1];
644 WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
645 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
646 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
647 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
648 WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
649
650 return 0;
651}
652
653/**
654 * vcn_v1_0_stop - stop VCN block
655 *
656 * @adev: amdgpu_device pointer
657 *
658 * Stop the VCN block
659 */
660static int vcn_v1_0_stop(struct amdgpu_device *adev)
661{
662 /* force RBC into idle state */
663 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
664
665 /* Stall UMC and register bus before resetting VCPU */
666 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
667 UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
668 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
669 mdelay(1);
670
671 /* put VCPU into reset */
672 WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
673 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
674 mdelay(5);
675
676 /* disable VCPU clock */
677 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
678
679 /* Unstall UMC and register bus */
680 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
681 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
682
683 /* enable clock gating */
684 vcn_v1_0_enable_clock_gating(adev, true);
685
686 return 0;
687}
688
689static int vcn_v1_0_set_clockgating_state(void *handle,
690 enum amd_clockgating_state state)
691{
692 /* needed for driver unload */
693 return 0;
694}
695
696/**
697 * vcn_v1_0_dec_ring_get_rptr - get read pointer
698 *
699 * @ring: amdgpu_ring pointer
700 *
701 * Returns the current hardware read pointer
702 */
703static uint64_t vcn_v1_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
704{
705 struct amdgpu_device *adev = ring->adev;
706
707 return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
708}
709
710/**
711 * vcn_v1_0_dec_ring_get_wptr - get write pointer
712 *
713 * @ring: amdgpu_ring pointer
714 *
715 * Returns the current hardware write pointer
716 */
717static uint64_t vcn_v1_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
718{
719 struct amdgpu_device *adev = ring->adev;
720
721 return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
722}
723
724/**
725 * vcn_v1_0_dec_ring_set_wptr - set write pointer
726 *
727 * @ring: amdgpu_ring pointer
728 *
729 * Commits the write pointer to the hardware
730 */
731static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
732{
733 struct amdgpu_device *adev = ring->adev;
734
735 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
736}
737
738/**
739 * vcn_v1_0_dec_ring_insert_start - insert a start command
740 *
741 * @ring: amdgpu_ring pointer
742 *
743 * Write a start command to the ring.
744 */
745static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring)
746{
747 amdgpu_ring_write(ring,
748 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
749 amdgpu_ring_write(ring, 0);
750 amdgpu_ring_write(ring,
751 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
752 amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1);
753}
754
755/**
756 * vcn_v1_0_dec_ring_insert_end - insert an end command
757 *
758 * @ring: amdgpu_ring pointer
759 *
760 * Write an end command to the ring.
761 */
762static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring)
763{
764 amdgpu_ring_write(ring,
765 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
766 amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1);
767}
768
769/**
770 * vcn_v1_0_dec_ring_emit_fence - emit a fence & trap command
771 *
772 * @ring: amdgpu_ring pointer
773 * @fence: fence to emit
774 *
775 * Write a fence and a trap command to the ring.
776 */
777static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
778 unsigned flags)
779{
780 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
781
782 amdgpu_ring_write(ring,
783 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
784 amdgpu_ring_write(ring, seq);
785 amdgpu_ring_write(ring,
786 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
787 amdgpu_ring_write(ring, addr & 0xffffffff);
788 amdgpu_ring_write(ring,
789 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
790 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
791 amdgpu_ring_write(ring,
792 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
793 amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1);
794
795 amdgpu_ring_write(ring,
796 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
797 amdgpu_ring_write(ring, 0);
798 amdgpu_ring_write(ring,
799 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
800 amdgpu_ring_write(ring, 0);
801 amdgpu_ring_write(ring,
802 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
803 amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1);
804}
805
806/**
807 * vcn_v1_0_dec_ring_emit_hdp_invalidate - emit an hdp invalidate
808 *
809 * @ring: amdgpu_ring pointer
810 *
811 * Emits an hdp invalidate.
812 */
813static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
814{
815 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0));
816 amdgpu_ring_write(ring, 1);
817}
818
819/**
820 * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer
821 *
822 * @ring: amdgpu_ring pointer
823 * @ib: indirect buffer to execute
824 *
825 * Write ring commands to execute the indirect buffer
826 */
827static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
828 struct amdgpu_ib *ib,
829 unsigned vm_id, bool ctx_switch)
830{
831 amdgpu_ring_write(ring,
832 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
833 amdgpu_ring_write(ring, vm_id);
834
835 amdgpu_ring_write(ring,
836 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
837 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
838 amdgpu_ring_write(ring,
839 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
840 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
841 amdgpu_ring_write(ring,
842 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
843 amdgpu_ring_write(ring, ib->length_dw);
844}
845
846static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring,
847 uint32_t data0, uint32_t data1)
848{
849 amdgpu_ring_write(ring,
850 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
851 amdgpu_ring_write(ring, data0);
852 amdgpu_ring_write(ring,
853 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
854 amdgpu_ring_write(ring, data1);
855 amdgpu_ring_write(ring,
856 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
857 amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
858}
859
860static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
861 uint32_t data0, uint32_t data1, uint32_t mask)
862{
863 amdgpu_ring_write(ring,
864 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
865 amdgpu_ring_write(ring, data0);
866 amdgpu_ring_write(ring,
867 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
868 amdgpu_ring_write(ring, data1);
869 amdgpu_ring_write(ring,
870 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
871 amdgpu_ring_write(ring, mask);
872 amdgpu_ring_write(ring,
873 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
874 amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1);
875}
876
877static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
878 unsigned vm_id, uint64_t pd_addr)
879{
880 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
881 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
882 uint32_t data0, data1, mask;
883 unsigned eng = ring->vm_inv_eng;
884
885 pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
886 pd_addr |= AMDGPU_PTE_VALID;
887
888 data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
889 data1 = upper_32_bits(pd_addr);
890 vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
891
892 data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
893 data1 = lower_32_bits(pd_addr);
894 vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
895
896 data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
897 data1 = lower_32_bits(pd_addr);
898 mask = 0xffffffff;
899 vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
900
901 /* flush TLB */
902 data0 = (hub->vm_inv_eng0_req + eng) << 2;
903 data1 = req;
904 vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
905
906 /* wait for flush */
907 data0 = (hub->vm_inv_eng0_ack + eng) << 2;
908 data1 = 1 << vm_id;
909 mask = 1 << vm_id;
910 vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
911}
912
913/**
914 * vcn_v1_0_enc_ring_get_rptr - get enc read pointer
915 *
916 * @ring: amdgpu_ring pointer
917 *
918 * Returns the current hardware enc read pointer
919 */
920static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
921{
922 struct amdgpu_device *adev = ring->adev;
923
924 if (ring == &adev->vcn.ring_enc[0])
925 return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
926 else
927 return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
928}
929
930/**
931 * vcn_v1_0_enc_ring_get_wptr - get enc write pointer
932 *
933 * @ring: amdgpu_ring pointer
934 *
935 * Returns the current hardware enc write pointer
936 */
937static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
938{
939 struct amdgpu_device *adev = ring->adev;
940
941 if (ring == &adev->vcn.ring_enc[0])
942 return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
943 else
944 return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
945}
946
947/**
948 * vcn_v1_0_enc_ring_set_wptr - set enc write pointer
949 *
950 * @ring: amdgpu_ring pointer
951 *
952 * Commits the enc write pointer to the hardware
953 */
954static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
955{
956 struct amdgpu_device *adev = ring->adev;
957
958 if (ring == &adev->vcn.ring_enc[0])
959 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
960 lower_32_bits(ring->wptr));
961 else
962 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
963 lower_32_bits(ring->wptr));
964}
965
966/**
967 * vcn_v1_0_enc_ring_emit_fence - emit an enc fence & trap command
968 *
969 * @ring: amdgpu_ring pointer
970 * @fence: fence to emit
971 *
972 * Write an enc fence and a trap command to the ring.
973 */
974static void vcn_v1_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
975 u64 seq, unsigned flags)
976{
977 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
978
979 amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE);
980 amdgpu_ring_write(ring, addr);
981 amdgpu_ring_write(ring, upper_32_bits(addr));
982 amdgpu_ring_write(ring, seq);
983 amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
984}
985
986static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
987{
988 amdgpu_ring_write(ring, VCN_ENC_CMD_END);
989}
990
991/**
992 * vcn_v1_0_enc_ring_emit_ib - enc execute indirect buffer
993 *
994 * @ring: amdgpu_ring pointer
995 * @ib: indirect buffer to execute
996 *
997 * Write enc ring commands to execute the indirect buffer
998 */
999static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
1000 struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
1001{
1002 amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
1003 amdgpu_ring_write(ring, vm_id);
1004 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1005 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1006 amdgpu_ring_write(ring, ib->length_dw);
1007}
1008
1009static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
1010 unsigned int vm_id, uint64_t pd_addr)
1011{
1012 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1013 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
1014 unsigned eng = ring->vm_inv_eng;
1015
1016 pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
1017 pd_addr |= AMDGPU_PTE_VALID;
1018
1019 amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
1020 amdgpu_ring_write(ring,
1021 (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
1022 amdgpu_ring_write(ring, upper_32_bits(pd_addr));
1023
1024 amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
1025 amdgpu_ring_write(ring,
1026 (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
1027 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
1028
1029 amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
1030 amdgpu_ring_write(ring,
1031 (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
1032 amdgpu_ring_write(ring, 0xffffffff);
1033 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
1034
1035 /* flush TLB */
1036 amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
1037 amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
1038 amdgpu_ring_write(ring, req);
1039
1040 /* wait for flush */
1041 amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
1042 amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
1043 amdgpu_ring_write(ring, 1 << vm_id);
1044 amdgpu_ring_write(ring, 1 << vm_id);
1045}
1046
1047static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
1048 struct amdgpu_irq_src *source,
1049 unsigned type,
1050 enum amdgpu_interrupt_state state)
1051{
1052 return 0;
1053}
1054
1055static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
1056 struct amdgpu_irq_src *source,
1057 struct amdgpu_iv_entry *entry)
1058{
1059 DRM_DEBUG("IH: VCN TRAP\n");
1060
1061 switch (entry->src_id) {
1062 case 124:
1063 amdgpu_fence_process(&adev->vcn.ring_dec);
1064 break;
1065 case 119:
1066 amdgpu_fence_process(&adev->vcn.ring_enc[0]);
1067 break;
1068 case 120:
1069 amdgpu_fence_process(&adev->vcn.ring_enc[1]);
1070 break;
1071 default:
1072 DRM_ERROR("Unhandled interrupt: %d %d\n",
1073 entry->src_id, entry->src_data[0]);
1074 break;
1075 }
1076
1077 return 0;
1078}
1079
1080static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
1081 .name = "vcn_v1_0",
1082 .early_init = vcn_v1_0_early_init,
1083 .late_init = NULL,
1084 .sw_init = vcn_v1_0_sw_init,
1085 .sw_fini = vcn_v1_0_sw_fini,
1086 .hw_init = vcn_v1_0_hw_init,
1087 .hw_fini = vcn_v1_0_hw_fini,
1088 .suspend = vcn_v1_0_suspend,
1089 .resume = vcn_v1_0_resume,
1090 .is_idle = NULL /* vcn_v1_0_is_idle */,
1091 .wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */,
1092 .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
1093 .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
1094 .soft_reset = NULL /* vcn_v1_0_soft_reset */,
1095 .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
1096 .set_clockgating_state = vcn_v1_0_set_clockgating_state,
1097 .set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */,
1098};
1099
1100static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
1101 .type = AMDGPU_RING_TYPE_VCN_DEC,
1102 .align_mask = 0xf,
1103 .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
1104 .support_64bit_ptrs = false,
1105 .vmhub = AMDGPU_MMHUB,
1106 .get_rptr = vcn_v1_0_dec_ring_get_rptr,
1107 .get_wptr = vcn_v1_0_dec_ring_get_wptr,
1108 .set_wptr = vcn_v1_0_dec_ring_set_wptr,
1109 .emit_frame_size =
1110 2 + /* vcn_v1_0_dec_ring_emit_hdp_invalidate */
1111 34 + /* vcn_v1_0_dec_ring_emit_vm_flush */
1112 14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
1113 6,
1114 .emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */
1115 .emit_ib = vcn_v1_0_dec_ring_emit_ib,
1116 .emit_fence = vcn_v1_0_dec_ring_emit_fence,
1117 .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
1118 .emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate,
1119 .test_ring = amdgpu_vcn_dec_ring_test_ring,
1120 .test_ib = amdgpu_vcn_dec_ring_test_ib,
1121 .insert_nop = amdgpu_ring_insert_nop,
1122 .insert_start = vcn_v1_0_dec_ring_insert_start,
1123 .insert_end = vcn_v1_0_dec_ring_insert_end,
1124 .pad_ib = amdgpu_ring_generic_pad_ib,
1125 .begin_use = amdgpu_vcn_ring_begin_use,
1126 .end_use = amdgpu_vcn_ring_end_use,
1127};
1128
1129static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
1130 .type = AMDGPU_RING_TYPE_VCN_ENC,
1131 .align_mask = 0x3f,
1132 .nop = VCN_ENC_CMD_NO_OP,
1133 .support_64bit_ptrs = false,
1134 .vmhub = AMDGPU_MMHUB,
1135 .get_rptr = vcn_v1_0_enc_ring_get_rptr,
1136 .get_wptr = vcn_v1_0_enc_ring_get_wptr,
1137 .set_wptr = vcn_v1_0_enc_ring_set_wptr,
1138 .emit_frame_size =
1139 17 + /* vcn_v1_0_enc_ring_emit_vm_flush */
1140 5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */
1141 1, /* vcn_v1_0_enc_ring_insert_end */
1142 .emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */
1143 .emit_ib = vcn_v1_0_enc_ring_emit_ib,
1144 .emit_fence = vcn_v1_0_enc_ring_emit_fence,
1145 .emit_vm_flush = vcn_v1_0_enc_ring_emit_vm_flush,
1146 .test_ring = amdgpu_vcn_enc_ring_test_ring,
1147 .test_ib = amdgpu_vcn_enc_ring_test_ib,
1148 .insert_nop = amdgpu_ring_insert_nop,
1149 .insert_end = vcn_v1_0_enc_ring_insert_end,
1150 .pad_ib = amdgpu_ring_generic_pad_ib,
1151 .begin_use = amdgpu_vcn_ring_begin_use,
1152 .end_use = amdgpu_vcn_ring_end_use,
1153};
1154
1155static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
1156{
1157 adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
1158 DRM_INFO("VCN decode is enabled in VM mode\n");
1159}
1160
1161static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
1162{
1163 int i;
1164
1165 for (i = 0; i < adev->vcn.num_enc_rings; ++i)
1166 adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
1167
1168 DRM_INFO("VCN encode is enabled in VM mode\n");
1169}
1170
1171static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
1172 .set = vcn_v1_0_set_interrupt_state,
1173 .process = vcn_v1_0_process_interrupt,
1174};
1175
1176static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
1177{
1178 adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 1;
1179 adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs;
1180}
1181
1182const struct amdgpu_ip_block_version vcn_v1_0_ip_block =
1183{
1184 .type = AMD_IP_BLOCK_TYPE_VCN,
1185 .major = 1,
1186 .minor = 0,
1187 .rev = 0,
1188 .funcs = &vcn_v1_0_ip_funcs,
1189};
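A quick consistency check on the .emit_frame_size accounting above, derived from the emit functions in this file: vcn_v1_0_dec_vm_reg_write() emits three PACKET0/value pairs (6 dwords) and vcn_v1_0_dec_vm_reg_wait() four pairs (8 dwords), so the dec flush sequence write, write, wait, write, wait costs 6 + 6 + 8 + 6 + 8 = 34 dwords, matching the 34 in vcn_v1_0_dec_ring_vm_funcs. The dec fence emits seven pairs, hence 14 + 14 for the two vm fences; the hdp invalidate is one pair, hence the leading 2; and the trailing 6 matches insert_start (4 dwords) plus insert_end (2 dwords). On the enc side the flush emits 3 + 3 + 4 + 3 + 4 = 17 dwords, again matching the table.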
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
new file mode 100644
index 000000000000..2a497a7a4840
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
@@ -0,0 +1,29 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef __VCN_V1_0_H__
25#define __VCN_V1_0_H__
26
27extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;
28
29#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 071f56e439bb..56150e8d1ed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -20,7 +20,7 @@
20 * OTHER DEALINGS IN THE SOFTWARE. 20 * OTHER DEALINGS IN THE SOFTWARE.
21 * 21 *
22 */ 22 */
23#include "drmP.h" 23#include <drm/drmP.h>
24#include "amdgpu.h" 24#include "amdgpu.h"
25#include "amdgpu_ih.h" 25#include "amdgpu_ih.h"
26#include "soc15.h" 26#include "soc15.h"
@@ -97,7 +97,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	/* disable irqs */
 	vega10_ih_disable_interrupts(adev);
 
-	nbio_v6_1_ih_control(adev);
+	if (adev->flags & AMD_IS_APU)
+		nbio_v7_0_ih_control(adev);
+	else
+		nbio_v6_1_ih_control(adev);
 
 	ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL));
 	/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
@@ -148,7 +151,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 			   ENABLE, 0);
 	}
 	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR), ih_doorbell_rtpr);
-	nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
+	if (adev->flags & AMD_IS_APU)
+		nbio_v7_0_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
+	else
+		nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
 
 	tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL));
 	tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
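
Both vega10_ih.c hunks apply the same dispatch: APUs (Raven) route the IH block through the nbio v7.0 helpers, while dGPUs (Vega10) stay on the nbio v6.1 path. A hypothetical consolidation of the pattern, for illustration only (vega10_ih_nbio_control is not a function in this patch):

	static void vega10_ih_nbio_control(struct amdgpu_device *adev)
	{
		if (adev->flags & AMD_IS_APU)	/* Raven: NBIO v7.0 */
			nbio_v7_0_ih_control(adev);
		else				/* Vega10: NBIO v6.1 */
			nbio_v6_1_ih_control(adev);
	}
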
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index b1132f5e84fc..6cac291c96da 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -21,7 +21,7 @@
  *
  */
 #include <linux/slab.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_ih.h"
@@ -463,89 +463,83 @@ static void vi_detect_hw_virtualization(struct amdgpu_device *adev)
 	}
 }
 
-static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = {
-};
-
-static const struct amdgpu_allowed_register_entry cz_allowed_read_registers[] = {
-};
-
 static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = {
-	{mmGRBM_STATUS, false},
-	{mmGRBM_STATUS2, false},
-	{mmGRBM_STATUS_SE0, false},
-	{mmGRBM_STATUS_SE1, false},
-	{mmGRBM_STATUS_SE2, false},
-	{mmGRBM_STATUS_SE3, false},
-	{mmSRBM_STATUS, false},
-	{mmSRBM_STATUS2, false},
-	{mmSRBM_STATUS3, false},
-	{mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET, false},
-	{mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET, false},
-	{mmCP_STAT, false},
-	{mmCP_STALLED_STAT1, false},
-	{mmCP_STALLED_STAT2, false},
-	{mmCP_STALLED_STAT3, false},
-	{mmCP_CPF_BUSY_STAT, false},
-	{mmCP_CPF_STALLED_STAT1, false},
-	{mmCP_CPF_STATUS, false},
-	{mmCP_CPC_BUSY_STAT, false},
-	{mmCP_CPC_STALLED_STAT1, false},
-	{mmCP_CPC_STATUS, false},
-	{mmGB_ADDR_CONFIG, false},
-	{mmMC_ARB_RAMCFG, false},
-	{mmGB_TILE_MODE0, false},
-	{mmGB_TILE_MODE1, false},
-	{mmGB_TILE_MODE2, false},
-	{mmGB_TILE_MODE3, false},
-	{mmGB_TILE_MODE4, false},
-	{mmGB_TILE_MODE5, false},
-	{mmGB_TILE_MODE6, false},
-	{mmGB_TILE_MODE7, false},
-	{mmGB_TILE_MODE8, false},
-	{mmGB_TILE_MODE9, false},
-	{mmGB_TILE_MODE10, false},
-	{mmGB_TILE_MODE11, false},
-	{mmGB_TILE_MODE12, false},
-	{mmGB_TILE_MODE13, false},
-	{mmGB_TILE_MODE14, false},
-	{mmGB_TILE_MODE15, false},
-	{mmGB_TILE_MODE16, false},
-	{mmGB_TILE_MODE17, false},
-	{mmGB_TILE_MODE18, false},
-	{mmGB_TILE_MODE19, false},
-	{mmGB_TILE_MODE20, false},
-	{mmGB_TILE_MODE21, false},
-	{mmGB_TILE_MODE22, false},
-	{mmGB_TILE_MODE23, false},
-	{mmGB_TILE_MODE24, false},
-	{mmGB_TILE_MODE25, false},
-	{mmGB_TILE_MODE26, false},
-	{mmGB_TILE_MODE27, false},
-	{mmGB_TILE_MODE28, false},
-	{mmGB_TILE_MODE29, false},
-	{mmGB_TILE_MODE30, false},
-	{mmGB_TILE_MODE31, false},
-	{mmGB_MACROTILE_MODE0, false},
-	{mmGB_MACROTILE_MODE1, false},
-	{mmGB_MACROTILE_MODE2, false},
-	{mmGB_MACROTILE_MODE3, false},
-	{mmGB_MACROTILE_MODE4, false},
-	{mmGB_MACROTILE_MODE5, false},
-	{mmGB_MACROTILE_MODE6, false},
-	{mmGB_MACROTILE_MODE7, false},
-	{mmGB_MACROTILE_MODE8, false},
-	{mmGB_MACROTILE_MODE9, false},
-	{mmGB_MACROTILE_MODE10, false},
-	{mmGB_MACROTILE_MODE11, false},
-	{mmGB_MACROTILE_MODE12, false},
-	{mmGB_MACROTILE_MODE13, false},
-	{mmGB_MACROTILE_MODE14, false},
-	{mmGB_MACROTILE_MODE15, false},
-	{mmCC_RB_BACKEND_DISABLE, false, true},
-	{mmGC_USER_RB_BACKEND_DISABLE, false, true},
-	{mmGB_BACKEND_MAP, false, false},
-	{mmPA_SC_RASTER_CONFIG, false, true},
-	{mmPA_SC_RASTER_CONFIG_1, false, true},
+	{mmGRBM_STATUS},
+	{mmGRBM_STATUS2},
+	{mmGRBM_STATUS_SE0},
+	{mmGRBM_STATUS_SE1},
+	{mmGRBM_STATUS_SE2},
+	{mmGRBM_STATUS_SE3},
+	{mmSRBM_STATUS},
+	{mmSRBM_STATUS2},
+	{mmSRBM_STATUS3},
+	{mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+	{mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+	{mmCP_STAT},
+	{mmCP_STALLED_STAT1},
+	{mmCP_STALLED_STAT2},
+	{mmCP_STALLED_STAT3},
+	{mmCP_CPF_BUSY_STAT},
+	{mmCP_CPF_STALLED_STAT1},
+	{mmCP_CPF_STATUS},
+	{mmCP_CPC_BUSY_STAT},
+	{mmCP_CPC_STALLED_STAT1},
+	{mmCP_CPC_STATUS},
+	{mmGB_ADDR_CONFIG},
+	{mmMC_ARB_RAMCFG},
+	{mmGB_TILE_MODE0},
+	{mmGB_TILE_MODE1},
+	{mmGB_TILE_MODE2},
+	{mmGB_TILE_MODE3},
+	{mmGB_TILE_MODE4},
+	{mmGB_TILE_MODE5},
+	{mmGB_TILE_MODE6},
+	{mmGB_TILE_MODE7},
+	{mmGB_TILE_MODE8},
+	{mmGB_TILE_MODE9},
+	{mmGB_TILE_MODE10},
+	{mmGB_TILE_MODE11},
+	{mmGB_TILE_MODE12},
+	{mmGB_TILE_MODE13},
+	{mmGB_TILE_MODE14},
+	{mmGB_TILE_MODE15},
+	{mmGB_TILE_MODE16},
+	{mmGB_TILE_MODE17},
+	{mmGB_TILE_MODE18},
+	{mmGB_TILE_MODE19},
+	{mmGB_TILE_MODE20},
+	{mmGB_TILE_MODE21},
+	{mmGB_TILE_MODE22},
+	{mmGB_TILE_MODE23},
+	{mmGB_TILE_MODE24},
+	{mmGB_TILE_MODE25},
+	{mmGB_TILE_MODE26},
+	{mmGB_TILE_MODE27},
+	{mmGB_TILE_MODE28},
+	{mmGB_TILE_MODE29},
+	{mmGB_TILE_MODE30},
+	{mmGB_TILE_MODE31},
+	{mmGB_MACROTILE_MODE0},
+	{mmGB_MACROTILE_MODE1},
+	{mmGB_MACROTILE_MODE2},
+	{mmGB_MACROTILE_MODE3},
+	{mmGB_MACROTILE_MODE4},
+	{mmGB_MACROTILE_MODE5},
+	{mmGB_MACROTILE_MODE6},
+	{mmGB_MACROTILE_MODE7},
+	{mmGB_MACROTILE_MODE8},
+	{mmGB_MACROTILE_MODE9},
+	{mmGB_MACROTILE_MODE10},
+	{mmGB_MACROTILE_MODE11},
+	{mmGB_MACROTILE_MODE12},
+	{mmGB_MACROTILE_MODE13},
+	{mmGB_MACROTILE_MODE14},
+	{mmGB_MACROTILE_MODE15},
+	{mmCC_RB_BACKEND_DISABLE, true},
+	{mmGC_USER_RB_BACKEND_DISABLE, true},
+	{mmGB_BACKEND_MAP, false},
+	{mmPA_SC_RASTER_CONFIG, true},
+	{mmPA_SC_RASTER_CONFIG_1, true},
 };
 
 static uint32_t vi_get_register_value(struct amdgpu_device *adev,
@@ -647,51 +641,17 @@ static uint32_t vi_get_register_value(struct amdgpu_device *adev,
 static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 			    u32 sh_num, u32 reg_offset, u32 *value)
 {
-	const struct amdgpu_allowed_register_entry *asic_register_table = NULL;
-	const struct amdgpu_allowed_register_entry *asic_register_entry;
-	uint32_t size, i;
+	uint32_t i;
 
 	*value = 0;
-	switch (adev->asic_type) {
-	case CHIP_TOPAZ:
-		asic_register_table = tonga_allowed_read_registers;
-		size = ARRAY_SIZE(tonga_allowed_read_registers);
-		break;
-	case CHIP_FIJI:
-	case CHIP_TONGA:
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS12:
-	case CHIP_CARRIZO:
-	case CHIP_STONEY:
-		asic_register_table = cz_allowed_read_registers;
-		size = ARRAY_SIZE(cz_allowed_read_registers);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (asic_register_table) {
-		for (i = 0; i < size; i++) {
-			asic_register_entry = asic_register_table + i;
-			if (reg_offset != asic_register_entry->reg_offset)
-				continue;
-			if (!asic_register_entry->untouched)
-				*value = vi_get_register_value(adev,
-					asic_register_entry->grbm_indexed,
-					se_num, sh_num, reg_offset);
-			return 0;
-		}
-	}
-
 	for (i = 0; i < ARRAY_SIZE(vi_allowed_read_registers); i++) {
+		bool indexed = vi_allowed_read_registers[i].grbm_indexed;
+
 		if (reg_offset != vi_allowed_read_registers[i].reg_offset)
 			continue;
 
-		if (!vi_allowed_read_registers[i].untouched)
-			*value = vi_get_register_value(adev,
-				vi_allowed_read_registers[i].grbm_indexed,
-				se_num, sh_num, reg_offset);
+		*value = vi_get_register_value(adev, indexed, se_num, sh_num,
+					       reg_offset);
 		return 0;
 	}
 	return -EINVAL;
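
With the empty per-ASIC tables gone, every VI part shares one whitelist and one linear scan; offsets outside vi_allowed_read_registers[] still return -EINVAL. For context, a hedged sketch of how userspace reaches this path through libdrm's amdgpu_read_mm_registers() (the 0x2004 offset is assumed to be mmGRBM_STATUS here, not taken from this patch):

	/* Userspace side of the register whitelist, sketched. */
	#include <stdio.h>
	#include <amdgpu.h>

	static int dump_grbm_status(amdgpu_device_handle dev)
	{
		uint32_t val;
		/* instance 0xffffffff requests broadcast (no SE/SH banking) */
		int r = amdgpu_read_mm_registers(dev, 0x2004 /* assumed mmGRBM_STATUS */,
						 1, 0xffffffff, 0, &val);

		if (!r)
			printf("GRBM_STATUS = 0x%08x\n", val);
		return r;	/* fails for offsets outside the whitelist */
	}
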
@@ -934,11 +894,6 @@ static int vi_common_early_init(void *handle)
 	    (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC)))
 		smc_enabled = true;
 
-	if (amdgpu_sriov_vf(adev)) {
-		amdgpu_virt_init_setting(adev);
-		xgpu_vi_mailbox_set_irq_funcs(adev);
-	}
-
 	adev->rev_id = vi_get_rev_id(adev);
 	adev->external_rev_id = 0xFF;
 	switch (adev->asic_type) {
@@ -1073,7 +1028,7 @@ static int vi_common_early_init(void *handle)
 		/* rev0 hardware requires workarounds to support PG */
 		adev->pg_flags = 0;
 		if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) {
-			adev->pg_flags |=
+			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
 				AMD_PG_SUPPORT_GFX_SMG |
 				AMD_PG_SUPPORT_GFX_PIPELINE |
 				AMD_PG_SUPPORT_CP |
@@ -1111,6 +1066,11 @@ static int vi_common_early_init(void *handle)
 		return -EINVAL;
 	}
 
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_setting(adev);
+		xgpu_vi_mailbox_set_irq_funcs(adev);
+	}
+
 	/* vi use smc load by default */
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
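
The two SR-IOV hunks are a move, not new logic: the virt setup now runs after the per-ASIC switch. A plausible reading, sketched here from the 4.13-era helper as I recall it (hedged, not part of this patch), is that amdgpu_virt_init_setting() overrides fields the switch also assigns, so it must run last to take effect:

	/* Hedged sketch of the ordering concern; see amdgpu_virt.c for
	 * the authoritative body of amdgpu_virt_init_setting(). */
	void amdgpu_virt_init_setting_sketch(struct amdgpu_device *adev)
	{
		adev->mode_info.num_crtc = 1;	/* single virtual display for VFs */
		adev->enable_virtual_display = true;
		adev->cg_flags = 0;		/* drop per-ASIC clockgating flags */
		adev->pg_flags = 0;		/* drop per-ASIC powergating flags */
	}
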
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 5f2ab9c1609a..a6485254a169 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -361,6 +361,12 @@
 #define	PACKET3_WAIT_ON_CE_COUNTER			0x86
 #define	PACKET3_WAIT_ON_DE_COUNTER_DIFF			0x88
 #define	PACKET3_SWITCH_BUFFER				0x8B
+#define	PACKET3_FRAME_CONTROL				0x90
+#	define	FRAME_CMD(x) ((x) << 28)
+			/*
+			 * x=0: tmz_begin
+			 * x=1: tmz_end
+			 */
 #define	PACKET3_SET_RESOURCES				0xA0
 /* 1. header
  * 2. CONTROL
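
PACKET3_FRAME_CONTROL brackets protected (TMZ) work on a GFX ring. A hedged sketch of emitting it with the existing ring helpers (gfx_emit_tmz_sketch is a hypothetical function; the payload layout beyond FRAME_CMD is assumed):

	static void gfx_emit_tmz_sketch(struct amdgpu_ring *ring, bool begin)
	{
		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
		/* FRAME_CMD(0) = tmz_begin, FRAME_CMD(1) = tmz_end */
		amdgpu_ring_write(ring, FRAME_CMD(begin ? 0 : 1));
	}
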