Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 45
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 87
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1043
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 584
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 44
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 247
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 81
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 195
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 121
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 70
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 111
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 120
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 38
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 365
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 496
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 292
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 273
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/df_v1_7.c | 120
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/df_v1_7.h | 40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 116
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/df_v3_6.h | 40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 102
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 344
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 146
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 67
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 96
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c | 125
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15_common.h | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15d.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 112
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 1073
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 188
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 53
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 130
95 files changed, 5948 insertions, 1975 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2ca2b5154d52..bfd332c95b61 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,13 +56,18 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
56 56
57# add asic specific block 57# add asic specific block
58amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ 58amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
59 ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \ 59 ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
60 amdgpu_amdkfd_gfx_v7.o
61 60
62amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o 61amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
63 62
64amdgpu-y += \ 63amdgpu-y += \
65 vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o 64 vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
65 vega20_reg_init.o
66
67# add DF block
68amdgpu-y += \
69 df_v1_7.o \
70 df_v3_6.o
66 71
67# add GMC block 72# add GMC block
68amdgpu-y += \ 73amdgpu-y += \
@@ -126,11 +131,20 @@ amdgpu-y += \
126 vcn_v1_0.o 131 vcn_v1_0.o
127 132
128# add amdkfd interfaces 133# add amdkfd interfaces
134amdgpu-y += amdgpu_amdkfd.o
135
136ifneq ($(CONFIG_HSA_AMD),)
129amdgpu-y += \ 137amdgpu-y += \
130 amdgpu_amdkfd.o \
131 amdgpu_amdkfd_fence.o \ 138 amdgpu_amdkfd_fence.o \
132 amdgpu_amdkfd_gpuvm.o \ 139 amdgpu_amdkfd_gpuvm.o \
133 amdgpu_amdkfd_gfx_v8.o 140 amdgpu_amdkfd_gfx_v8.o \
141 amdgpu_amdkfd_gfx_v9.o
142
143ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
144amdgpu-y += amdgpu_amdkfd_gfx_v7.o
145endif
146
147endif
134 148
135# add cgs 149# add cgs
136amdgpu-y += amdgpu_cgs.o 150amdgpu-y += amdgpu_cgs.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c8b605f3dc05..a59c07590cee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -129,6 +129,7 @@ extern int amdgpu_lbpw;
129extern int amdgpu_compute_multipipe; 129extern int amdgpu_compute_multipipe;
130extern int amdgpu_gpu_recovery; 130extern int amdgpu_gpu_recovery;
131extern int amdgpu_emu_mode; 131extern int amdgpu_emu_mode;
132extern uint amdgpu_smu_memory_pool_size;
132 133
133#ifdef CONFIG_DRM_AMDGPU_SI 134#ifdef CONFIG_DRM_AMDGPU_SI
134extern int amdgpu_si_support; 135extern int amdgpu_si_support;
@@ -137,6 +138,7 @@ extern int amdgpu_si_support;
137extern int amdgpu_cik_support; 138extern int amdgpu_cik_support;
138#endif 139#endif
139 140
141#define AMDGPU_SG_THRESHOLD (256*1024*1024)
140#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ 142#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
141#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 143#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
142#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ 144#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
@@ -222,10 +224,10 @@ enum amdgpu_kiq_irq {
222 AMDGPU_CP_KIQ_IRQ_LAST 224 AMDGPU_CP_KIQ_IRQ_LAST
223}; 225};
224 226
225int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, 227int amdgpu_device_ip_set_clockgating_state(void *dev,
226 enum amd_ip_block_type block_type, 228 enum amd_ip_block_type block_type,
227 enum amd_clockgating_state state); 229 enum amd_clockgating_state state);
228int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, 230int amdgpu_device_ip_set_powergating_state(void *dev,
229 enum amd_ip_block_type block_type, 231 enum amd_ip_block_type block_type,
230 enum amd_powergating_state state); 232 enum amd_powergating_state state);
231void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, 233void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
@@ -681,6 +683,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
681int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); 683int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
682 684
683void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); 685void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
686void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);
687void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
684void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); 688void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
685 689
686 690
@@ -771,9 +775,18 @@ struct amdgpu_rlc {
771 u32 starting_offsets_start; 775 u32 starting_offsets_start;
772 u32 reg_list_format_size_bytes; 776 u32 reg_list_format_size_bytes;
773 u32 reg_list_size_bytes; 777 u32 reg_list_size_bytes;
778 u32 reg_list_format_direct_reg_list_length;
779 u32 save_restore_list_cntl_size_bytes;
780 u32 save_restore_list_gpm_size_bytes;
781 u32 save_restore_list_srm_size_bytes;
774 782
775 u32 *register_list_format; 783 u32 *register_list_format;
776 u32 *register_restore; 784 u32 *register_restore;
785 u8 *save_restore_list_cntl;
786 u8 *save_restore_list_gpm;
787 u8 *save_restore_list_srm;
788
789 bool is_rlc_v2_1;
777}; 790};
778 791
779#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES 792#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@@ -867,6 +880,8 @@ struct amdgpu_gfx_config {
867 880
868 /* gfx configure feature */ 881 /* gfx configure feature */
869 uint32_t double_offchip_lds_buf; 882 uint32_t double_offchip_lds_buf;
883 /* cached value of DB_DEBUG2 */
884 uint32_t db_debug2;
870}; 885};
871 886
872struct amdgpu_cu_info { 887struct amdgpu_cu_info {
@@ -938,6 +953,12 @@ struct amdgpu_gfx {
938 uint32_t ce_feature_version; 953 uint32_t ce_feature_version;
939 uint32_t pfp_feature_version; 954 uint32_t pfp_feature_version;
940 uint32_t rlc_feature_version; 955 uint32_t rlc_feature_version;
956 uint32_t rlc_srlc_fw_version;
957 uint32_t rlc_srlc_feature_version;
958 uint32_t rlc_srlg_fw_version;
959 uint32_t rlc_srlg_feature_version;
960 uint32_t rlc_srls_fw_version;
961 uint32_t rlc_srls_feature_version;
941 uint32_t mec_feature_version; 962 uint32_t mec_feature_version;
942 uint32_t mec2_feature_version; 963 uint32_t mec2_feature_version;
943 struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; 964 struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
@@ -1204,6 +1225,8 @@ struct amdgpu_asic_funcs {
1204 /* invalidate hdp read cache */ 1225 /* invalidate hdp read cache */
1205 void (*invalidate_hdp)(struct amdgpu_device *adev, 1226 void (*invalidate_hdp)(struct amdgpu_device *adev,
1206 struct amdgpu_ring *ring); 1227 struct amdgpu_ring *ring);
 1228	/* check if the asic needs a full reset or if soft reset will work */
1229 bool (*need_full_reset)(struct amdgpu_device *adev);
1207}; 1230};
1208 1231
1209/* 1232/*
@@ -1368,7 +1391,19 @@ struct amdgpu_nbio_funcs {
1368 void (*detect_hw_virt)(struct amdgpu_device *adev); 1391 void (*detect_hw_virt)(struct amdgpu_device *adev);
1369}; 1392};
1370 1393
1371 1394struct amdgpu_df_funcs {
1395 void (*init)(struct amdgpu_device *adev);
1396 void (*enable_broadcast_mode)(struct amdgpu_device *adev,
1397 bool enable);
1398 u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
1399 u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
1400 void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
1401 bool enable);
1402 void (*get_clockgating_state)(struct amdgpu_device *adev,
1403 u32 *flags);
1404 void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
1405 bool enable);
1406};
1372/* Define the HW IP blocks will be used in driver , add more if necessary */ 1407/* Define the HW IP blocks will be used in driver , add more if necessary */
1373enum amd_hw_ip_block_type { 1408enum amd_hw_ip_block_type {
1374 GC_HWIP = 1, 1409 GC_HWIP = 1,
@@ -1398,6 +1433,7 @@ enum amd_hw_ip_block_type {
1398struct amd_powerplay { 1433struct amd_powerplay {
1399 void *pp_handle; 1434 void *pp_handle;
1400 const struct amd_pm_funcs *pp_funcs; 1435 const struct amd_pm_funcs *pp_funcs;
1436 uint32_t pp_feature;
1401}; 1437};
1402 1438
1403#define AMDGPU_RESET_MAGIC_NUM 64 1439#define AMDGPU_RESET_MAGIC_NUM 64
@@ -1590,6 +1626,7 @@ struct amdgpu_device {
1590 uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; 1626 uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
1591 1627
1592 const struct amdgpu_nbio_funcs *nbio_funcs; 1628 const struct amdgpu_nbio_funcs *nbio_funcs;
1629 const struct amdgpu_df_funcs *df_funcs;
1593 1630
1594 /* delayed work_func for deferring clockgating during resume */ 1631 /* delayed work_func for deferring clockgating during resume */
1595 struct delayed_work late_init_work; 1632 struct delayed_work late_init_work;
@@ -1764,6 +1801,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
1764#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) 1801#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
1765#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) 1802#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
1766#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) 1803#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
1804#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
1767#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) 1805#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
1768#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) 1806#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
1769#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) 1807#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
@@ -1790,6 +1828,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
1790#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) 1828#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
1791#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) 1829#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
1792#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) 1830#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
1831#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
1793#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) 1832#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
1794#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) 1833#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
1795#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) 1834#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 12558044acd4..428e5eb3444f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -290,12 +290,11 @@ static int acp_hw_init(void *handle)
290 else if (r) 290 else if (r)
291 return r; 291 return r;
292 292
293 r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO, 293 if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
294 0x5289, 0, &acp_base); 294 return -EINVAL;
295 if (r == -ENODEV) 295
296 return 0; 296 acp_base = adev->rmmio_base;
297 else if (r) 297
298 return r;
299 if (adev->asic_type != CHIP_STONEY) { 298 if (adev->asic_type != CHIP_STONEY) {
300 adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); 299 adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
301 if (adev->acp.acp_genpd == NULL) 300 if (adev->acp.acp_genpd == NULL)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 4d36203ffb11..8f6f45567bfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -50,15 +50,21 @@ int amdgpu_amdkfd_init(void)
50 kgd2kfd = NULL; 50 kgd2kfd = NULL;
51 } 51 }
52 52
53
53#elif defined(CONFIG_HSA_AMD) 54#elif defined(CONFIG_HSA_AMD)
55
54 ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); 56 ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
55 if (ret) 57 if (ret)
56 kgd2kfd = NULL; 58 kgd2kfd = NULL;
57 59
58#else 60#else
61 kgd2kfd = NULL;
59 ret = -ENOENT; 62 ret = -ENOENT;
60#endif 63#endif
64
65#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
61 amdgpu_amdkfd_gpuvm_init_mem_limits(); 66 amdgpu_amdkfd_gpuvm_init_mem_limits();
67#endif
62 68
63 return ret; 69 return ret;
64} 70}
@@ -92,8 +98,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
92 case CHIP_POLARIS11: 98 case CHIP_POLARIS11:
93 kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); 99 kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
94 break; 100 break;
101 case CHIP_VEGA10:
102 case CHIP_RAVEN:
103 kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
104 break;
95 default: 105 default:
96 dev_dbg(adev->dev, "kfd not supported on this ASIC\n"); 106 dev_info(adev->dev, "kfd not supported on this ASIC\n");
97 return; 107 return;
98 } 108 }
99 109
@@ -175,6 +185,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
175 &gpu_resources.doorbell_physical_address, 185 &gpu_resources.doorbell_physical_address,
176 &gpu_resources.doorbell_aperture_size, 186 &gpu_resources.doorbell_aperture_size,
177 &gpu_resources.doorbell_start_offset); 187 &gpu_resources.doorbell_start_offset);
188 if (adev->asic_type >= CHIP_VEGA10) {
189 /* On SOC15 the BIF is involved in routing
190 * doorbells using the low 12 bits of the
191 * address. Communicate the assignments to
192 * KFD. KFD uses two doorbell pages per
193 * process in case of 64-bit doorbells so we
194 * can use each doorbell assignment twice.
195 */
196 gpu_resources.sdma_doorbell[0][0] =
197 AMDGPU_DOORBELL64_sDMA_ENGINE0;
198 gpu_resources.sdma_doorbell[0][1] =
199 AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
200 gpu_resources.sdma_doorbell[1][0] =
201 AMDGPU_DOORBELL64_sDMA_ENGINE1;
202 gpu_resources.sdma_doorbell[1][1] =
203 AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
204 /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
205 * SDMA, IH and VCN. So don't use them for the CP.
206 */
207 gpu_resources.reserved_doorbell_mask = 0x1f0;
208 gpu_resources.reserved_doorbell_val = 0x0f0;
209 }
178 210
179 kgd2kfd->device_init(adev->kfd, &gpu_resources); 211 kgd2kfd->device_init(adev->kfd, &gpu_resources);
180 } 212 }
@@ -217,13 +249,19 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
217{ 249{
218 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 250 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
219 struct amdgpu_bo *bo = NULL; 251 struct amdgpu_bo *bo = NULL;
252 struct amdgpu_bo_param bp;
220 int r; 253 int r;
221 uint64_t gpu_addr_tmp = 0; 254 uint64_t gpu_addr_tmp = 0;
222 void *cpu_ptr_tmp = NULL; 255 void *cpu_ptr_tmp = NULL;
223 256
224 r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 257 memset(&bp, 0, sizeof(bp));
225 AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel, 258 bp.size = size;
226 NULL, &bo); 259 bp.byte_align = PAGE_SIZE;
260 bp.domain = AMDGPU_GEM_DOMAIN_GTT;
261 bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
262 bp.type = ttm_bo_type_kernel;
263 bp.resv = NULL;
264 r = amdgpu_bo_create(adev, &bp, &bo);
227 if (r) { 265 if (r) {
228 dev_err(adev->dev, 266 dev_err(adev->dev,
229 "failed to allocate BO for amdkfd (%d)\n", r); 267 "failed to allocate BO for amdkfd (%d)\n", r);
@@ -432,3 +470,44 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
432 470
433 return false; 471 return false;
434} 472}
473
474#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
475bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
476{
477 return false;
478}
479
480void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
481{
482}
483
484void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
485 struct amdgpu_vm *vm)
486{
487}
488
489struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
490{
491 return NULL;
492}
493
494int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
495{
496 return 0;
497}
498
499struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
500{
501 return NULL;
502}
503
504struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
505{
506 return NULL;
507}
508
509struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
510{
511 return NULL;
512}
513#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c2c2bea731e0..a8418a3f4e9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -28,6 +28,7 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/mm.h> 29#include <linux/mm.h>
30#include <linux/mmu_context.h> 30#include <linux/mmu_context.h>
31#include <linux/workqueue.h>
31#include <kgd_kfd_interface.h> 32#include <kgd_kfd_interface.h>
32#include <drm/ttm/ttm_execbuf_util.h> 33#include <drm/ttm/ttm_execbuf_util.h>
33#include "amdgpu_sync.h" 34#include "amdgpu_sync.h"
@@ -59,7 +60,9 @@ struct kgd_mem {
59 60
60 uint32_t mapping_flags; 61 uint32_t mapping_flags;
61 62
63 atomic_t invalid;
62 struct amdkfd_process_info *process_info; 64 struct amdkfd_process_info *process_info;
65 struct page **user_pages;
63 66
64 struct amdgpu_sync sync; 67 struct amdgpu_sync sync;
65 68
@@ -84,6 +87,9 @@ struct amdkfd_process_info {
84 struct list_head vm_list_head; 87 struct list_head vm_list_head;
85 /* List head for all KFD BOs that belong to a KFD process. */ 88 /* List head for all KFD BOs that belong to a KFD process. */
86 struct list_head kfd_bo_list; 89 struct list_head kfd_bo_list;
90 /* List of userptr BOs that are valid or invalid */
91 struct list_head userptr_valid_list;
92 struct list_head userptr_inval_list;
87 /* Lock to protect kfd_bo_list */ 93 /* Lock to protect kfd_bo_list */
88 struct mutex lock; 94 struct mutex lock;
89 95
@@ -91,6 +97,11 @@ struct amdkfd_process_info {
91 unsigned int n_vms; 97 unsigned int n_vms;
92 /* Eviction Fence */ 98 /* Eviction Fence */
93 struct amdgpu_amdkfd_fence *eviction_fence; 99 struct amdgpu_amdkfd_fence *eviction_fence;
100
101 /* MMU-notifier related fields */
102 atomic_t evicted_bos;
103 struct delayed_work restore_userptr_work;
104 struct pid *pid;
94}; 105};
95 106
96int amdgpu_amdkfd_init(void); 107int amdgpu_amdkfd_init(void);
@@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
104void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); 115void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
105void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); 116void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
106 117
118int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
107int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, 119int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
108 uint32_t vmid, uint64_t gpu_addr, 120 uint32_t vmid, uint64_t gpu_addr,
109 uint32_t *ib_cmd, uint32_t ib_len); 121 uint32_t *ib_cmd, uint32_t ib_len);
110 122
111struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); 123struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
112struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); 124struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
125struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
113 126
114bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); 127bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
115 128
@@ -143,14 +156,14 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
143 156
144/* GPUVM API */ 157/* GPUVM API */
145int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, 158int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
146 void **process_info, 159 void **process_info,
147 struct dma_fence **ef); 160 struct dma_fence **ef);
148int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, 161int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
149 struct file *filp, 162 struct file *filp,
150 void **vm, void **process_info, 163 void **vm, void **process_info,
151 struct dma_fence **ef); 164 struct dma_fence **ef);
152void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, 165void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
153 struct amdgpu_vm *vm); 166 struct amdgpu_vm *vm);
154void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); 167void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
155uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); 168uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
156int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( 169int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ea54e53172b9..0ff36d45a597 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
98static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, 98static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
99 unsigned int vmid); 99 unsigned int vmid);
100 100
101static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
102 uint32_t hpd_size, uint64_t hpd_gpu_addr);
103static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); 101static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
104static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 102static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
105 uint32_t queue_id, uint32_t __user *wptr, 103 uint32_t queue_id, uint32_t __user *wptr,
@@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
183 .free_pasid = amdgpu_pasid_free, 181 .free_pasid = amdgpu_pasid_free,
184 .program_sh_mem_settings = kgd_program_sh_mem_settings, 182 .program_sh_mem_settings = kgd_program_sh_mem_settings,
185 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 183 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
186 .init_pipeline = kgd_init_pipeline,
187 .init_interrupts = kgd_init_interrupts, 184 .init_interrupts = kgd_init_interrupts,
188 .hqd_load = kgd_hqd_load, 185 .hqd_load = kgd_hqd_load,
189 .hqd_sdma_load = kgd_hqd_sdma_load, 186 .hqd_sdma_load = kgd_hqd_sdma_load,
@@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
309 return 0; 306 return 0;
310} 307}
311 308
312static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
313 uint32_t hpd_size, uint64_t hpd_gpu_addr)
314{
315 /* amdgpu owns the per-pipe state */
316 return 0;
317}
318
319static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) 309static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
320{ 310{
321 struct amdgpu_device *adev = get_amdgpu_device(kgd); 311 struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 89264c9a5e9f..6ef9762b4b00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
57 uint32_t sh_mem_bases); 57 uint32_t sh_mem_bases);
58static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, 58static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
59 unsigned int vmid); 59 unsigned int vmid);
60static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
61 uint32_t hpd_size, uint64_t hpd_gpu_addr);
62static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); 60static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
63static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, 61static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
64 uint32_t queue_id, uint32_t __user *wptr, 62 uint32_t queue_id, uint32_t __user *wptr,
@@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
141 .free_pasid = amdgpu_pasid_free, 139 .free_pasid = amdgpu_pasid_free,
142 .program_sh_mem_settings = kgd_program_sh_mem_settings, 140 .program_sh_mem_settings = kgd_program_sh_mem_settings,
143 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 141 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
144 .init_pipeline = kgd_init_pipeline,
145 .init_interrupts = kgd_init_interrupts, 142 .init_interrupts = kgd_init_interrupts,
146 .hqd_load = kgd_hqd_load, 143 .hqd_load = kgd_hqd_load,
147 .hqd_sdma_load = kgd_hqd_sdma_load, 144 .hqd_sdma_load = kgd_hqd_sdma_load,
@@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
270 return 0; 267 return 0;
271} 268}
272 269
273static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
274 uint32_t hpd_size, uint64_t hpd_gpu_addr)
275{
276 /* amdgpu owns the per-pipe state */
277 return 0;
278}
279
280static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) 270static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
281{ 271{
282 struct amdgpu_device *adev = get_amdgpu_device(kgd); 272 struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
new file mode 100644
index 000000000000..f0c0d3953f69
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -0,0 +1,1043 @@
1/*
2 * Copyright 2014-2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#define pr_fmt(fmt) "kfd2kgd: " fmt
24
25#include <linux/module.h>
26#include <linux/fdtable.h>
27#include <linux/uaccess.h>
28#include <linux/firmware.h>
29#include <drm/drmP.h>
30#include "amdgpu.h"
31#include "amdgpu_amdkfd.h"
32#include "amdgpu_ucode.h"
33#include "soc15_hw_ip.h"
34#include "gc/gc_9_0_offset.h"
35#include "gc/gc_9_0_sh_mask.h"
36#include "vega10_enum.h"
37#include "sdma0/sdma0_4_0_offset.h"
38#include "sdma0/sdma0_4_0_sh_mask.h"
39#include "sdma1/sdma1_4_0_offset.h"
40#include "sdma1/sdma1_4_0_sh_mask.h"
41#include "athub/athub_1_0_offset.h"
42#include "athub/athub_1_0_sh_mask.h"
43#include "oss/osssys_4_0_offset.h"
44#include "oss/osssys_4_0_sh_mask.h"
45#include "soc15_common.h"
46#include "v9_structs.h"
47#include "soc15.h"
48#include "soc15d.h"
49
50/* HACK: MMHUB and GC both have VM-related register with the same
51 * names but different offsets. Define the MMHUB register we need here
52 * with a prefix. A proper solution would be to move the functions
53 * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
54 * respectively.
55 */
56#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
57#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
58
59#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
60#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
61
62#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
63#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
64#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
65#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
66
67#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
68#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
69#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
70#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
71
72#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
73#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
74#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
75#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
76
77#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
78#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
79#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
80#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
81
82#define V9_PIPE_PER_MEC (4)
83#define V9_QUEUES_PER_PIPE_MEC (8)
84
85enum hqd_dequeue_request_type {
86 NO_ACTION = 0,
87 DRAIN_PIPE,
88 RESET_WAVES
89};
90
91/*
92 * Register access functions
93 */
94
95static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
96 uint32_t sh_mem_config,
97 uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
98 uint32_t sh_mem_bases);
99static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
100 unsigned int vmid);
101static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
102static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
103 uint32_t queue_id, uint32_t __user *wptr,
104 uint32_t wptr_shift, uint32_t wptr_mask,
105 struct mm_struct *mm);
106static int kgd_hqd_dump(struct kgd_dev *kgd,
107 uint32_t pipe_id, uint32_t queue_id,
108 uint32_t (**dump)[2], uint32_t *n_regs);
109static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
110 uint32_t __user *wptr, struct mm_struct *mm);
111static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
112 uint32_t engine_id, uint32_t queue_id,
113 uint32_t (**dump)[2], uint32_t *n_regs);
114static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
115 uint32_t pipe_id, uint32_t queue_id);
116static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
117static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
118 enum kfd_preempt_type reset_type,
119 unsigned int utimeout, uint32_t pipe_id,
120 uint32_t queue_id);
121static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
122 unsigned int utimeout);
123static int kgd_address_watch_disable(struct kgd_dev *kgd);
124static int kgd_address_watch_execute(struct kgd_dev *kgd,
125 unsigned int watch_point_id,
126 uint32_t cntl_val,
127 uint32_t addr_hi,
128 uint32_t addr_lo);
129static int kgd_wave_control_execute(struct kgd_dev *kgd,
130 uint32_t gfx_index_val,
131 uint32_t sq_cmd);
132static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
133 unsigned int watch_point_id,
134 unsigned int reg_offset);
135
136static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
137 uint8_t vmid);
138static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
139 uint8_t vmid);
140static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
141 uint32_t page_table_base);
142static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
143static void set_scratch_backing_va(struct kgd_dev *kgd,
144 uint64_t va, uint32_t vmid);
145static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
146static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
147
148/* Because of REG_GET_FIELD() being used, we put this function in the
149 * asic specific file.
150 */
151static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
152 struct tile_config *config)
153{
154 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
155
156 config->gb_addr_config = adev->gfx.config.gb_addr_config;
157
158 config->tile_config_ptr = adev->gfx.config.tile_mode_array;
159 config->num_tile_configs =
160 ARRAY_SIZE(adev->gfx.config.tile_mode_array);
161 config->macro_tile_config_ptr =
162 adev->gfx.config.macrotile_mode_array;
163 config->num_macro_tile_configs =
164 ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
165
166 return 0;
167}
168
169static const struct kfd2kgd_calls kfd2kgd = {
170 .init_gtt_mem_allocation = alloc_gtt_mem,
171 .free_gtt_mem = free_gtt_mem,
172 .get_local_mem_info = get_local_mem_info,
173 .get_gpu_clock_counter = get_gpu_clock_counter,
174 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
175 .alloc_pasid = amdgpu_pasid_alloc,
176 .free_pasid = amdgpu_pasid_free,
177 .program_sh_mem_settings = kgd_program_sh_mem_settings,
178 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
179 .init_interrupts = kgd_init_interrupts,
180 .hqd_load = kgd_hqd_load,
181 .hqd_sdma_load = kgd_hqd_sdma_load,
182 .hqd_dump = kgd_hqd_dump,
183 .hqd_sdma_dump = kgd_hqd_sdma_dump,
184 .hqd_is_occupied = kgd_hqd_is_occupied,
185 .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
186 .hqd_destroy = kgd_hqd_destroy,
187 .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
188 .address_watch_disable = kgd_address_watch_disable,
189 .address_watch_execute = kgd_address_watch_execute,
190 .wave_control_execute = kgd_wave_control_execute,
191 .address_watch_get_offset = kgd_address_watch_get_offset,
192 .get_atc_vmid_pasid_mapping_pasid =
193 get_atc_vmid_pasid_mapping_pasid,
194 .get_atc_vmid_pasid_mapping_valid =
195 get_atc_vmid_pasid_mapping_valid,
196 .get_fw_version = get_fw_version,
197 .set_scratch_backing_va = set_scratch_backing_va,
198 .get_tile_config = amdgpu_amdkfd_get_tile_config,
199 .get_cu_info = get_cu_info,
200 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
201 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
202 .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
203 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
204 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
205 .set_vm_context_page_table_base = set_vm_context_page_table_base,
206 .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
207 .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
208 .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
209 .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
210 .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
211 .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
212 .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
213 .invalidate_tlbs = invalidate_tlbs,
214 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
215 .submit_ib = amdgpu_amdkfd_submit_ib,
216};
217
218struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
219{
220 return (struct kfd2kgd_calls *)&kfd2kgd;
221}
222
223static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
224{
225 return (struct amdgpu_device *)kgd;
226}
227
228static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
229 uint32_t queue, uint32_t vmid)
230{
231 struct amdgpu_device *adev = get_amdgpu_device(kgd);
232
233 mutex_lock(&adev->srbm_mutex);
234 soc15_grbm_select(adev, mec, pipe, queue, vmid);
235}
236
237static void unlock_srbm(struct kgd_dev *kgd)
238{
239 struct amdgpu_device *adev = get_amdgpu_device(kgd);
240
241 soc15_grbm_select(adev, 0, 0, 0, 0);
242 mutex_unlock(&adev->srbm_mutex);
243}
244
245static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
246 uint32_t queue_id)
247{
248 struct amdgpu_device *adev = get_amdgpu_device(kgd);
249
250 uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
251 uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
252
253 lock_srbm(kgd, mec, pipe, queue_id, 0);
254}
255
256static uint32_t get_queue_mask(struct amdgpu_device *adev,
257 uint32_t pipe_id, uint32_t queue_id)
258{
259 unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
260 queue_id) & 31;
261
262 return ((uint32_t)1) << bit;
263}
264
265static void release_queue(struct kgd_dev *kgd)
266{
267 unlock_srbm(kgd);
268}
269
270static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
271 uint32_t sh_mem_config,
272 uint32_t sh_mem_ape1_base,
273 uint32_t sh_mem_ape1_limit,
274 uint32_t sh_mem_bases)
275{
276 struct amdgpu_device *adev = get_amdgpu_device(kgd);
277
278 lock_srbm(kgd, 0, 0, 0, vmid);
279
280 WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
281 WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
282 /* APE1 no longer exists on GFX9 */
283
284 unlock_srbm(kgd);
285}
286
287static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
288 unsigned int vmid)
289{
290 struct amdgpu_device *adev = get_amdgpu_device(kgd);
291
292 /*
293 * We have to assume that there is no outstanding mapping.
294 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
295 * a mapping is in progress or because a mapping finished
296 * and the SW cleared it.
297 * So the protocol is to always wait & clear.
298 */
299 uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
300 ATC_VMID0_PASID_MAPPING__VALID_MASK;
301
302 /*
303 * need to do this twice, once for gfx and once for mmhub
304 * for ATC add 16 to VMID for mmhub, for IH different registers.
305 * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
306 */
307
308 WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
309 pasid_mapping);
310
311 while (!(RREG32(SOC15_REG_OFFSET(
312 ATHUB, 0,
313 mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
314 (1U << vmid)))
315 cpu_relax();
316
317 WREG32(SOC15_REG_OFFSET(ATHUB, 0,
318 mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
319 1U << vmid);
320
321 /* Mapping vmid to pasid also for IH block */
322 WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
323 pasid_mapping);
324
325 WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
326 pasid_mapping);
327
328 while (!(RREG32(SOC15_REG_OFFSET(
329 ATHUB, 0,
330 mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
331 (1U << (vmid + 16))))
332 cpu_relax();
333
334 WREG32(SOC15_REG_OFFSET(ATHUB, 0,
335 mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
336 1U << (vmid + 16));
337
338 /* Mapping vmid to pasid also for IH block */
339 WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
340 pasid_mapping);
341 return 0;
342}
343
344/* TODO - RING0 form of field is obsolete, seems to date back to SI
345 * but still works
346 */
347
348static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
349{
350 struct amdgpu_device *adev = get_amdgpu_device(kgd);
351 uint32_t mec;
352 uint32_t pipe;
353
354 mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
355 pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
356
357 lock_srbm(kgd, mec, pipe, 0, 0);
358
359 WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
360 CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
361 CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
362
363 unlock_srbm(kgd);
364
365 return 0;
366}
367
368static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
369 unsigned int engine_id,
370 unsigned int queue_id)
371{
372 uint32_t base[2] = {
373 SOC15_REG_OFFSET(SDMA0, 0,
374 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
375 SOC15_REG_OFFSET(SDMA1, 0,
376 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
377 };
378 uint32_t retval;
379
380 retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
381 mmSDMA0_RLC0_RB_CNTL);
382
383 pr_debug("sdma base address: 0x%x\n", retval);
384
385 return retval;
386}
387
388static inline struct v9_mqd *get_mqd(void *mqd)
389{
390 return (struct v9_mqd *)mqd;
391}
392
393static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
394{
395 return (struct v9_sdma_mqd *)mqd;
396}
397
398static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
399 uint32_t queue_id, uint32_t __user *wptr,
400 uint32_t wptr_shift, uint32_t wptr_mask,
401 struct mm_struct *mm)
402{
403 struct amdgpu_device *adev = get_amdgpu_device(kgd);
404 struct v9_mqd *m;
405 uint32_t *mqd_hqd;
406 uint32_t reg, hqd_base, data;
407
408 m = get_mqd(mqd);
409
410 acquire_queue(kgd, pipe_id, queue_id);
411
412 /* HIQ is set during driver init period with vmid set to 0*/
413 if (m->cp_hqd_vmid == 0) {
414 uint32_t value, mec, pipe;
415
416 mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
417 pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
418
419 pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
420 mec, pipe, queue_id);
421 value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
422 value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
423 ((mec << 5) | (pipe << 3) | queue_id | 0x80));
424 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
425 }
426
427 /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
428 mqd_hqd = &m->cp_mqd_base_addr_lo;
429 hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
430
431 for (reg = hqd_base;
432 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
433 WREG32(reg, mqd_hqd[reg - hqd_base]);
434
435
436 /* Activate doorbell logic before triggering WPTR poll. */
437 data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
438 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
439 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
440
441 if (wptr) {
442 /* Don't read wptr with get_user because the user
443 * context may not be accessible (if this function
444 * runs in a work queue). Instead trigger a one-shot
445 * polling read from memory in the CP. This assumes
446 * that wptr is GPU-accessible in the queue's VMID via
447 * ATC or SVM. WPTR==RPTR before starting the poll so
448 * the CP starts fetching new commands from the right
449 * place.
450 *
451 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
452 * tricky. Assume that the queue didn't overflow. The
453 * number of valid bits in the 32-bit RPTR depends on
454 * the queue size. The remaining bits are taken from
455 * the saved 64-bit WPTR. If the WPTR wrapped, add the
456 * queue size.
457 */
458 uint32_t queue_size =
459 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
460 CP_HQD_PQ_CONTROL, QUEUE_SIZE);
461 uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
462
463 if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
464 guessed_wptr += queue_size;
465 guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
466 guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
467
468 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
469 lower_32_bits(guessed_wptr));
470 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
471 upper_32_bits(guessed_wptr));
472 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
473 lower_32_bits((uintptr_t)wptr));
474 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
475 upper_32_bits((uintptr_t)wptr));
476 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
477 get_queue_mask(adev, pipe_id, queue_id));
478 }
479
480 /* Start the EOP fetcher */
481 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
482 REG_SET_FIELD(m->cp_hqd_eop_rptr,
483 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
484
485 data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
486 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
487
488 release_queue(kgd);
489
490 return 0;
491}
492
493static int kgd_hqd_dump(struct kgd_dev *kgd,
494 uint32_t pipe_id, uint32_t queue_id,
495 uint32_t (**dump)[2], uint32_t *n_regs)
496{
497 struct amdgpu_device *adev = get_amdgpu_device(kgd);
498 uint32_t i = 0, reg;
499#define HQD_N_REGS 56
500#define DUMP_REG(addr) do { \
501 if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
502 break; \
503 (*dump)[i][0] = (addr) << 2; \
504 (*dump)[i++][1] = RREG32(addr); \
505 } while (0)
506
507 *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
508 if (*dump == NULL)
509 return -ENOMEM;
510
511 acquire_queue(kgd, pipe_id, queue_id);
512
513 for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
514 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
515 DUMP_REG(reg);
516
517 release_queue(kgd);
518
519 WARN_ON_ONCE(i != HQD_N_REGS);
520 *n_regs = i;
521
522 return 0;
523}
524
525static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
526 uint32_t __user *wptr, struct mm_struct *mm)
527{
528 struct amdgpu_device *adev = get_amdgpu_device(kgd);
529 struct v9_sdma_mqd *m;
530 uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
531 unsigned long end_jiffies;
532 uint32_t data;
533 uint64_t data64;
534 uint64_t __user *wptr64 = (uint64_t __user *)wptr;
535
536 m = get_sdma_mqd(mqd);
537 sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
538 m->sdma_queue_id);
539 sdmax_gfx_context_cntl = m->sdma_engine_id ?
540 SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
541 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
542
543 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
544 m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
545
546 end_jiffies = msecs_to_jiffies(2000) + jiffies;
547 while (true) {
548 data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
549 if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
550 break;
551 if (time_after(jiffies, end_jiffies))
552 return -ETIME;
553 usleep_range(500, 1000);
554 }
555 data = RREG32(sdmax_gfx_context_cntl);
556 data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
557 RESUME_CTX, 0);
558 WREG32(sdmax_gfx_context_cntl, data);
559
560 WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
561 m->sdmax_rlcx_doorbell_offset);
562
563 data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
564 ENABLE, 1);
565 WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
566 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
567 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
568 m->sdmax_rlcx_rb_rptr_hi);
569
570 WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
571 if (read_user_wptr(mm, wptr64, data64)) {
572 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
573 lower_32_bits(data64));
574 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
575 upper_32_bits(data64));
576 } else {
577 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
578 m->sdmax_rlcx_rb_rptr);
579 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
580 m->sdmax_rlcx_rb_rptr_hi);
581 }
582 WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
583
584 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
585 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
586 m->sdmax_rlcx_rb_base_hi);
587 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
588 m->sdmax_rlcx_rb_rptr_addr_lo);
589 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
590 m->sdmax_rlcx_rb_rptr_addr_hi);
591
592 data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
593 RB_ENABLE, 1);
594 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
595
596 return 0;
597}
598
599static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
600 uint32_t engine_id, uint32_t queue_id,
601 uint32_t (**dump)[2], uint32_t *n_regs)
602{
603 struct amdgpu_device *adev = get_amdgpu_device(kgd);
604 uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
605 uint32_t i = 0, reg;
606#undef HQD_N_REGS
607#define HQD_N_REGS (19+6+7+10)
608
609 *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
610 if (*dump == NULL)
611 return -ENOMEM;
612
613 for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
614 DUMP_REG(sdma_base_addr + reg);
615 for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
616 DUMP_REG(sdma_base_addr + reg);
617 for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
618 reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
619 DUMP_REG(sdma_base_addr + reg);
620 for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
621 reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
622 DUMP_REG(sdma_base_addr + reg);
623
624 WARN_ON_ONCE(i != HQD_N_REGS);
625 *n_regs = i;
626
627 return 0;
628}
629
630static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
631 uint32_t pipe_id, uint32_t queue_id)
632{
633 struct amdgpu_device *adev = get_amdgpu_device(kgd);
634 uint32_t act;
635 bool retval = false;
636 uint32_t low, high;
637
638 acquire_queue(kgd, pipe_id, queue_id);
639 act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
640 if (act) {
641 low = lower_32_bits(queue_address >> 8);
642 high = upper_32_bits(queue_address >> 8);
643
644 if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
645 high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
646 retval = true;
647 }
648 release_queue(kgd);
649 return retval;
650}
651
652static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
653{
654 struct amdgpu_device *adev = get_amdgpu_device(kgd);
655 struct v9_sdma_mqd *m;
656 uint32_t sdma_base_addr;
657 uint32_t sdma_rlc_rb_cntl;
658
659 m = get_sdma_mqd(mqd);
660 sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
661 m->sdma_queue_id);
662
663 sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
664
665 if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
666 return true;
667
668 return false;
669}
670
671static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
672 enum kfd_preempt_type reset_type,
673 unsigned int utimeout, uint32_t pipe_id,
674 uint32_t queue_id)
675{
676 struct amdgpu_device *adev = get_amdgpu_device(kgd);
677 enum hqd_dequeue_request_type type;
678 unsigned long end_jiffies;
679 uint32_t temp;
680 struct v9_mqd *m = get_mqd(mqd);
681
682 acquire_queue(kgd, pipe_id, queue_id);
683
684 if (m->cp_hqd_vmid == 0)
685 WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
686
687 switch (reset_type) {
688 case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
689 type = DRAIN_PIPE;
690 break;
691 case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
692 type = RESET_WAVES;
693 break;
694 default:
695 type = DRAIN_PIPE;
696 break;
697 }
698
699 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
700
701 end_jiffies = (utimeout * HZ / 1000) + jiffies;
702 while (true) {
703 temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
704 if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
705 break;
706 if (time_after(jiffies, end_jiffies)) {
707 pr_err("cp queue preemption time out.\n");
708 release_queue(kgd);
709 return -ETIME;
710 }
711 usleep_range(500, 1000);
712 }
713
714 release_queue(kgd);
715 return 0;
716}
717
718static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
719 unsigned int utimeout)
720{
721 struct amdgpu_device *adev = get_amdgpu_device(kgd);
722 struct v9_sdma_mqd *m;
723 uint32_t sdma_base_addr;
724 uint32_t temp;
725 unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
726
727 m = get_sdma_mqd(mqd);
728 sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
729 m->sdma_queue_id);
730
731 temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
732 temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
733 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
734
735 while (true) {
736 temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
737 if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
738 break;
739 if (time_after(jiffies, end_jiffies))
740 return -ETIME;
741 usleep_range(500, 1000);
742 }
743
744 WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
745 WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
746 RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
747 SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
748
749 m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
750 m->sdmax_rlcx_rb_rptr_hi =
751 RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
752
753 return 0;
754}
755
756static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
757 uint8_t vmid)
758{
759 uint32_t reg;
760 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
761
762 reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
763 + vmid);
764 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
765}
766
767static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
768 uint8_t vmid)
769{
770 uint32_t reg;
771 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
772
773 reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
774 + vmid);
775 return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
776}
777
778static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
779{
780 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
781 uint32_t req = (1 << vmid) |
782 (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
783 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
784 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
785 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
786 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
787 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
788
789 mutex_lock(&adev->srbm_mutex);
790
791 /* Use legacy mode tlb invalidation.
792 *
793 * Currently on Raven the code below is broken for anything but
794 * legacy mode due to a MMHUB power gating problem. A workaround
795 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
796 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
797 * bit.
798 *
799 * TODO 1: agree on the right set of invalidation registers for
800 * KFD use. Use the last one for now. Invalidate both GC and
801 * MMHUB.
802 *
 803 * TODO 2: support range-based invalidation, requires kfd2kgd
804 * interface change
805 */
806 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
807 0xffffffff);
808 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
809 0x0000001f);
810
811 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
812 mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
813 0xffffffff);
814 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
815 mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
816 0x0000001f);
817
818 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
819
820 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
821 req);
822
823 while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
824 (1 << vmid)))
825 cpu_relax();
826
827 while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
828 mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
829 (1 << vmid)))
830 cpu_relax();
831
832 mutex_unlock(&adev->srbm_mutex);
833
834}
835
836static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
837{
838 signed long r;
839 uint32_t seq;
840 struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
841
842 spin_lock(&adev->gfx.kiq.ring_lock);
843 amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
844 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
845 amdgpu_ring_write(ring,
846 PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
847 PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
848 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
849 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
850 amdgpu_fence_emit_polling(ring, &seq);
851 amdgpu_ring_commit(ring);
852 spin_unlock(&adev->gfx.kiq.ring_lock);
853
854 r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
855 if (r < 1) {
856 DRM_ERROR("wait for kiq fence error: %ld.\n", r);
857 return -ETIME;
858 }
859
860 return 0;
861}
862
863static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
864{
865 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
866 int vmid;
867 struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
868
869 if (ring->ready)
870 return invalidate_tlbs_with_kiq(adev, pasid);
871
872 for (vmid = 0; vmid < 16; vmid++) {
873 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
874 continue;
875 if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
876 if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
877 == pasid) {
878 write_vmid_invalidate_request(kgd, vmid);
879 break;
880 }
881 }
882 }
883
884 return 0;
885}
886
887static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
888{
889 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
890
891 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
892 pr_err("non kfd vmid %d\n", vmid);
893 return 0;
894 }
895
896 write_vmid_invalidate_request(kgd, vmid);
897 return 0;
898}
899
900static int kgd_address_watch_disable(struct kgd_dev *kgd)
901{
902 return 0;
903}
904
905static int kgd_address_watch_execute(struct kgd_dev *kgd,
906 unsigned int watch_point_id,
907 uint32_t cntl_val,
908 uint32_t addr_hi,
909 uint32_t addr_lo)
910{
911 return 0;
912}
913
914static int kgd_wave_control_execute(struct kgd_dev *kgd,
915 uint32_t gfx_index_val,
916 uint32_t sq_cmd)
917{
918 struct amdgpu_device *adev = get_amdgpu_device(kgd);
919 uint32_t data = 0;
920
921 mutex_lock(&adev->grbm_idx_mutex);
922
923 WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
924 WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
925
926 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
927 INSTANCE_BROADCAST_WRITES, 1);
928 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
929 SH_BROADCAST_WRITES, 1);
930 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
931 SE_BROADCAST_WRITES, 1);
932
933 WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
934 mutex_unlock(&adev->grbm_idx_mutex);
935
936 return 0;
937}
938
939static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
940 unsigned int watch_point_id,
941 unsigned int reg_offset)
942{
943 return 0;
944}
945
946static void set_scratch_backing_va(struct kgd_dev *kgd,
947 uint64_t va, uint32_t vmid)
948{
949 /* No longer needed on GFXv9. The scratch base address is
950 * passed to the shader by the CP. It's the user mode driver's
951 * responsibility.
952 */
953}
954
955/* FIXME: Does this need to be ASIC-specific code? */
956static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
957{
958 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
959 const union amdgpu_firmware_header *hdr;
960
961 switch (type) {
962 case KGD_ENGINE_PFP:
963 hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
964 break;
965
966 case KGD_ENGINE_ME:
967 hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
968 break;
969
970 case KGD_ENGINE_CE:
971 hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
972 break;
973
974 case KGD_ENGINE_MEC1:
975 hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
976 break;
977
978 case KGD_ENGINE_MEC2:
979 hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
980 break;
981
982 case KGD_ENGINE_RLC:
983 hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
984 break;
985
986 case KGD_ENGINE_SDMA1:
987 hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
988 break;
989
990 case KGD_ENGINE_SDMA2:
991 hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
992 break;
993
994 default:
995 return 0;
996 }
997
998 if (hdr == NULL)
999 return 0;
1000
1001 /* Only 12 bits in use */
1002 return hdr->common.ucode_version;
1003}
1004
1005static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
1006 uint32_t page_table_base)
1007{
1008 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1009 uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
1010 AMDGPU_PTE_VALID;
1011
1012 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
1013 pr_err("trying to set page table base for wrong VMID %u\n",
1014 vmid);
1015 return;
1016 }
1017
1018 /* TODO: take advantage of per-process address space size. For
1019 * now, all processes share the same address space size, like
1020 * on GFX8 and older.
1021 */
1022 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
1023 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
1024
1025 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
1026 lower_32_bits(adev->vm_manager.max_pfn - 1));
1027 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
1028 upper_32_bits(adev->vm_manager.max_pfn - 1));
1029
1030 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
1031 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
1032
1033 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
1034 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
1035
1036 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
1037 lower_32_bits(adev->vm_manager.max_pfn - 1));
1038 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
1039 upper_32_bits(adev->vm_manager.max_pfn - 1));
1040
1041 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
1042 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
1043}
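For reference, page_table_base here is a page frame number: the shift by PAGE_SHIFT turns it into a byte address and AMDGPU_PTE_VALID marks the entry valid before the value is split across the LO32/HI32 register pair. A worked example, assuming 4 KiB pages (PAGE_SHIFT == 12) and a page directory at GPU page 0x1234:

	base = ((uint64_t)0x1234 << 12) | AMDGPU_PTE_VALID;	/* 0x0000000001234001 */
	/* lower_32_bits(base) == 0x01234001 -> ..._PAGE_TABLE_BASE_ADDR_LO32 */
	/* upper_32_bits(base) == 0x00000000 -> ..._PAGE_TABLE_BASE_ADDR_HI32 */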
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1d6e1479da38..ff8fd75f7ca5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -23,6 +23,8 @@
23#define pr_fmt(fmt) "kfd2kgd: " fmt 23#define pr_fmt(fmt) "kfd2kgd: " fmt
24 24
25#include <linux/list.h> 25#include <linux/list.h>
26#include <linux/pagemap.h>
27#include <linux/sched/mm.h>
26#include <drm/drmP.h> 28#include <drm/drmP.h>
27#include "amdgpu_object.h" 29#include "amdgpu_object.h"
28#include "amdgpu_vm.h" 30#include "amdgpu_vm.h"
@@ -33,10 +35,20 @@
33 */ 35 */
34#define VI_BO_SIZE_ALIGN (0x8000) 36#define VI_BO_SIZE_ALIGN (0x8000)
35 37
38/* BO flag to indicate a KFD userptr BO */
39#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
40
41/* Userptr restore delay, just long enough to allow consecutive VM
42 * changes to accumulate
43 */
44#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
45
36/* Impose limit on how much memory KFD can use */ 46/* Impose limit on how much memory KFD can use */
37static struct { 47static struct {
38 uint64_t max_system_mem_limit; 48 uint64_t max_system_mem_limit;
49 uint64_t max_userptr_mem_limit;
39 int64_t system_mem_used; 50 int64_t system_mem_used;
51 int64_t userptr_mem_used;
40 spinlock_t mem_limit_lock; 52 spinlock_t mem_limit_lock;
41} kfd_mem_limit; 53} kfd_mem_limit;
42 54
@@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = {
57 69
58#define domain_string(domain) domain_bit_to_string[ffs(domain)-1] 70#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
59 71
72static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
60 73
61 74
62static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) 75static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
78 91
 79/* Set memory usage limits. Currently, limits are 92/* Set memory usage limits. Currently, limits are
80 * System (kernel) memory - 3/8th System RAM 93 * System (kernel) memory - 3/8th System RAM
94 * Userptr memory - 3/4th System RAM
81 */ 95 */
82void amdgpu_amdkfd_gpuvm_init_mem_limits(void) 96void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
83{ 97{
@@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
90 104
91 spin_lock_init(&kfd_mem_limit.mem_limit_lock); 105 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
92 kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); 106 kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
93 pr_debug("Kernel memory limit %lluM\n", 107 kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
94 (kfd_mem_limit.max_system_mem_limit >> 20)); 108 pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
109 (kfd_mem_limit.max_system_mem_limit >> 20),
110 (kfd_mem_limit.max_userptr_mem_limit >> 20));
95} 111}
96 112
97static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, 113static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
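The shifts in the hunk above encode the fractions documented in the comment: (mem >> 1) - (mem >> 3) is 1/2 - 1/8 = 3/8 of system RAM for kernel (GTT) allocations, and mem - (mem >> 2) is 3/4 for userptr memory. A quick sanity check for a 16 GiB machine:

	mem = 16ULL << 30;					/* 16 GiB of system RAM */
	max_system_mem_limit  = (mem >> 1) - (mem >> 3);	/*  8 GiB - 2 GiB = 6 GiB */
	max_userptr_mem_limit = mem - (mem >> 2);		/* 16 GiB - 4 GiB = 12 GiB */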
@@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
111 goto err_no_mem; 127 goto err_no_mem;
112 } 128 }
113 kfd_mem_limit.system_mem_used += (acc_size + size); 129 kfd_mem_limit.system_mem_used += (acc_size + size);
130 } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
131 if ((kfd_mem_limit.system_mem_used + acc_size >
132 kfd_mem_limit.max_system_mem_limit) ||
133 (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
134 kfd_mem_limit.max_userptr_mem_limit)) {
135 ret = -ENOMEM;
136 goto err_no_mem;
137 }
138 kfd_mem_limit.system_mem_used += acc_size;
139 kfd_mem_limit.userptr_mem_used += size;
114 } 140 }
115err_no_mem: 141err_no_mem:
116 spin_unlock(&kfd_mem_limit.mem_limit_lock); 142 spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
126 sizeof(struct amdgpu_bo)); 152 sizeof(struct amdgpu_bo));
127 153
128 spin_lock(&kfd_mem_limit.mem_limit_lock); 154 spin_lock(&kfd_mem_limit.mem_limit_lock);
129 if (domain == AMDGPU_GEM_DOMAIN_GTT) 155 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
130 kfd_mem_limit.system_mem_used -= (acc_size + size); 156 kfd_mem_limit.system_mem_used -= (acc_size + size);
157 } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
158 kfd_mem_limit.system_mem_used -= acc_size;
159 kfd_mem_limit.userptr_mem_used -= size;
160 }
131 WARN_ONCE(kfd_mem_limit.system_mem_used < 0, 161 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
132 "kfd system memory accounting unbalanced"); 162 "kfd system memory accounting unbalanced");
163 WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
164 "kfd userptr memory accounting unbalanced");
133 165
134 spin_unlock(&kfd_mem_limit.mem_limit_lock); 166 spin_unlock(&kfd_mem_limit.mem_limit_lock);
135} 167}
@@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
138{ 170{
139 spin_lock(&kfd_mem_limit.mem_limit_lock); 171 spin_lock(&kfd_mem_limit.mem_limit_lock);
140 172
141 if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { 173 if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
174 kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
175 kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
176 } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
142 kfd_mem_limit.system_mem_used -= 177 kfd_mem_limit.system_mem_used -=
143 (bo->tbo.acc_size + amdgpu_bo_size(bo)); 178 (bo->tbo.acc_size + amdgpu_bo_size(bo));
144 } 179 }
145 WARN_ONCE(kfd_mem_limit.system_mem_used < 0, 180 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
146 "kfd system memory accounting unbalanced"); 181 "kfd system memory accounting unbalanced");
182 WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
183 "kfd userptr memory accounting unbalanced");
147 184
148 spin_unlock(&kfd_mem_limit.mem_limit_lock); 185 spin_unlock(&kfd_mem_limit.mem_limit_lock);
149} 186}
@@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
506} 543}
507 544
508static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, 545static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
509 struct amdkfd_process_info *process_info) 546 struct amdkfd_process_info *process_info,
547 bool userptr)
510{ 548{
511 struct ttm_validate_buffer *entry = &mem->validate_list; 549 struct ttm_validate_buffer *entry = &mem->validate_list;
512 struct amdgpu_bo *bo = mem->bo; 550 struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
515 entry->shared = true; 553 entry->shared = true;
516 entry->bo = &bo->tbo; 554 entry->bo = &bo->tbo;
517 mutex_lock(&process_info->lock); 555 mutex_lock(&process_info->lock);
518 list_add_tail(&entry->head, &process_info->kfd_bo_list); 556 if (userptr)
557 list_add_tail(&entry->head, &process_info->userptr_valid_list);
558 else
559 list_add_tail(&entry->head, &process_info->kfd_bo_list);
519 mutex_unlock(&process_info->lock); 560 mutex_unlock(&process_info->lock);
520} 561}
521 562
563/* Initializes user pages. It registers the MMU notifier and validates
564 * the userptr BO in the GTT domain.
565 *
566 * The BO must already be on the userptr_valid_list. Otherwise an
567 * eviction and restore may happen that leaves the new BO unmapped
568 * with the user mode queues running.
569 *
570 * Takes the process_info->lock to protect against concurrent restore
571 * workers.
572 *
573 * Returns 0 for success, negative errno for errors.
574 */
575static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
576 uint64_t user_addr)
577{
578 struct amdkfd_process_info *process_info = mem->process_info;
579 struct amdgpu_bo *bo = mem->bo;
580 struct ttm_operation_ctx ctx = { true, false };
581 int ret = 0;
582
583 mutex_lock(&process_info->lock);
584
585 ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
586 if (ret) {
587 pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
588 goto out;
589 }
590
591 ret = amdgpu_mn_register(bo, user_addr);
592 if (ret) {
593 pr_err("%s: Failed to register MMU notifier: %d\n",
594 __func__, ret);
595 goto out;
596 }
597
598 /* If no restore worker is running concurrently, user_pages
599 * should not be allocated
600 */
601 WARN(mem->user_pages, "Leaking user_pages array");
602
603 mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
604 sizeof(struct page *),
605 GFP_KERNEL | __GFP_ZERO);
606 if (!mem->user_pages) {
607 pr_err("%s: Failed to allocate pages array\n", __func__);
608 ret = -ENOMEM;
609 goto unregister_out;
610 }
611
612 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
613 if (ret) {
614 pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
615 goto free_out;
616 }
617
618 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
619
620 ret = amdgpu_bo_reserve(bo, true);
621 if (ret) {
622 pr_err("%s: Failed to reserve BO\n", __func__);
623 goto release_out;
624 }
625 amdgpu_ttm_placement_from_domain(bo, mem->domain);
626 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
627 if (ret)
628 pr_err("%s: failed to validate BO\n", __func__);
629 amdgpu_bo_unreserve(bo);
630
631release_out:
632 if (ret)
633 release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
634free_out:
635 kvfree(mem->user_pages);
636 mem->user_pages = NULL;
637unregister_out:
638 if (ret)
639 amdgpu_mn_unregister(bo);
640out:
641 mutex_unlock(&process_info->lock);
642 return ret;
643}
644
522/* Reserving a BO and its page table BOs must happen atomically to 645/* Reserving a BO and its page table BOs must happen atomically to
523 * avoid deadlocks. Some operations update multiple VMs at once. Track 646 * avoid deadlocks. Some operations update multiple VMs at once. Track
524 * all the reservation info in a context structure. Optionally a sync 647 * all the reservation info in a context structure. Optionally a sync
@@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
748} 871}
749 872
750static int map_bo_to_gpuvm(struct amdgpu_device *adev, 873static int map_bo_to_gpuvm(struct amdgpu_device *adev,
751 struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) 874 struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
875 bool no_update_pte)
752{ 876{
753 int ret; 877 int ret;
754 878
@@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
762 return ret; 886 return ret;
763 } 887 }
764 888
889 if (no_update_pte)
890 return 0;
891
765 ret = update_gpuvm_pte(adev, entry, sync); 892 ret = update_gpuvm_pte(adev, entry, sync);
766 if (ret) { 893 if (ret) {
767 pr_err("update_gpuvm_pte() failed\n"); 894 pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
820 mutex_init(&info->lock); 947 mutex_init(&info->lock);
821 INIT_LIST_HEAD(&info->vm_list_head); 948 INIT_LIST_HEAD(&info->vm_list_head);
822 INIT_LIST_HEAD(&info->kfd_bo_list); 949 INIT_LIST_HEAD(&info->kfd_bo_list);
950 INIT_LIST_HEAD(&info->userptr_valid_list);
951 INIT_LIST_HEAD(&info->userptr_inval_list);
823 952
824 info->eviction_fence = 953 info->eviction_fence =
825 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), 954 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
830 goto create_evict_fence_fail; 959 goto create_evict_fence_fail;
831 } 960 }
832 961
962 info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
963 atomic_set(&info->evicted_bos, 0);
964 INIT_DELAYED_WORK(&info->restore_userptr_work,
965 amdgpu_amdkfd_restore_userptr_worker);
966
833 *process_info = info; 967 *process_info = info;
834 *ef = dma_fence_get(&info->eviction_fence->base); 968 *ef = dma_fence_get(&info->eviction_fence->base);
835 } 969 }
@@ -872,6 +1006,7 @@ reserve_pd_fail:
872 dma_fence_put(*ef); 1006 dma_fence_put(*ef);
873 *ef = NULL; 1007 *ef = NULL;
874 *process_info = NULL; 1008 *process_info = NULL;
1009 put_pid(info->pid);
875create_evict_fence_fail: 1010create_evict_fence_fail:
876 mutex_destroy(&info->lock); 1011 mutex_destroy(&info->lock);
877 kfree(info); 1012 kfree(info);
@@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
967 /* Release per-process resources when last compute VM is destroyed */ 1102 /* Release per-process resources when last compute VM is destroyed */
968 if (!process_info->n_vms) { 1103 if (!process_info->n_vms) {
969 WARN_ON(!list_empty(&process_info->kfd_bo_list)); 1104 WARN_ON(!list_empty(&process_info->kfd_bo_list));
1105 WARN_ON(!list_empty(&process_info->userptr_valid_list));
1106 WARN_ON(!list_empty(&process_info->userptr_inval_list));
970 1107
971 dma_fence_put(&process_info->eviction_fence->base); 1108 dma_fence_put(&process_info->eviction_fence->base);
1109 cancel_delayed_work_sync(&process_info->restore_userptr_work);
1110 put_pid(process_info->pid);
972 mutex_destroy(&process_info->lock); 1111 mutex_destroy(&process_info->lock);
973 kfree(process_info); 1112 kfree(process_info);
974 } 1113 }
@@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1003{ 1142{
1004 struct amdgpu_device *adev = get_amdgpu_device(kgd); 1143 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1005 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; 1144 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1145 uint64_t user_addr = 0;
1006 struct amdgpu_bo *bo; 1146 struct amdgpu_bo *bo;
1147 struct amdgpu_bo_param bp;
1007 int byte_align; 1148 int byte_align;
1008 u32 alloc_domain; 1149 u32 domain, alloc_domain;
1009 u64 alloc_flags; 1150 u64 alloc_flags;
1010 uint32_t mapping_flags; 1151 uint32_t mapping_flags;
1011 int ret; 1152 int ret;
@@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1014 * Check on which domain to allocate BO 1155 * Check on which domain to allocate BO
1015 */ 1156 */
1016 if (flags & ALLOC_MEM_FLAGS_VRAM) { 1157 if (flags & ALLOC_MEM_FLAGS_VRAM) {
1017 alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; 1158 domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
1018 alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; 1159 alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
1019 alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? 1160 alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
1020 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 1161 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
1021 AMDGPU_GEM_CREATE_NO_CPU_ACCESS; 1162 AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
1022 } else if (flags & ALLOC_MEM_FLAGS_GTT) { 1163 } else if (flags & ALLOC_MEM_FLAGS_GTT) {
1023 alloc_domain = AMDGPU_GEM_DOMAIN_GTT; 1164 domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
1165 alloc_flags = 0;
1166 } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
1167 domain = AMDGPU_GEM_DOMAIN_GTT;
1168 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1024 alloc_flags = 0; 1169 alloc_flags = 0;
1170 if (!offset || !*offset)
1171 return -EINVAL;
1172 user_addr = *offset;
1025 } else { 1173 } else {
1026 return -EINVAL; 1174 return -EINVAL;
1027 } 1175 }
@@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1069 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", 1217 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
1070 va, size, domain_string(alloc_domain)); 1218 va, size, domain_string(alloc_domain));
1071 1219
1072 ret = amdgpu_bo_create(adev, size, byte_align, 1220 memset(&bp, 0, sizeof(bp));
1073 alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo); 1221 bp.size = size;
1222 bp.byte_align = byte_align;
1223 bp.domain = alloc_domain;
1224 bp.flags = alloc_flags;
1225 bp.type = ttm_bo_type_device;
1226 bp.resv = NULL;
1227 ret = amdgpu_bo_create(adev, &bp, &bo);
1074 if (ret) { 1228 if (ret) {
1075 pr_debug("Failed to create BO on domain %s. ret %d\n", 1229 pr_debug("Failed to create BO on domain %s. ret %d\n",
1076 domain_string(alloc_domain), ret); 1230 domain_string(alloc_domain), ret);
@@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1078 } 1232 }
1079 bo->kfd_bo = *mem; 1233 bo->kfd_bo = *mem;
1080 (*mem)->bo = bo; 1234 (*mem)->bo = bo;
1235 if (user_addr)
1236 bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
1081 1237
1082 (*mem)->va = va; 1238 (*mem)->va = va;
1083 (*mem)->domain = alloc_domain; 1239 (*mem)->domain = domain;
1084 (*mem)->mapped_to_gpu_memory = 0; 1240 (*mem)->mapped_to_gpu_memory = 0;
1085 (*mem)->process_info = avm->process_info; 1241 (*mem)->process_info = avm->process_info;
1086 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info); 1242 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
1243
1244 if (user_addr) {
1245 ret = init_user_pages(*mem, current->mm, user_addr);
1246 if (ret) {
1247 mutex_lock(&avm->process_info->lock);
1248 list_del(&(*mem)->validate_list.head);
1249 mutex_unlock(&avm->process_info->lock);
1250 goto allocate_init_user_pages_failed;
1251 }
1252 }
1087 1253
1088 if (offset) 1254 if (offset)
1089 *offset = amdgpu_bo_mmap_offset(bo); 1255 *offset = amdgpu_bo_mmap_offset(bo);
1090 1256
1091 return 0; 1257 return 0;
1092 1258
1259allocate_init_user_pages_failed:
1260 amdgpu_bo_unref(&bo);
1261 /* Don't unreserve system mem limit twice */
1262 goto err_reserve_system_mem;
1093err_bo_create: 1263err_bo_create:
1094 unreserve_system_mem_limit(adev, size, alloc_domain); 1264 unreserve_system_mem_limit(adev, size, alloc_domain);
1095err_reserve_system_mem: 1265err_reserve_system_mem:
@@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1122 * be freed anyway 1292 * be freed anyway
1123 */ 1293 */
1124 1294
1295 /* No more MMU notifiers */
1296 amdgpu_mn_unregister(mem->bo);
1297
1125 /* Make sure restore workers don't access the BO any more */ 1298 /* Make sure restore workers don't access the BO any more */
1126 bo_list_entry = &mem->validate_list; 1299 bo_list_entry = &mem->validate_list;
1127 mutex_lock(&process_info->lock); 1300 mutex_lock(&process_info->lock);
1128 list_del(&bo_list_entry->head); 1301 list_del(&bo_list_entry->head);
1129 mutex_unlock(&process_info->lock); 1302 mutex_unlock(&process_info->lock);
1130 1303
1304 /* Free user pages if necessary */
1305 if (mem->user_pages) {
1306 pr_debug("%s: Freeing user_pages array\n", __func__);
1307 if (mem->user_pages[0])
1308 release_pages(mem->user_pages,
1309 mem->bo->tbo.ttm->num_pages);
1310 kvfree(mem->user_pages);
1311 }
1312
1131 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); 1313 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
1132 if (unlikely(ret)) 1314 if (unlikely(ret))
1133 return ret; 1315 return ret;
@@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1173 struct kfd_bo_va_list *bo_va_entry = NULL; 1355 struct kfd_bo_va_list *bo_va_entry = NULL;
1174 struct kfd_bo_va_list *bo_va_entry_aql = NULL; 1356 struct kfd_bo_va_list *bo_va_entry_aql = NULL;
1175 unsigned long bo_size; 1357 unsigned long bo_size;
1176 1358 bool is_invalid_userptr = false;
1177 /* Make sure restore is not running concurrently.
1178 */
1179 mutex_lock(&mem->process_info->lock);
1180
1181 mutex_lock(&mem->lock);
1182 1359
1183 bo = mem->bo; 1360 bo = mem->bo;
1184
1185 if (!bo) { 1361 if (!bo) {
1186 pr_err("Invalid BO when mapping memory to GPU\n"); 1362 pr_err("Invalid BO when mapping memory to GPU\n");
1187 ret = -EINVAL; 1363 return -EINVAL;
1188 goto out;
1189 } 1364 }
1190 1365
1366 /* Make sure restore is not running concurrently. Since we
1367 * don't map invalid userptr BOs, we rely on the next restore
1368 * worker to do the mapping
1369 */
1370 mutex_lock(&mem->process_info->lock);
1371
1372 /* Lock mmap-sem. If we find an invalid userptr BO, we can be
1373 * sure that the MMU notifier is no longer running
1374 * concurrently and the queues are actually stopped
1375 */
1376 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1377 down_write(&current->mm->mmap_sem);
1378 is_invalid_userptr = atomic_read(&mem->invalid);
1379 up_write(&current->mm->mmap_sem);
1380 }
1381
1382 mutex_lock(&mem->lock);
1383
1191 domain = mem->domain; 1384 domain = mem->domain;
1192 bo_size = bo->tbo.mem.size; 1385 bo_size = bo->tbo.mem.size;
1193 1386
@@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1200 if (unlikely(ret)) 1393 if (unlikely(ret))
1201 goto out; 1394 goto out;
1202 1395
1396 /* Userptr can be marked as "not invalid", but not actually be
1397 * validated yet (still in the system domain). In that case
1398 * the queues are still stopped and we can leave mapping for
1399 * the next restore worker
1400 */
1401 if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
1402 is_invalid_userptr = true;
1403
1203 if (check_if_add_bo_to_vm(avm, mem)) { 1404 if (check_if_add_bo_to_vm(avm, mem)) {
1204 ret = add_bo_to_vm(adev, mem, avm, false, 1405 ret = add_bo_to_vm(adev, mem, avm, false,
1205 &bo_va_entry); 1406 &bo_va_entry);
@@ -1217,7 +1418,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1217 goto add_bo_to_vm_failed; 1418 goto add_bo_to_vm_failed;
1218 } 1419 }
1219 1420
1220 if (mem->mapped_to_gpu_memory == 0) { 1421 if (mem->mapped_to_gpu_memory == 0 &&
1422 !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1221 /* Validate BO only once. The eviction fence gets added to BO 1423 /* Validate BO only once. The eviction fence gets added to BO
1222 * the first time it is mapped. Validate will wait for all 1424 * the first time it is mapped. Validate will wait for all
1223 * background evictions to complete. 1425 * background evictions to complete.
@@ -1235,7 +1437,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1235 entry->va, entry->va + bo_size, 1437 entry->va, entry->va + bo_size,
1236 entry); 1438 entry);
1237 1439
1238 ret = map_bo_to_gpuvm(adev, entry, ctx.sync); 1440 ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
1441 is_invalid_userptr);
1239 if (ret) { 1442 if (ret) {
1240 pr_err("Failed to map radeon bo to gpuvm\n"); 1443 pr_err("Failed to map radeon bo to gpuvm\n");
1241 goto map_bo_to_gpuvm_failed; 1444 goto map_bo_to_gpuvm_failed;
@@ -1418,6 +1621,337 @@ bo_reserve_failed:
1418 return ret; 1621 return ret;
1419} 1622}
1420 1623
1624/* Evict a userptr BO by stopping the queues if necessary
1625 *
1626 * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
1627 * cannot do any memory allocations, and cannot take any locks that
1628 * are held elsewhere while allocating memory. Therefore this is as
1629 * simple as possible, using atomic counters.
1630 *
1631 * It doesn't do anything to the BO itself. The real work happens in
1632 * restore, where we get updated page addresses. This function only
1633 * ensures that GPU access to the BO is stopped.
1634 */
1635int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
1636 struct mm_struct *mm)
1637{
1638 struct amdkfd_process_info *process_info = mem->process_info;
1639 int invalid, evicted_bos;
1640 int r = 0;
1641
1642 invalid = atomic_inc_return(&mem->invalid);
1643 evicted_bos = atomic_inc_return(&process_info->evicted_bos);
1644 if (evicted_bos == 1) {
1645 /* First eviction, stop the queues */
1646 r = kgd2kfd->quiesce_mm(mm);
1647 if (r)
1648 pr_err("Failed to quiesce KFD\n");
1649 schedule_delayed_work(&process_info->restore_userptr_work,
1650 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
1651 }
1652
1653 return r;
1654}
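amdgpu_amdkfd_evict_userptr() is only the KFD half of an eviction; the trigger sits in the amdgpu_mn.c notifier changes elsewhere in this series. A rough sketch of how the HSA invalidate-range path is expected to reach it for each userptr BO overlapping the invalidated range (simplified, relying on the bo->kfd_bo back-pointer set in the allocation path above):

	/* per BO in the invalidated interval, inside the range_start notifier */
	if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) {
		struct kgd_mem *mem = bo->kfd_bo;

		amdgpu_amdkfd_evict_userptr(mem, mm);
	}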
1655
1656/* Update invalid userptr BOs
1657 *
1658 * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
1659 * userptr_inval_list and updates user pages for all BOs that have
1660 * been invalidated since their last update.
1661 */
1662static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
1663 struct mm_struct *mm)
1664{
1665 struct kgd_mem *mem, *tmp_mem;
1666 struct amdgpu_bo *bo;
1667 struct ttm_operation_ctx ctx = { false, false };
1668 int invalid, ret;
1669
1670 /* Move all invalidated BOs to the userptr_inval_list and
1671 * release their user pages by migration to the CPU domain
1672 */
1673 list_for_each_entry_safe(mem, tmp_mem,
1674 &process_info->userptr_valid_list,
1675 validate_list.head) {
1676 if (!atomic_read(&mem->invalid))
1677 continue; /* BO is still valid */
1678
1679 bo = mem->bo;
1680
1681 if (amdgpu_bo_reserve(bo, true))
1682 return -EAGAIN;
1683 amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
1684 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1685 amdgpu_bo_unreserve(bo);
1686 if (ret) {
1687 pr_err("%s: Failed to invalidate userptr BO\n",
1688 __func__);
1689 return -EAGAIN;
1690 }
1691
1692 list_move_tail(&mem->validate_list.head,
1693 &process_info->userptr_inval_list);
1694 }
1695
1696 if (list_empty(&process_info->userptr_inval_list))
1697 return 0; /* All evicted userptr BOs were freed */
1698
1699 /* Go through userptr_inval_list and update any invalid user_pages */
1700 list_for_each_entry(mem, &process_info->userptr_inval_list,
1701 validate_list.head) {
1702 invalid = atomic_read(&mem->invalid);
1703 if (!invalid)
1704 /* BO hasn't been invalidated since the last
1705 * revalidation attempt. Keep its BO list.
1706 */
1707 continue;
1708
1709 bo = mem->bo;
1710
1711 if (!mem->user_pages) {
1712 mem->user_pages =
1713 kvmalloc_array(bo->tbo.ttm->num_pages,
1714 sizeof(struct page *),
1715 GFP_KERNEL | __GFP_ZERO);
1716 if (!mem->user_pages) {
1717 pr_err("%s: Failed to allocate pages array\n",
1718 __func__);
1719 return -ENOMEM;
1720 }
1721 } else if (mem->user_pages[0]) {
1722 release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
1723 }
1724
1725 /* Get updated user pages */
1726 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
1727 mem->user_pages);
1728 if (ret) {
1729 mem->user_pages[0] = NULL;
1730 pr_info("%s: Failed to get user pages: %d\n",
1731 __func__, ret);
1732 /* Pretend it succeeded. It will fail later
1733 * with a VM fault if the GPU tries to access
1734 * it. Better than hanging indefinitely with
1735 * stalled user mode queues.
1736 */
1737 }
1738
1739 /* Mark the BO as valid unless it was invalidated
1740 * again concurrently
1741 */
1742 if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
1743 return -EAGAIN;
1744 }
1745
1746 return 0;
1747}
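The atomic_cmpxchg() at the end of the loop is what makes a racing re-invalidation safe: if the MMU notifier bumped mem->invalid again while the pages were being re-pinned, the exchange fails and the caller retries, so the newer invalidation is never lost. In condensed form:

	invalid = atomic_read(&mem->invalid);		/* generation we re-pinned for */
	/* ... amdgpu_ttm_tt_get_user_pages() ... */
	if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
		return -EAGAIN;				/* invalidated again, restart */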
1748
1749/* Validate invalid userptr BOs
1750 *
1751 * Validates BOs on the userptr_inval_list, and moves them back to the
1752 * userptr_valid_list. Also updates GPUVM page tables with new page
1753 * addresses and waits for the page table updates to complete.
1754 */
1755static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
1756{
1757 struct amdgpu_bo_list_entry *pd_bo_list_entries;
1758 struct list_head resv_list, duplicates;
1759 struct ww_acquire_ctx ticket;
1760 struct amdgpu_sync sync;
1761
1762 struct amdgpu_vm *peer_vm;
1763 struct kgd_mem *mem, *tmp_mem;
1764 struct amdgpu_bo *bo;
1765 struct ttm_operation_ctx ctx = { false, false };
1766 int i, ret;
1767
1768 pd_bo_list_entries = kcalloc(process_info->n_vms,
1769 sizeof(struct amdgpu_bo_list_entry),
1770 GFP_KERNEL);
1771 if (!pd_bo_list_entries) {
1772 pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
1773 return -ENOMEM;
1774 }
1775
1776 INIT_LIST_HEAD(&resv_list);
1777 INIT_LIST_HEAD(&duplicates);
1778
1779 /* Get all the page directory BOs that need to be reserved */
1780 i = 0;
1781 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1782 vm_list_node)
1783 amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
1784 &pd_bo_list_entries[i++]);
1785 /* Add the userptr_inval_list entries to resv_list */
1786 list_for_each_entry(mem, &process_info->userptr_inval_list,
1787 validate_list.head) {
1788 list_add_tail(&mem->resv_list.head, &resv_list);
1789 mem->resv_list.bo = mem->validate_list.bo;
1790 mem->resv_list.shared = mem->validate_list.shared;
1791 }
1792
1793 /* Reserve all BOs and page tables for validation */
1794 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
1795 WARN(!list_empty(&duplicates), "Duplicates should be empty");
1796 if (ret)
1797 goto out;
1798
1799 amdgpu_sync_create(&sync);
1800
1801 /* Avoid triggering eviction fences when unmapping invalid
1802 * userptr BOs (waits for all fences, doesn't use
1803 * FENCE_OWNER_VM)
1804 */
1805 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1806 vm_list_node)
1807 amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
1808 process_info->eviction_fence,
1809 NULL, NULL);
1810
1811 ret = process_validate_vms(process_info);
1812 if (ret)
1813 goto unreserve_out;
1814
1815 /* Validate BOs and update GPUVM page tables */
1816 list_for_each_entry_safe(mem, tmp_mem,
1817 &process_info->userptr_inval_list,
1818 validate_list.head) {
1819 struct kfd_bo_va_list *bo_va_entry;
1820
1821 bo = mem->bo;
1822
1823 /* Copy pages array and validate the BO if we got user pages */
1824 if (mem->user_pages[0]) {
1825 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
1826 mem->user_pages);
1827 amdgpu_ttm_placement_from_domain(bo, mem->domain);
1828 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1829 if (ret) {
1830 pr_err("%s: failed to validate BO\n", __func__);
1831 goto unreserve_out;
1832 }
1833 }
1834
1835 /* Validate succeeded, now the BO owns the pages, free
1836 * our copy of the pointer array. Put this BO back on
1837 * the userptr_valid_list. If we need to revalidate
1838 * it, we need to start from scratch.
1839 */
1840 kvfree(mem->user_pages);
1841 mem->user_pages = NULL;
1842 list_move_tail(&mem->validate_list.head,
1843 &process_info->userptr_valid_list);
1844
1845 /* Update mapping. If the BO was not validated
1846 * (because we couldn't get user pages), this will
1847 * clear the page table entries, which will result in
1848 * VM faults if the GPU tries to access the invalid
1849 * memory.
1850 */
1851 list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
1852 if (!bo_va_entry->is_mapped)
1853 continue;
1854
1855 ret = update_gpuvm_pte((struct amdgpu_device *)
1856 bo_va_entry->kgd_dev,
1857 bo_va_entry, &sync);
1858 if (ret) {
1859 pr_err("%s: update PTE failed\n", __func__);
1860 /* make sure this gets validated again */
1861 atomic_inc(&mem->invalid);
1862 goto unreserve_out;
1863 }
1864 }
1865 }
1866
1867 /* Update page directories */
1868 ret = process_update_pds(process_info, &sync);
1869
1870unreserve_out:
1871 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1872 vm_list_node)
1873 amdgpu_bo_fence(peer_vm->root.base.bo,
1874 &process_info->eviction_fence->base, true);
1875 ttm_eu_backoff_reservation(&ticket, &resv_list);
1876 amdgpu_sync_wait(&sync, false);
1877 amdgpu_sync_free(&sync);
1878out:
1879 kfree(pd_bo_list_entries);
1880
1881 return ret;
1882}
1883
1884/* Worker callback to restore evicted userptr BOs
1885 *
1886 * Tries to update and validate all userptr BOs. If successful and no
1887 * concurrent evictions happened, the queues are restarted. Otherwise,
1888 * reschedule for another attempt later.
1889 */
1890static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
1891{
1892 struct delayed_work *dwork = to_delayed_work(work);
1893 struct amdkfd_process_info *process_info =
1894 container_of(dwork, struct amdkfd_process_info,
1895 restore_userptr_work);
1896 struct task_struct *usertask;
1897 struct mm_struct *mm;
1898 int evicted_bos;
1899
1900 evicted_bos = atomic_read(&process_info->evicted_bos);
1901 if (!evicted_bos)
1902 return;
1903
1904 /* Reference task and mm in case of concurrent process termination */
1905 usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
1906 if (!usertask)
1907 return;
1908 mm = get_task_mm(usertask);
1909 if (!mm) {
1910 put_task_struct(usertask);
1911 return;
1912 }
1913
1914 mutex_lock(&process_info->lock);
1915
1916 if (update_invalid_user_pages(process_info, mm))
1917 goto unlock_out;
1918 /* userptr_inval_list can be empty if all evicted userptr BOs
1919 * have been freed. In that case there is nothing to validate
1920 * and we can just restart the queues.
1921 */
1922 if (!list_empty(&process_info->userptr_inval_list)) {
1923 if (atomic_read(&process_info->evicted_bos) != evicted_bos)
1924 goto unlock_out; /* Concurrent eviction, try again */
1925
1926 if (validate_invalid_user_pages(process_info))
1927 goto unlock_out;
1928 }
1929 /* Final check for concurrent eviction and atomic update. If
1930 * another eviction happens after successful update, it will
1931 * be a first eviction that calls quiesce_mm. The eviction
1932 * reference counting inside KFD will handle this case.
1933 */
1934 if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
1935 evicted_bos)
1936 goto unlock_out;
1937 evicted_bos = 0;
1938 if (kgd2kfd->resume_mm(mm)) {
1939 pr_err("%s: Failed to resume KFD\n", __func__);
1940 /* No recovery from this failure. Probably the CP is
1941 * hanging. No point trying again.
1942 */
1943 }
1944unlock_out:
1945 mutex_unlock(&process_info->lock);
1946 mmput(mm);
1947 put_task_struct(usertask);
1948
1949 /* If validation failed, reschedule another attempt */
1950 if (evicted_bos)
1951 schedule_delayed_work(&process_info->restore_userptr_work,
1952 msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
1953}
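Taken together, the userptr eviction/restore machinery added by this patch works roughly as follows (condensed from the functions above):

	/*
	 * MMU notifier invalidates a range
	 *   -> amdgpu_amdkfd_evict_userptr()
	 *        mem->invalid++, process_info->evicted_bos++
	 *        first eviction: kgd2kfd->quiesce_mm() stops the queues and
	 *        restore_userptr_work is scheduled after
	 *        AMDGPU_USERPTR_RESTORE_DELAY_MS
	 *
	 * amdgpu_amdkfd_restore_userptr_worker()
	 *   -> update_invalid_user_pages()    move BOs to userptr_inval_list
	 *                                     and re-pin their user pages
	 *   -> validate_invalid_user_pages()  validate into GTT, update PTEs
	 *   -> atomic_cmpxchg(evicted_bos, n, 0) succeeds only if no new
	 *      eviction raced in; then kgd2kfd->resume_mm() restarts the
	 *      queues, otherwise the work is rescheduled and tried again.
	 */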
1954
1421/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given 1955/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
1422 * KFD process identified by process_info 1956 * KFD process identified by process_info
1423 * 1957 *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index a0f48cb9b8f0..236915849cfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -322,3 +322,47 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
322 322
323 return ret; 323 return ret;
324} 324}
325
326union gfx_info {
327 struct atom_gfx_info_v2_4 v24;
328};
329
330int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
331{
332 struct amdgpu_mode_info *mode_info = &adev->mode_info;
333 int index;
334 uint8_t frev, crev;
335 uint16_t data_offset;
336
337 index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
338 gfx_info);
339 if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
340 &frev, &crev, &data_offset)) {
341 union gfx_info *gfx_info = (union gfx_info *)
342 (mode_info->atom_context->bios + data_offset);
343 switch (crev) {
344 case 4:
345 adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se;
346 adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh;
347 adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se;
348 adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se;
349 adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs;
350 adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
351 adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
352 adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
353 adev->gfx.config.gs_prim_buffer_depth =
354 le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
355 adev->gfx.config.double_offchip_lds_buf =
356 gfx_info->v24.gc_double_offchip_lds_buffer;
357 adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
358 adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
359 adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
360 adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
361 return 0;
362 default:
363 return -EINVAL;
364 }
365
366 }
367 return -EINVAL;
368}
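A caller of the new amdgpu_atomfirmware_get_gfx_info() would typically try the firmware-provided configuration first and keep per-ASIC defaults when the table is missing or has an unknown revision; a hypothetical sketch (the fallback helper name is illustrative, not from this patch):

	if (amdgpu_atomfirmware_get_gfx_info(adev)) {
		/* no usable gfx_info v2.4 table: fall back to hardcoded values */
		gfx_v9_0_set_default_gfx_config(adev);	/* hypothetical helper */
	}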
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 7689c961c4ef..20f158fd3b76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -30,5 +30,6 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
30int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); 30int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
31int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); 31int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
32int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); 32int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
33int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
33 34
34#endif 35#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 1ae5ae8c45a4..1bcb2b247335 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -550,7 +550,7 @@ static int amdgpu_atpx_init(void)
550 * look up whether we are the integrated or discrete GPU (all asics). 550 * look up whether we are the integrated or discrete GPU (all asics).
551 * Returns the client id. 551 * Returns the client id.
552 */ 552 */
553static int amdgpu_atpx_get_client_id(struct pci_dev *pdev) 553static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
554{ 554{
555 if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev)) 555 if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
556 return VGA_SWITCHEROO_IGD; 556 return VGA_SWITCHEROO_IGD;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 02b849be083b..19cfff31f2e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -75,13 +75,20 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
75{ 75{
76 struct amdgpu_bo *dobj = NULL; 76 struct amdgpu_bo *dobj = NULL;
77 struct amdgpu_bo *sobj = NULL; 77 struct amdgpu_bo *sobj = NULL;
78 struct amdgpu_bo_param bp;
78 uint64_t saddr, daddr; 79 uint64_t saddr, daddr;
79 int r, n; 80 int r, n;
80 int time; 81 int time;
81 82
83 memset(&bp, 0, sizeof(bp));
84 bp.size = size;
85 bp.byte_align = PAGE_SIZE;
86 bp.domain = sdomain;
87 bp.flags = 0;
88 bp.type = ttm_bo_type_kernel;
89 bp.resv = NULL;
82 n = AMDGPU_BENCHMARK_ITERATIONS; 90 n = AMDGPU_BENCHMARK_ITERATIONS;
83 r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0, 91 r = amdgpu_bo_create(adev, &bp, &sobj);
84 ttm_bo_type_kernel, NULL, &sobj);
85 if (r) { 92 if (r) {
86 goto out_cleanup; 93 goto out_cleanup;
87 } 94 }
@@ -93,8 +100,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
93 if (r) { 100 if (r) {
94 goto out_cleanup; 101 goto out_cleanup;
95 } 102 }
96 r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0, 103 bp.domain = ddomain;
97 ttm_bo_type_kernel, NULL, &dobj); 104 r = amdgpu_bo_create(adev, &bp, &dobj);
98 if (r) { 105 if (r) {
99 goto out_cleanup; 106 goto out_cleanup;
100 } 107 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 71a57b2f7f04..e950730f1933 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -23,7 +23,6 @@
23 */ 23 */
24#include <linux/list.h> 24#include <linux/list.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/pci.h>
27#include <drm/drmP.h> 26#include <drm/drmP.h>
28#include <linux/firmware.h> 27#include <linux/firmware.h>
29#include <drm/amdgpu_drm.h> 28#include <drm/amdgpu_drm.h>
@@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
109 WARN(1, "Invalid indirect register space"); 108 WARN(1, "Invalid indirect register space");
110} 109}
111 110
112static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
113 enum cgs_resource_type resource_type,
114 uint64_t size,
115 uint64_t offset,
116 uint64_t *resource_base)
117{
118 CGS_FUNC_ADEV;
119
120 if (resource_base == NULL)
121 return -EINVAL;
122
123 switch (resource_type) {
124 case CGS_RESOURCE_TYPE_MMIO:
125 if (adev->rmmio_size == 0)
126 return -ENOENT;
127 if ((offset + size) > adev->rmmio_size)
128 return -EINVAL;
129 *resource_base = adev->rmmio_base;
130 return 0;
131 case CGS_RESOURCE_TYPE_DOORBELL:
132 if (adev->doorbell.size == 0)
133 return -ENOENT;
134 if ((offset + size) > adev->doorbell.size)
135 return -EINVAL;
136 *resource_base = adev->doorbell.base;
137 return 0;
138 case CGS_RESOURCE_TYPE_FB:
139 case CGS_RESOURCE_TYPE_IO:
140 case CGS_RESOURCE_TYPE_ROM:
141 default:
142 return -EINVAL;
143 }
144}
145
146static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device,
147 unsigned table, uint16_t *size,
148 uint8_t *frev, uint8_t *crev)
149{
150 CGS_FUNC_ADEV;
151 uint16_t data_start;
152
153 if (amdgpu_atom_parse_data_header(
154 adev->mode_info.atom_context, table, size,
155 frev, crev, &data_start))
156 return (uint8_t*)adev->mode_info.atom_context->bios +
157 data_start;
158
159 return NULL;
160}
161
162static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table,
163 uint8_t *frev, uint8_t *crev)
164{
165 CGS_FUNC_ADEV;
166
167 if (amdgpu_atom_parse_cmd_header(
168 adev->mode_info.atom_context, table,
169 frev, crev))
170 return 0;
171
172 return -EINVAL;
173}
174
175static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table,
176 void *args)
177{
178 CGS_FUNC_ADEV;
179
180 return amdgpu_atom_execute_table(
181 adev->mode_info.atom_context, table, args);
182}
183
184static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
185 enum amd_ip_block_type block_type,
186 enum amd_clockgating_state state)
187{
188 CGS_FUNC_ADEV;
189 int i, r = -1;
190
191 for (i = 0; i < adev->num_ip_blocks; i++) {
192 if (!adev->ip_blocks[i].status.valid)
193 continue;
194
195 if (adev->ip_blocks[i].version->type == block_type) {
196 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
197 (void *)adev,
198 state);
199 break;
200 }
201 }
202 return r;
203}
204
205static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
206 enum amd_ip_block_type block_type,
207 enum amd_powergating_state state)
208{
209 CGS_FUNC_ADEV;
210 int i, r = -1;
211
212 for (i = 0; i < adev->num_ip_blocks; i++) {
213 if (!adev->ip_blocks[i].status.valid)
214 continue;
215
216 if (adev->ip_blocks[i].version->type == block_type) {
217 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
218 (void *)adev,
219 state);
220 break;
221 }
222 }
223 return r;
224}
225
226
227static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) 111static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
228{ 112{
229 CGS_FUNC_ADEV; 113 CGS_FUNC_ADEV;
@@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
271 return result; 155 return result;
272} 156}
273 157
274static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
275{
276 CGS_FUNC_ADEV;
277 if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
278 release_firmware(adev->pm.fw);
279 adev->pm.fw = NULL;
280 return 0;
281 }
282 /* cannot release other firmware because they are not created by cgs */
283 return -EINVAL;
284}
285
286static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, 158static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
287 enum cgs_ucode_id type) 159 enum cgs_ucode_id type)
288{ 160{
@@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
326 return fw_version; 198 return fw_version;
327} 199}
328 200
329static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device,
330 bool en)
331{
332 CGS_FUNC_ADEV;
333
334 if (adev->gfx.rlc.funcs->enter_safe_mode == NULL ||
335 adev->gfx.rlc.funcs->exit_safe_mode == NULL)
336 return 0;
337
338 if (en)
339 adev->gfx.rlc.funcs->enter_safe_mode(adev);
340 else
341 adev->gfx.rlc.funcs->exit_safe_mode(adev);
342
343 return 0;
344}
345
346static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device,
347 bool lock)
348{
349 CGS_FUNC_ADEV;
350
351 if (lock)
352 mutex_lock(&adev->grbm_idx_mutex);
353 else
354 mutex_unlock(&adev->grbm_idx_mutex);
355}
356
357static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, 201static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
358 enum cgs_ucode_id type, 202 enum cgs_ucode_id type,
359 struct cgs_firmware_info *info) 203 struct cgs_firmware_info *info)
@@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
541 case CHIP_POLARIS12: 385 case CHIP_POLARIS12:
542 strcpy(fw_name, "amdgpu/polaris12_smc.bin"); 386 strcpy(fw_name, "amdgpu/polaris12_smc.bin");
543 break; 387 break;
388 case CHIP_VEGAM:
389 strcpy(fw_name, "amdgpu/vegam_smc.bin");
390 break;
544 case CHIP_VEGA10: 391 case CHIP_VEGA10:
545 if ((adev->pdev->device == 0x687f) && 392 if ((adev->pdev->device == 0x687f) &&
546 ((adev->pdev->revision == 0xc0) || 393 ((adev->pdev->revision == 0xc0) ||
@@ -553,6 +400,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
553 case CHIP_VEGA12: 400 case CHIP_VEGA12:
554 strcpy(fw_name, "amdgpu/vega12_smc.bin"); 401 strcpy(fw_name, "amdgpu/vega12_smc.bin");
555 break; 402 break;
403 case CHIP_VEGA20:
404 strcpy(fw_name, "amdgpu/vega20_smc.bin");
405 break;
556 default: 406 default:
557 DRM_ERROR("SMC firmware not supported\n"); 407 DRM_ERROR("SMC firmware not supported\n");
558 return -EINVAL; 408 return -EINVAL;
@@ -598,97 +448,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
598 return 0; 448 return 0;
599} 449}
600 450
601static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
602{
603 CGS_FUNC_ADEV;
604 return amdgpu_sriov_vf(adev);
605}
606
607static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
608 struct cgs_display_info *info)
609{
610 CGS_FUNC_ADEV;
611 struct cgs_mode_info *mode_info;
612
613 if (info == NULL)
614 return -EINVAL;
615
616 mode_info = info->mode_info;
617 if (mode_info)
618 /* if the displays are off, vblank time is max */
619 mode_info->vblank_time_us = 0xffffffff;
620
621 if (!amdgpu_device_has_dc_support(adev)) {
622 struct amdgpu_crtc *amdgpu_crtc;
623 struct drm_device *ddev = adev->ddev;
624 struct drm_crtc *crtc;
625 uint32_t line_time_us, vblank_lines;
626
627 if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
628 list_for_each_entry(crtc,
629 &ddev->mode_config.crtc_list, head) {
630 amdgpu_crtc = to_amdgpu_crtc(crtc);
631 if (crtc->enabled) {
632 info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
633 info->display_count++;
634 }
635 if (mode_info != NULL &&
636 crtc->enabled && amdgpu_crtc->enabled &&
637 amdgpu_crtc->hw_mode.clock) {
638 line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
639 amdgpu_crtc->hw_mode.clock;
640 vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
641 amdgpu_crtc->hw_mode.crtc_vdisplay +
642 (amdgpu_crtc->v_border * 2);
643 mode_info->vblank_time_us = vblank_lines * line_time_us;
644 mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
645 /* we have issues with mclk switching with refresh rates
646 * over 120 hz on the non-DC code.
647 */
648 if (mode_info->refresh_rate > 120)
649 mode_info->vblank_time_us = 0;
650 mode_info = NULL;
651 }
652 }
653 }
654 } else {
655 info->display_count = adev->pm.pm_display_cfg.num_display;
656 if (mode_info != NULL) {
657 mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time;
658 mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh;
659 }
660 }
661 return 0;
662}
663
664
665static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled)
666{
667 CGS_FUNC_ADEV;
668
669 adev->pm.dpm_enabled = enabled;
670
671 return 0;
672}
673
674static const struct cgs_ops amdgpu_cgs_ops = { 451static const struct cgs_ops amdgpu_cgs_ops = {
675 .read_register = amdgpu_cgs_read_register, 452 .read_register = amdgpu_cgs_read_register,
676 .write_register = amdgpu_cgs_write_register, 453 .write_register = amdgpu_cgs_write_register,
677 .read_ind_register = amdgpu_cgs_read_ind_register, 454 .read_ind_register = amdgpu_cgs_read_ind_register,
678 .write_ind_register = amdgpu_cgs_write_ind_register, 455 .write_ind_register = amdgpu_cgs_write_ind_register,
679 .get_pci_resource = amdgpu_cgs_get_pci_resource,
680 .atom_get_data_table = amdgpu_cgs_atom_get_data_table,
681 .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs,
682 .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table,
683 .get_firmware_info = amdgpu_cgs_get_firmware_info, 456 .get_firmware_info = amdgpu_cgs_get_firmware_info,
684 .rel_firmware = amdgpu_cgs_rel_firmware,
685 .set_powergating_state = amdgpu_cgs_set_powergating_state,
686 .set_clockgating_state = amdgpu_cgs_set_clockgating_state,
687 .get_active_displays_info = amdgpu_cgs_get_active_displays_info,
688 .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
689 .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
690 .enter_safe_mode = amdgpu_cgs_enter_safe_mode,
691 .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
692}; 457};
693 458
694struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) 459struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 96501ff0e55b..8e66851eb427 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -691,7 +691,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
691 return ret; 691 return ret;
692} 692}
693 693
694static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, 694static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
695 struct drm_display_mode *mode) 695 struct drm_display_mode *mode)
696{ 696{
697 struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); 697 struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@@ -843,7 +843,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
843 return ret; 843 return ret;
844} 844}
845 845
846static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector, 846static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
847 struct drm_display_mode *mode) 847 struct drm_display_mode *mode)
848{ 848{
849 struct drm_device *dev = connector->dev; 849 struct drm_device *dev = connector->dev;
@@ -1172,7 +1172,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector)
1172 amdgpu_connector->use_digital = true; 1172 amdgpu_connector->use_digital = true;
1173} 1173}
1174 1174
1175static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, 1175static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
1176 struct drm_display_mode *mode) 1176 struct drm_display_mode *mode)
1177{ 1177{
1178 struct drm_device *dev = connector->dev; 1178 struct drm_device *dev = connector->dev;
@@ -1448,7 +1448,7 @@ out:
1448 return ret; 1448 return ret;
1449} 1449}
1450 1450
1451static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector, 1451static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
1452 struct drm_display_mode *mode) 1452 struct drm_display_mode *mode)
1453{ 1453{
1454 struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); 1454 struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc34b50e6b29..9c1d491d742e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -382,8 +382,7 @@ retry:
382 382
383 p->bytes_moved += ctx.bytes_moved; 383 p->bytes_moved += ctx.bytes_moved;
384 if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && 384 if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
385 bo->tbo.mem.mem_type == TTM_PL_VRAM && 385 amdgpu_bo_in_cpu_visible_vram(bo))
386 bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
387 p->bytes_moved_vis += ctx.bytes_moved; 386 p->bytes_moved_vis += ctx.bytes_moved;
388 387
389 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { 388 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
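For reference, the open-coded test removed above is what the new amdgpu_bo_in_cpu_visible_vram() helper replaces. A minimal sketch of the equivalent check, inferred from the deleted lines (the real helper is declared in amdgpu_object.h and may differ in detail):

    /* Sketch: BO currently resides in VRAM and starts below the CPU-visible aperture */
    static bool bo_in_cpu_visible_vram_sketch(struct amdgpu_bo *bo)
    {
            struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

            return bo->tbo.mem.mem_type == TTM_PL_VRAM &&
                   bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
    }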
@@ -411,7 +410,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
411 struct amdgpu_bo_list_entry *candidate = p->evictable; 410 struct amdgpu_bo_list_entry *candidate = p->evictable;
412 struct amdgpu_bo *bo = candidate->robj; 411 struct amdgpu_bo *bo = candidate->robj;
413 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 412 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
414 u64 initial_bytes_moved, bytes_moved;
415 bool update_bytes_moved_vis; 413 bool update_bytes_moved_vis;
416 uint32_t other; 414 uint32_t other;
417 415
@@ -435,18 +433,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
435 continue; 433 continue;
436 434
437 /* Good we can try to move this BO somewhere else */ 435 /* Good we can try to move this BO somewhere else */
438 amdgpu_ttm_placement_from_domain(bo, other);
439 update_bytes_moved_vis = 436 update_bytes_moved_vis =
440 adev->gmc.visible_vram_size < adev->gmc.real_vram_size && 437 adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
441 bo->tbo.mem.mem_type == TTM_PL_VRAM && 438 amdgpu_bo_in_cpu_visible_vram(bo);
442 bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT; 439 amdgpu_ttm_placement_from_domain(bo, other);
443 initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
444 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 440 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
445 bytes_moved = atomic64_read(&adev->num_bytes_moved) - 441 p->bytes_moved += ctx.bytes_moved;
446 initial_bytes_moved;
447 p->bytes_moved += bytes_moved;
448 if (update_bytes_moved_vis) 442 if (update_bytes_moved_vis)
449 p->bytes_moved_vis += bytes_moved; 443 p->bytes_moved_vis += ctx.bytes_moved;
450 444
451 if (unlikely(r)) 445 if (unlikely(r))
452 break; 446 break;
@@ -536,7 +530,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
536 if (p->bo_list) { 530 if (p->bo_list) {
537 amdgpu_bo_list_get_list(p->bo_list, &p->validated); 531 amdgpu_bo_list_get_list(p->bo_list, &p->validated);
538 if (p->bo_list->first_userptr != p->bo_list->num_entries) 532 if (p->bo_list->first_userptr != p->bo_list->num_entries)
539 p->mn = amdgpu_mn_get(p->adev); 533 p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
540 } 534 }
541 535
542 INIT_LIST_HEAD(&duplicates); 536 INIT_LIST_HEAD(&duplicates);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3fabf9f97022..c5bb36275e93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -91,7 +91,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
91 continue; 91 continue;
92 92
93 r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, 93 r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
94 rq, amdgpu_sched_jobs, &ctx->guilty); 94 rq, &ctx->guilty);
95 if (r) 95 if (r)
96 goto failed; 96 goto failed;
97 } 97 }
@@ -111,8 +111,9 @@ failed:
111 return r; 111 return r;
112} 112}
113 113
114static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) 114static void amdgpu_ctx_fini(struct kref *ref)
115{ 115{
116 struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
116 struct amdgpu_device *adev = ctx->adev; 117 struct amdgpu_device *adev = ctx->adev;
117 unsigned i, j; 118 unsigned i, j;
118 119
@@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
125 kfree(ctx->fences); 126 kfree(ctx->fences);
126 ctx->fences = NULL; 127 ctx->fences = NULL;
127 128
128 for (i = 0; i < adev->num_rings; i++)
129 drm_sched_entity_fini(&adev->rings[i]->sched,
130 &ctx->rings[i].entity);
131
132 amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); 129 amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
133 130
134 mutex_destroy(&ctx->lock); 131 mutex_destroy(&ctx->lock);
132
133 kfree(ctx);
135} 134}
136 135
137static int amdgpu_ctx_alloc(struct amdgpu_device *adev, 136static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -170,12 +169,20 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
170static void amdgpu_ctx_do_release(struct kref *ref) 169static void amdgpu_ctx_do_release(struct kref *ref)
171{ 170{
172 struct amdgpu_ctx *ctx; 171 struct amdgpu_ctx *ctx;
172 u32 i;
173 173
174 ctx = container_of(ref, struct amdgpu_ctx, refcount); 174 ctx = container_of(ref, struct amdgpu_ctx, refcount);
175 175
176 amdgpu_ctx_fini(ctx); 176 for (i = 0; i < ctx->adev->num_rings; i++) {
177 177
178 kfree(ctx); 178 if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
179 continue;
180
181 drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
182 &ctx->rings[i].entity);
183 }
184
185 amdgpu_ctx_fini(ref);
179} 186}
180 187
181static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) 188static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
@@ -437,16 +444,72 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
437 idr_init(&mgr->ctx_handles); 444 idr_init(&mgr->ctx_handles);
438} 445}
439 446
447void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
448{
449 struct amdgpu_ctx *ctx;
450 struct idr *idp;
451 uint32_t id, i;
452
453 idp = &mgr->ctx_handles;
454
455 idr_for_each_entry(idp, ctx, id) {
456
457 if (!ctx->adev)
458 return;
459
460 for (i = 0; i < ctx->adev->num_rings; i++) {
461
462 if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
463 continue;
464
465 if (kref_read(&ctx->refcount) == 1)
466 drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
467 &ctx->rings[i].entity);
468 else
469 DRM_ERROR("ctx %p is still alive\n", ctx);
470 }
471 }
472}
473
474void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
475{
476 struct amdgpu_ctx *ctx;
477 struct idr *idp;
478 uint32_t id, i;
479
480 idp = &mgr->ctx_handles;
481
482 idr_for_each_entry(idp, ctx, id) {
483
484 if (!ctx->adev)
485 return;
486
487 for (i = 0; i < ctx->adev->num_rings; i++) {
488
489 if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
490 continue;
491
492 if (kref_read(&ctx->refcount) == 1)
493 drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
494 &ctx->rings[i].entity);
495 else
496 DRM_ERROR("ctx %p is still alive\n", ctx);
497 }
498 }
499}
500
440void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) 501void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
441{ 502{
442 struct amdgpu_ctx *ctx; 503 struct amdgpu_ctx *ctx;
443 struct idr *idp; 504 struct idr *idp;
444 uint32_t id; 505 uint32_t id;
445 506
507 amdgpu_ctx_mgr_entity_cleanup(mgr);
508
446 idp = &mgr->ctx_handles; 509 idp = &mgr->ctx_handles;
447 510
448 idr_for_each_entry(idp, ctx, id) { 511 idr_for_each_entry(idp, ctx, id) {
449 if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) 512 if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
450 DRM_ERROR("ctx %p is still alive\n", ctx); 513 DRM_ERROR("ctx %p is still alive\n", ctx);
451 } 514 }
452 515
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 448d69fe3756..f5fb93795a69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -28,8 +28,13 @@
28#include <linux/debugfs.h> 28#include <linux/debugfs.h>
29#include "amdgpu.h" 29#include "amdgpu.h"
30 30
31/* 31/**
32 * Debugfs 32 * amdgpu_debugfs_add_files - Add simple debugfs entries
33 *
34 * @adev: Device to attach debugfs entries to
35 * @files: Array of function callbacks that respond to reads
36 * @nfiles: Number of callbacks to register
37 *
33 */ 38 */
34int amdgpu_debugfs_add_files(struct amdgpu_device *adev, 39int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
35 const struct drm_info_list *files, 40 const struct drm_info_list *files,
@@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
64 69
65#if defined(CONFIG_DEBUG_FS) 70#if defined(CONFIG_DEBUG_FS)
66 71
67 72/**
73 * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
74 *
75 * @read: True if reading
76 * @f: open file handle
77 * @buf: User buffer to write/read to
78 * @size: Number of bytes to write/read
79 * @pos: Offset to seek to
80 *
81 * This debugfs entry has special meaning on the offset being sought.
82 * Various bits have different meanings:
83 *
84 * Bit 62: Indicates a GRBM bank switch is needed
85 * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is
86 * zero)
87 * Bits 24..33: The SE or ME selector if needed
88 * Bits 34..43: The SH (or SA) or PIPE selector if needed
89 * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
90 *
91 * Bit 23: Indicates that the PM power gating lock should be held
92 * This is necessary to read registers that might be
 93 * unreliable during a power gating transition.
94 *
95 * The lower bits are the BYTE offset of the register to read. This
96 * allows reading multiple registers in a single call and having
97 * the returned size reflect that.
98 */
68static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, 99static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
69 char __user *buf, size_t size, loff_t *pos) 100 char __user *buf, size_t size, loff_t *pos)
70{ 101{
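To illustrate the offset encoding documented above, here is a hedged userspace sketch that reads one MMIO register through the amdgpu_regs debugfs file using GRBM bank selection. The register byte offset and the selector values are made-up examples, and the debugfs path assumes DRM minor 0 on a 64-bit build:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* bit 62: GRBM bank switch; bits 24..33: SE; 34..43: SH/SA;
             * 44..53: instance; low bits: BYTE offset of the register */
            uint64_t off = (1ULL << 62) |   /* request GRBM bank switching */
                           (1ULL << 24) |   /* SE 1 */
                           (0ULL << 34) |   /* SH/SA 0 */
                           (0ULL << 44) |   /* instance 0 */
                           0x1234;          /* example register byte offset (DWORD aligned) */
            uint32_t val;
            int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs", O_RDONLY);

            if (fd < 0 || pread(fd, &val, sizeof(val), off) != sizeof(val))
                    return 1;
            printf("reg = 0x%08x\n", val);
            close(fd);
            return 0;
    }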
@@ -164,19 +195,37 @@ end:
164 return result; 195 return result;
165} 196}
166 197
167 198/**
199 * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
200 */
168static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, 201static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
169 size_t size, loff_t *pos) 202 size_t size, loff_t *pos)
170{ 203{
171 return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos); 204 return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
172} 205}
173 206
207/**
208 * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
209 */
174static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, 210static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
175 size_t size, loff_t *pos) 211 size_t size, loff_t *pos)
176{ 212{
177 return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); 213 return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
178} 214}
179 215
216
217/**
218 * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
219 *
220 * @f: open file handle
221 * @buf: User buffer to store read data in
222 * @size: Number of bytes to read
223 * @pos: Offset to seek to
224 *
225 * The lower bits are the BYTE offset of the register to read. This
226 * allows reading multiple registers in a single call and having
227 * the returned size reflect that.
228 */
180static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, 229static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
181 size_t size, loff_t *pos) 230 size_t size, loff_t *pos)
182{ 231{
@@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
204 return result; 253 return result;
205} 254}
206 255
256/**
257 * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
258 *
259 * @f: open file handle
260 * @buf: User buffer to write data from
261 * @size: Number of bytes to write
262 * @pos: Offset to seek to
263 *
264 * The lower bits are the BYTE offset of the register to write. This
265 * allows writing multiple registers in a single call and having
266 * the returned size reflect that.
267 */
207static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, 268static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
208 size_t size, loff_t *pos) 269 size_t size, loff_t *pos)
209{ 270{
@@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
232 return result; 293 return result;
233} 294}
234 295
296/**
297 * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
298 *
299 * @f: open file handle
300 * @buf: User buffer to store read data in
301 * @size: Number of bytes to read
302 * @pos: Offset to seek to
303 *
304 * The lower bits are the BYTE offset of the register to read. This
305 * allows reading multiple registers in a single call and having
306 * the returned size reflect that.
307 */
235static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, 308static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
236 size_t size, loff_t *pos) 309 size_t size, loff_t *pos)
237{ 310{
@@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
259 return result; 332 return result;
260} 333}
261 334
335/**
336 * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
337 *
338 * @f: open file handle
339 * @buf: User buffer to write data from
340 * @size: Number of bytes to write
341 * @pos: Offset to seek to
342 *
343 * The lower bits are the BYTE offset of the register to write. This
344 * allows writing multiple registers in a single call and having
345 * the returned size reflect that.
346 */
262static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, 347static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
263 size_t size, loff_t *pos) 348 size_t size, loff_t *pos)
264{ 349{
@@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
287 return result; 372 return result;
288} 373}
289 374
375/**
376 * amdgpu_debugfs_regs_smc_read - Read from a SMC register
377 *
378 * @f: open file handle
379 * @buf: User buffer to store read data in
380 * @size: Number of bytes to read
381 * @pos: Offset to seek to
382 *
383 * The lower bits are the BYTE offset of the register to read. This
384 * allows reading multiple registers in a single call and having
385 * the returned size reflect that.
386 */
290static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, 387static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
291 size_t size, loff_t *pos) 388 size_t size, loff_t *pos)
292{ 389{
@@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
314 return result; 411 return result;
315} 412}
316 413
414/**
415 * amdgpu_debugfs_regs_smc_write - Write to a SMC register
416 *
417 * @f: open file handle
418 * @buf: User buffer to write data from
419 * @size: Number of bytes to write
420 * @pos: Offset to seek to
421 *
422 * The lower bits are the BYTE offset of the register to write. This
423 * allows writing multiple registers in a single call and having
424 * the returned size reflect that.
425 */
317static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, 426static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
318 size_t size, loff_t *pos) 427 size_t size, loff_t *pos)
319{ 428{
@@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
342 return result; 451 return result;
343} 452}
344 453
454/**
455 * amdgpu_debugfs_gca_config_read - Read from gfx config data
456 *
457 * @f: open file handle
458 * @buf: User buffer to store read data in
459 * @size: Number of bytes to read
460 * @pos: Offset to seek to
461 *
462 * This file is used to access configuration data in a somewhat
463 * stable fashion. The format is a series of DWORDs with the first
464 * indicating which revision it is. New content is appended to the
465 * end so that older software can still read the data.
466 */
467
345static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, 468static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
346 size_t size, loff_t *pos) 469 size_t size, loff_t *pos)
347{ 470{
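A minimal sketch of consuming that layout from userspace: read the file into a DWORD array and treat the first element as the revision before interpreting the appended fields (fd is assumed to be an open handle on the amdgpu_gca_config debugfs file):

    uint32_t cfg[256];
    ssize_t n = pread(fd, cfg, sizeof(cfg), 0);

    if (n >= (ssize_t)sizeof(uint32_t))
            printf("gca_config layout revision: %u (%zd bytes total)\n", cfg[0], n);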
@@ -418,6 +541,19 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
418 return result; 541 return result;
419} 542}
420 543
544/**
545 * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
546 *
547 * @f: open file handle
548 * @buf: User buffer to store read data in
549 * @size: Number of bytes to read
550 * @pos: Offset to seek to
551 *
552 * The offset is treated as the BYTE address of one of the sensors
553 * enumerated in amd/include/kgd_pp_interface.h under the
554 * 'amd_pp_sensors' enumeration. For instance to read the UVD VCLK
555 * you would use the offset 3 * 4 = 12.
556 */
421static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, 557static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
422 size_t size, loff_t *pos) 558 size_t size, loff_t *pos)
423{ 559{
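Following that convention, a short sketch of reading the UVD VCLK sensor mentioned above (sensor index 3, byte offset 3 * 4 = 12); fd is assumed to be an open handle on the amdgpu_sensors debugfs file, and the read must be DWORD sized and aligned:

    uint32_t vclk;

    if (pread(fd, &vclk, sizeof(vclk), 3 * 4) == sizeof(vclk))
            printf("UVD VCLK sensor: %u\n", vclk);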
@@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
428 if (size & 3 || *pos & 0x3) 564 if (size & 3 || *pos & 0x3)
429 return -EINVAL; 565 return -EINVAL;
430 566
431 if (amdgpu_dpm == 0) 567 if (!adev->pm.dpm_enabled)
432 return -EINVAL; 568 return -EINVAL;
433 569
434 /* convert offset to sensor number */ 570 /* convert offset to sensor number */
@@ -457,6 +593,27 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
457 return !r ? outsize : r; 593 return !r ? outsize : r;
458} 594}
459 595
596/** amdgpu_debugfs_wave_read - Read WAVE STATUS data
597 *
598 * @f: open file handle
599 * @buf: User buffer to store read data in
600 * @size: Number of bytes to read
601 * @pos: Offset to seek to
602 *
603 * The offset being sought changes which wave that the status data
604 * will be returned for. The bits are used as follows:
605 *
606 * Bits 0..6: Byte offset into data
607 * Bits 7..14: SE selector
608 * Bits 15..22: SH/SA selector
609 * Bits 23..30: CU/{WGP+SIMD} selector
610 * Bits 31..36: WAVE ID selector
611 * Bits 37..44: SIMD ID selector
612 *
613 * The returned data begins with one DWORD of version information
614 * Followed by WAVE STATUS registers relevant to the GFX IP version
615 * being used. See gfx_v8_0_read_wave_data() for an example output.
616 */
460static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, 617static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
461 size_t size, loff_t *pos) 618 size_t size, loff_t *pos)
462{ 619{
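A sketch of composing a wave-status offset per the bit layout above; the selector values (SE 0, SA 0, CU 1, wave 2, SIMD 3) are arbitrary examples, and the result would be used as the file position when reading the amdgpu_wave debugfs entry:

    /* bits 7..14: SE, 15..22: SH/SA, 23..30: CU, 31..36: wave, 37..44: SIMD */
    uint64_t wave_off = (0ULL << 7)  |   /* SE 0 */
                        (0ULL << 15) |   /* SH/SA 0 */
                        (1ULL << 23) |   /* CU 1 */
                        (2ULL << 31) |   /* wave 2 */
                        (3ULL << 37);    /* SIMD 3 */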
@@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
507 return result; 664 return result;
508} 665}
509 666
667/** amdgpu_debugfs_gpr_read - Read wave gprs
668 *
669 * @f: open file handle
670 * @buf: User buffer to store read data in
671 * @size: Number of bytes to read
672 * @pos: Offset to seek to
673 *
674 * The offset being sought changes which wave that the status data
675 * will be returned for. The bits are used as follows:
676 *
677 * Bits 0..11: Byte offset into data
678 * Bits 12..19: SE selector
679 * Bits 20..27: SH/SA selector
680 * Bits 28..35: CU/{WGP+SIMD} selector
681 * Bits 36..43: WAVE ID selector
682 * Bits 37..44: SIMD ID selector
683 * Bits 52..59: Thread selector
684 * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
685 *
686 * The return data comes from the SGPR or VGPR register bank for
687 * the selected operational unit.
688 */
510static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, 689static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
511 size_t size, loff_t *pos) 690 size_t size, loff_t *pos)
512{ 691{
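Similarly, a sketch of a GPR read offset that selects the SGPR bank of wave 0 on SE 0 / SH 0 / CU 0, following the layout above; bank value 1 selects SGPRs and the low 12 bits give the byte offset into the returned register data:

    /* bits 60..61: bank (VGPR = 0, SGPR = 1); all other selectors left at 0 */
    uint64_t gpr_off = (1ULL << 60) |   /* SGPR bank */
                       0x0;             /* byte offset 0: start of the GPR data */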
@@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = {
637 "amdgpu_gpr", 816 "amdgpu_gpr",
638}; 817};
639 818
819/**
820 * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
821 * register access.
822 *
823 * @adev: The device to attach the debugfs entries to
824 */
640int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) 825int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
641{ 826{
642 struct drm_minor *minor = adev->ddev->primary; 827 struct drm_minor *minor = adev->ddev->primary;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 34af664b9f93..290e279abf0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -83,8 +83,10 @@ static const char *amdgpu_asic_name[] = {
83 "POLARIS10", 83 "POLARIS10",
84 "POLARIS11", 84 "POLARIS11",
85 "POLARIS12", 85 "POLARIS12",
86 "VEGAM",
86 "VEGA10", 87 "VEGA10",
87 "VEGA12", 88 "VEGA12",
89 "VEGA20",
88 "RAVEN", 90 "RAVEN",
89 "LAST", 91 "LAST",
90}; 92};
@@ -690,6 +692,8 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
690{ 692{
691 u64 size_af, size_bf; 693 u64 size_af, size_bf;
692 694
695 mc->gart_size += adev->pm.smu_prv_buffer_size;
696
693 size_af = adev->gmc.mc_mask - mc->vram_end; 697 size_af = adev->gmc.mc_mask - mc->vram_end;
694 size_bf = mc->vram_start; 698 size_bf = mc->vram_start;
695 if (size_bf > size_af) { 699 if (size_bf > size_af) {
@@ -907,6 +911,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
907 } 911 }
908} 912}
909 913
914static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
915{
916 struct sysinfo si;
917 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
918 uint64_t total_memory;
919 uint64_t dram_size_seven_GB = 0x1B8000000;
920 uint64_t dram_size_three_GB = 0xB8000000;
921
922 if (amdgpu_smu_memory_pool_size == 0)
923 return;
924
925 if (!is_os_64) {
926 DRM_WARN("Not 64-bit OS, feature not supported\n");
927 goto def_value;
928 }
929 si_meminfo(&si);
930 total_memory = (uint64_t)si.totalram * si.mem_unit;
931
932 if ((amdgpu_smu_memory_pool_size == 1) ||
933 (amdgpu_smu_memory_pool_size == 2)) {
934 if (total_memory < dram_size_three_GB)
935 goto def_value1;
936 } else if ((amdgpu_smu_memory_pool_size == 4) ||
937 (amdgpu_smu_memory_pool_size == 8)) {
938 if (total_memory < dram_size_seven_GB)
939 goto def_value1;
940 } else {
941 DRM_WARN("Smu memory pool size not supported\n");
942 goto def_value;
943 }
944 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
945
946 return;
947
948def_value1:
949 DRM_WARN("Not enough system memory\n");
950def_value:
951 adev->pm.smu_prv_buffer_size = 0;
952}
953
910/** 954/**
911 * amdgpu_device_check_arguments - validate module params 955 * amdgpu_device_check_arguments - validate module params
912 * 956 *
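The mapping used by the new check above is simply a multiple of 256 MiB: the module parameter value is shifted left by 28 bits, so 1, 2, 4 and 8 select 256 MiB, 512 MiB, 1 GiB and 2 GiB pools respectively, with the two larger sizes additionally requiring roughly 7 GB of system DRAM and the smaller ones roughly 3 GB. The parameter itself (smu_memory_pool_size) is registered in the amdgpu_drv.c hunk further below. A one-line sketch of the arithmetic:

    /* pool size in bytes = parameter value * 256 MiB; e.g. 1 -> 0x10000000 */
    uint64_t pool_bytes = (uint64_t)amdgpu_smu_memory_pool_size << 28;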
@@ -948,6 +992,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
948 amdgpu_vm_fragment_size = -1; 992 amdgpu_vm_fragment_size = -1;
949 } 993 }
950 994
995 amdgpu_device_check_smu_prv_buffer_size(adev);
996
951 amdgpu_device_check_vm_size(adev); 997 amdgpu_device_check_vm_size(adev);
952 998
953 amdgpu_device_check_block_size(adev); 999 amdgpu_device_check_block_size(adev);
@@ -1039,10 +1085,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1039 * the hardware IP specified. 1085 * the hardware IP specified.
1040 * Returns the error code from the last instance. 1086 * Returns the error code from the last instance.
1041 */ 1087 */
1042int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, 1088int amdgpu_device_ip_set_clockgating_state(void *dev,
1043 enum amd_ip_block_type block_type, 1089 enum amd_ip_block_type block_type,
1044 enum amd_clockgating_state state) 1090 enum amd_clockgating_state state)
1045{ 1091{
1092 struct amdgpu_device *adev = dev;
1046 int i, r = 0; 1093 int i, r = 0;
1047 1094
1048 for (i = 0; i < adev->num_ip_blocks; i++) { 1095 for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1072,10 +1119,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
1072 * the hardware IP specified. 1119 * the hardware IP specified.
1073 * Returns the error code from the last instance. 1120 * Returns the error code from the last instance.
1074 */ 1121 */
1075int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, 1122int amdgpu_device_ip_set_powergating_state(void *dev,
1076 enum amd_ip_block_type block_type, 1123 enum amd_ip_block_type block_type,
1077 enum amd_powergating_state state) 1124 enum amd_powergating_state state)
1078{ 1125{
1126 struct amdgpu_device *adev = dev;
1079 int i, r = 0; 1127 int i, r = 0;
1080 1128
1081 for (i = 0; i < adev->num_ip_blocks; i++) { 1129 for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1320,9 +1368,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1320 case CHIP_TOPAZ: 1368 case CHIP_TOPAZ:
1321 case CHIP_TONGA: 1369 case CHIP_TONGA:
1322 case CHIP_FIJI: 1370 case CHIP_FIJI:
1323 case CHIP_POLARIS11:
1324 case CHIP_POLARIS10: 1371 case CHIP_POLARIS10:
1372 case CHIP_POLARIS11:
1325 case CHIP_POLARIS12: 1373 case CHIP_POLARIS12:
1374 case CHIP_VEGAM:
1326 case CHIP_CARRIZO: 1375 case CHIP_CARRIZO:
1327 case CHIP_STONEY: 1376 case CHIP_STONEY:
1328#ifdef CONFIG_DRM_AMDGPU_SI 1377#ifdef CONFIG_DRM_AMDGPU_SI
@@ -1339,6 +1388,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1339 case CHIP_KABINI: 1388 case CHIP_KABINI:
1340 case CHIP_MULLINS: 1389 case CHIP_MULLINS:
1341#endif 1390#endif
1391 case CHIP_VEGA20:
1342 default: 1392 default:
1343 return 0; 1393 return 0;
1344 case CHIP_VEGA10: 1394 case CHIP_VEGA10:
@@ -1428,9 +1478,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1428 case CHIP_TOPAZ: 1478 case CHIP_TOPAZ:
1429 case CHIP_TONGA: 1479 case CHIP_TONGA:
1430 case CHIP_FIJI: 1480 case CHIP_FIJI:
1431 case CHIP_POLARIS11:
1432 case CHIP_POLARIS10: 1481 case CHIP_POLARIS10:
1482 case CHIP_POLARIS11:
1433 case CHIP_POLARIS12: 1483 case CHIP_POLARIS12:
1484 case CHIP_VEGAM:
1434 case CHIP_CARRIZO: 1485 case CHIP_CARRIZO:
1435 case CHIP_STONEY: 1486 case CHIP_STONEY:
1436 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) 1487 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
@@ -1472,6 +1523,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1472#endif 1523#endif
1473 case CHIP_VEGA10: 1524 case CHIP_VEGA10:
1474 case CHIP_VEGA12: 1525 case CHIP_VEGA12:
1526 case CHIP_VEGA20:
1475 case CHIP_RAVEN: 1527 case CHIP_RAVEN:
1476 if (adev->asic_type == CHIP_RAVEN) 1528 if (adev->asic_type == CHIP_RAVEN)
1477 adev->family = AMDGPU_FAMILY_RV; 1529 adev->family = AMDGPU_FAMILY_RV;
@@ -1499,6 +1551,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1499 return -EAGAIN; 1551 return -EAGAIN;
1500 } 1552 }
1501 1553
1554 adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
1555
1502 for (i = 0; i < adev->num_ip_blocks; i++) { 1556 for (i = 0; i < adev->num_ip_blocks; i++) {
1503 if ((amdgpu_ip_block_mask & (1 << i)) == 0) { 1557 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1504 DRM_ERROR("disabled ip block: %d <%s>\n", 1558 DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1654,12 +1708,17 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
1654 if (amdgpu_emu_mode == 1) 1708 if (amdgpu_emu_mode == 1)
1655 return 0; 1709 return 0;
1656 1710
1711 r = amdgpu_ib_ring_tests(adev);
1712 if (r)
1713 DRM_ERROR("ib ring test failed (%d).\n", r);
1714
1657 for (i = 0; i < adev->num_ip_blocks; i++) { 1715 for (i = 0; i < adev->num_ip_blocks; i++) {
1658 if (!adev->ip_blocks[i].status.valid) 1716 if (!adev->ip_blocks[i].status.valid)
1659 continue; 1717 continue;
1660 /* skip CG for VCE/UVD, it's handled specially */ 1718 /* skip CG for VCE/UVD, it's handled specially */
1661 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 1719 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1662 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 1720 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1721 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1663 adev->ip_blocks[i].version->funcs->set_clockgating_state) { 1722 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1664 /* enable clockgating to save power */ 1723 /* enable clockgating to save power */
1665 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 1724 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1704,8 +1763,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
1704 } 1763 }
1705 } 1764 }
1706 1765
1707 mod_delayed_work(system_wq, &adev->late_init_work, 1766 queue_delayed_work(system_wq, &adev->late_init_work,
1708 msecs_to_jiffies(AMDGPU_RESUME_MS)); 1767 msecs_to_jiffies(AMDGPU_RESUME_MS));
1709 1768
1710 amdgpu_device_fill_reset_magic(adev); 1769 amdgpu_device_fill_reset_magic(adev);
1711 1770
@@ -1759,6 +1818,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1759 1818
1760 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 1819 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1761 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 1820 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1821 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1762 adev->ip_blocks[i].version->funcs->set_clockgating_state) { 1822 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1763 /* ungate blocks before hw fini so that we can shutdown the blocks safely */ 1823 /* ungate blocks before hw fini so that we can shutdown the blocks safely */
1764 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 1824 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1850,6 +1910,12 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
1850 if (amdgpu_sriov_vf(adev)) 1910 if (amdgpu_sriov_vf(adev))
1851 amdgpu_virt_request_full_gpu(adev, false); 1911 amdgpu_virt_request_full_gpu(adev, false);
1852 1912
1913 /* ungate SMC block powergating */
1914 if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
1915 amdgpu_device_ip_set_powergating_state(adev,
1916 AMD_IP_BLOCK_TYPE_SMC,
1917 AMD_CG_STATE_UNGATE);
1918
1853 /* ungate SMC block first */ 1919 /* ungate SMC block first */
1854 r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, 1920 r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
1855 AMD_CG_STATE_UNGATE); 1921 AMD_CG_STATE_UNGATE);
@@ -2086,16 +2152,15 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2086 case CHIP_MULLINS: 2152 case CHIP_MULLINS:
2087 case CHIP_CARRIZO: 2153 case CHIP_CARRIZO:
2088 case CHIP_STONEY: 2154 case CHIP_STONEY:
2089 case CHIP_POLARIS11:
2090 case CHIP_POLARIS10: 2155 case CHIP_POLARIS10:
2156 case CHIP_POLARIS11:
2091 case CHIP_POLARIS12: 2157 case CHIP_POLARIS12:
2158 case CHIP_VEGAM:
2092 case CHIP_TONGA: 2159 case CHIP_TONGA:
2093 case CHIP_FIJI: 2160 case CHIP_FIJI:
2094#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
2095 return amdgpu_dc != 0;
2096#endif
2097 case CHIP_VEGA10: 2161 case CHIP_VEGA10:
2098 case CHIP_VEGA12: 2162 case CHIP_VEGA12:
2163 case CHIP_VEGA20:
2099#if defined(CONFIG_DRM_AMD_DC_DCN1_0) 2164#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
2100 case CHIP_RAVEN: 2165 case CHIP_RAVEN:
2101#endif 2166#endif
@@ -2375,10 +2440,6 @@ fence_driver_init:
2375 goto failed; 2440 goto failed;
2376 } 2441 }
2377 2442
2378 r = amdgpu_ib_ring_tests(adev);
2379 if (r)
2380 DRM_ERROR("ib ring test failed (%d).\n", r);
2381
2382 if (amdgpu_sriov_vf(adev)) 2443 if (amdgpu_sriov_vf(adev))
2383 amdgpu_virt_init_data_exchange(adev); 2444 amdgpu_virt_init_data_exchange(adev);
2384 2445
@@ -2539,7 +2600,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
2539 /* unpin the front buffers and cursors */ 2600 /* unpin the front buffers and cursors */
2540 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 2601 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2541 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2602 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2542 struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb); 2603 struct drm_framebuffer *fb = crtc->primary->fb;
2543 struct amdgpu_bo *robj; 2604 struct amdgpu_bo *robj;
2544 2605
2545 if (amdgpu_crtc->cursor_bo) { 2606 if (amdgpu_crtc->cursor_bo) {
@@ -2551,10 +2612,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
2551 } 2612 }
2552 } 2613 }
2553 2614
2554 if (rfb == NULL || rfb->obj == NULL) { 2615 if (fb == NULL || fb->obj[0] == NULL) {
2555 continue; 2616 continue;
2556 } 2617 }
2557 robj = gem_to_amdgpu_bo(rfb->obj); 2618 robj = gem_to_amdgpu_bo(fb->obj[0]);
2558 /* don't unpin kernel fb objects */ 2619 /* don't unpin kernel fb objects */
2559 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { 2620 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2560 r = amdgpu_bo_reserve(robj, true); 2621 r = amdgpu_bo_reserve(robj, true);
@@ -2640,11 +2701,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
2640 } 2701 }
2641 amdgpu_fence_driver_resume(adev); 2702 amdgpu_fence_driver_resume(adev);
2642 2703
2643 if (resume) {
2644 r = amdgpu_ib_ring_tests(adev);
2645 if (r)
2646 DRM_ERROR("ib ring test failed (%d).\n", r);
2647 }
2648 2704
2649 r = amdgpu_device_ip_late_init(adev); 2705 r = amdgpu_device_ip_late_init(adev);
2650 if (r) 2706 if (r)
@@ -2736,6 +2792,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
2736 if (amdgpu_sriov_vf(adev)) 2792 if (amdgpu_sriov_vf(adev))
2737 return true; 2793 return true;
2738 2794
2795 if (amdgpu_asic_need_full_reset(adev))
2796 return true;
2797
2739 for (i = 0; i < adev->num_ip_blocks; i++) { 2798 for (i = 0; i < adev->num_ip_blocks; i++) {
2740 if (!adev->ip_blocks[i].status.valid) 2799 if (!adev->ip_blocks[i].status.valid)
2741 continue; 2800 continue;
@@ -2792,6 +2851,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
2792{ 2851{
2793 int i; 2852 int i;
2794 2853
2854 if (amdgpu_asic_need_full_reset(adev))
2855 return true;
2856
2795 for (i = 0; i < adev->num_ip_blocks; i++) { 2857 for (i = 0; i < adev->num_ip_blocks; i++) {
2796 if (!adev->ip_blocks[i].status.valid) 2858 if (!adev->ip_blocks[i].status.valid)
2797 continue; 2859 continue;
@@ -3087,20 +3149,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3087 3149
3088 /* now we are okay to resume SMC/CP/SDMA */ 3150 /* now we are okay to resume SMC/CP/SDMA */
3089 r = amdgpu_device_ip_reinit_late_sriov(adev); 3151 r = amdgpu_device_ip_reinit_late_sriov(adev);
3090 amdgpu_virt_release_full_gpu(adev, true);
3091 if (r) 3152 if (r)
3092 goto error; 3153 goto error;
3093 3154
3094 amdgpu_irq_gpu_reset_resume_helper(adev); 3155 amdgpu_irq_gpu_reset_resume_helper(adev);
3095 r = amdgpu_ib_ring_tests(adev); 3156 r = amdgpu_ib_ring_tests(adev);
3096 3157
3158error:
3159 amdgpu_virt_release_full_gpu(adev, true);
3097 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3160 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3098 atomic_inc(&adev->vram_lost_counter); 3161 atomic_inc(&adev->vram_lost_counter);
3099 r = amdgpu_device_handle_vram_lost(adev); 3162 r = amdgpu_device_handle_vram_lost(adev);
3100 } 3163 }
3101 3164
3102error:
3103
3104 return r; 3165 return r;
3105} 3166}
3106 3167
@@ -3117,7 +3178,6 @@ error:
3117int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 3178int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3118 struct amdgpu_job *job, bool force) 3179 struct amdgpu_job *job, bool force)
3119{ 3180{
3120 struct drm_atomic_state *state = NULL;
3121 int i, r, resched; 3181 int i, r, resched;
3122 3182
3123 if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { 3183 if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -3140,10 +3200,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3140 /* block TTM */ 3200 /* block TTM */
3141 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); 3201 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
3142 3202
3143 /* store modesetting */
3144 if (amdgpu_device_has_dc_support(adev))
3145 state = drm_atomic_helper_suspend(adev->ddev);
3146
3147 /* block all schedulers and reset given job's ring */ 3203 /* block all schedulers and reset given job's ring */
3148 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3204 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3149 struct amdgpu_ring *ring = adev->rings[i]; 3205 struct amdgpu_ring *ring = adev->rings[i];
@@ -3183,10 +3239,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3183 kthread_unpark(ring->sched.thread); 3239 kthread_unpark(ring->sched.thread);
3184 } 3240 }
3185 3241
3186 if (amdgpu_device_has_dc_support(adev)) { 3242 if (!amdgpu_device_has_dc_support(adev)) {
3187 if (drm_atomic_helper_resume(adev->ddev, state))
3188 dev_info(adev->dev, "drm resume failed:%d\n", r);
3189 } else {
3190 drm_helper_resume_force_mode(adev->ddev); 3243 drm_helper_resume_force_mode(adev->ddev);
3191 } 3244 }
3192 3245
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 93f700ab1bfb..76ee8e04ff11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,6 +35,7 @@
35#include <linux/pm_runtime.h> 35#include <linux/pm_runtime.h>
36#include <drm/drm_crtc_helper.h> 36#include <drm/drm_crtc_helper.h>
37#include <drm/drm_edid.h> 37#include <drm/drm_edid.h>
38#include <drm/drm_gem_framebuffer_helper.h>
38#include <drm/drm_fb_helper.h> 39#include <drm/drm_fb_helper.h>
39 40
40static void amdgpu_display_flip_callback(struct dma_fence *f, 41static void amdgpu_display_flip_callback(struct dma_fence *f,
@@ -151,8 +152,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
151 struct drm_device *dev = crtc->dev; 152 struct drm_device *dev = crtc->dev;
152 struct amdgpu_device *adev = dev->dev_private; 153 struct amdgpu_device *adev = dev->dev_private;
153 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 154 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
154 struct amdgpu_framebuffer *old_amdgpu_fb;
155 struct amdgpu_framebuffer *new_amdgpu_fb;
156 struct drm_gem_object *obj; 155 struct drm_gem_object *obj;
157 struct amdgpu_flip_work *work; 156 struct amdgpu_flip_work *work;
158 struct amdgpu_bo *new_abo; 157 struct amdgpu_bo *new_abo;
@@ -174,15 +173,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
174 work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; 173 work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
175 174
176 /* schedule unpin of the old buffer */ 175 /* schedule unpin of the old buffer */
177 old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 176 obj = crtc->primary->fb->obj[0];
178 obj = old_amdgpu_fb->obj;
179 177
180 /* take a reference to the old object */ 178 /* take a reference to the old object */
181 work->old_abo = gem_to_amdgpu_bo(obj); 179 work->old_abo = gem_to_amdgpu_bo(obj);
182 amdgpu_bo_ref(work->old_abo); 180 amdgpu_bo_ref(work->old_abo);
183 181
184 new_amdgpu_fb = to_amdgpu_framebuffer(fb); 182 obj = fb->obj[0];
185 obj = new_amdgpu_fb->obj;
186 new_abo = gem_to_amdgpu_bo(obj); 183 new_abo = gem_to_amdgpu_bo(obj);
187 184
188 /* pin the new buffer */ 185 /* pin the new buffer */
@@ -192,7 +189,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
192 goto cleanup; 189 goto cleanup;
193 } 190 }
194 191
195 r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base); 192 r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base);
196 if (unlikely(r != 0)) { 193 if (unlikely(r != 0)) {
197 DRM_ERROR("failed to pin new abo buffer before flip\n"); 194 DRM_ERROR("failed to pin new abo buffer before flip\n");
198 goto unreserve; 195 goto unreserve;
@@ -482,31 +479,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
482 return true; 479 return true;
483} 480}
484 481
485static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
486{
487 struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
488
489 drm_gem_object_put_unlocked(amdgpu_fb->obj);
490 drm_framebuffer_cleanup(fb);
491 kfree(amdgpu_fb);
492}
493
494static int amdgpu_display_user_framebuffer_create_handle(
495 struct drm_framebuffer *fb,
496 struct drm_file *file_priv,
497 unsigned int *handle)
498{
499 struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
500
501 return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle);
502}
503
504static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { 482static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
505 .destroy = amdgpu_display_user_framebuffer_destroy, 483 .destroy = drm_gem_fb_destroy,
506 .create_handle = amdgpu_display_user_framebuffer_create_handle, 484 .create_handle = drm_gem_fb_create_handle,
507}; 485};
508 486
509uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev) 487uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
510{ 488{
511 uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; 489 uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
512 490
@@ -526,11 +504,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
526 struct drm_gem_object *obj) 504 struct drm_gem_object *obj)
527{ 505{
528 int ret; 506 int ret;
529 rfb->obj = obj; 507 rfb->base.obj[0] = obj;
530 drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); 508 drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
531 ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); 509 ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
532 if (ret) { 510 if (ret) {
533 rfb->obj = NULL; 511 rfb->base.obj[0] = NULL;
534 return ret; 512 return ret;
535 } 513 }
536 return 0; 514 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 2b11d808f297..f66e3e3fef0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,7 +23,7 @@
23#ifndef __AMDGPU_DISPLAY_H__ 23#ifndef __AMDGPU_DISPLAY_H__
24#define __AMDGPU_DISPLAY_H__ 24#define __AMDGPU_DISPLAY_H__
25 25
26uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev); 26uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
27struct drm_framebuffer * 27struct drm_framebuffer *
28amdgpu_display_user_framebuffer_create(struct drm_device *dev, 28amdgpu_display_user_framebuffer_create(struct drm_device *dev,
29 struct drm_file *file_priv, 29 struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index e997ebbe43ea..def1010ac05e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -115,6 +115,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
115 pr_cont("\n"); 115 pr_cont("\n");
116} 116}
117 117
118void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
119{
120 struct drm_device *ddev = adev->ddev;
121 struct drm_crtc *crtc;
122 struct amdgpu_crtc *amdgpu_crtc;
123
124 adev->pm.dpm.new_active_crtcs = 0;
125 adev->pm.dpm.new_active_crtc_count = 0;
126 if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
127 list_for_each_entry(crtc,
128 &ddev->mode_config.crtc_list, head) {
129 amdgpu_crtc = to_amdgpu_crtc(crtc);
130 if (amdgpu_crtc->enabled) {
131 adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
132 adev->pm.dpm.new_active_crtc_count++;
133 }
134 }
135 }
136}
137
118 138
119u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) 139u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev)
120{ 140{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 643d008410c6..dd6203a0a6b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src {
52 AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 52 AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
53}; 53};
54 54
55#define SCLK_DEEP_SLEEP_MASK 0x8
56
57struct amdgpu_ps { 55struct amdgpu_ps {
58 u32 caps; /* vbios flags */ 56 u32 caps; /* vbios flags */
59 u32 class; /* vbios flags */ 57 u32 class; /* vbios flags */
@@ -349,12 +347,6 @@ enum amdgpu_pcie_gen {
349 ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ 347 ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
350 (adev)->powerplay.pp_handle, msg_id)) 348 (adev)->powerplay.pp_handle, msg_id))
351 349
352#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \
353 virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \
354 ((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \
355 (adev)->powerplay.pp_handle, virtual_addr_low, \
356 virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
357
358#define amdgpu_dpm_get_power_profile_mode(adev, buf) \ 350#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
359 ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ 351 ((adev)->powerplay.pp_funcs->get_power_profile_mode(\
360 (adev)->powerplay.pp_handle, buf)) 352 (adev)->powerplay.pp_handle, buf))
@@ -445,6 +437,8 @@ struct amdgpu_pm {
445 uint32_t pcie_gen_mask; 437 uint32_t pcie_gen_mask;
446 uint32_t pcie_mlw_mask; 438 uint32_t pcie_mlw_mask;
447 struct amd_pp_display_configuration pm_display_cfg;/* set by dc */ 439 struct amd_pp_display_configuration pm_display_cfg;/* set by dc */
440 uint32_t smu_prv_buffer_size;
441 struct amdgpu_bo *smu_prv_buffer;
448}; 442};
449 443
450#define R600_SSTU_DFLT 0 444#define R600_SSTU_DFLT 0
@@ -482,6 +476,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
482 struct amdgpu_ps *rps); 476 struct amdgpu_ps *rps);
483u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); 477u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
484u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); 478u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
479void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
485bool amdgpu_is_uvd_state(u32 class, u32 class2); 480bool amdgpu_is_uvd_state(u32 class, u32 class2);
486void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, 481void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
487 u32 *p, u32 *u); 482 u32 *p, u32 *u);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0b19482b36b8..b0bf2f24da48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -75,9 +75,10 @@
75 * - 3.23.0 - Add query for VRAM lost counter 75 * - 3.23.0 - Add query for VRAM lost counter
76 * - 3.24.0 - Add high priority compute support for gfx9 76 * - 3.24.0 - Add high priority compute support for gfx9
77 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). 77 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
78 * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
78 */ 79 */
79#define KMS_DRIVER_MAJOR 3 80#define KMS_DRIVER_MAJOR 3
80#define KMS_DRIVER_MINOR 25 81#define KMS_DRIVER_MINOR 26
81#define KMS_DRIVER_PATCHLEVEL 0 82#define KMS_DRIVER_PATCHLEVEL 0
82 83
83int amdgpu_vram_limit = 0; 84int amdgpu_vram_limit = 0;
@@ -121,7 +122,7 @@ uint amdgpu_pg_mask = 0xffffffff;
121uint amdgpu_sdma_phase_quantum = 32; 122uint amdgpu_sdma_phase_quantum = 32;
122char *amdgpu_disable_cu = NULL; 123char *amdgpu_disable_cu = NULL;
123char *amdgpu_virtual_display = NULL; 124char *amdgpu_virtual_display = NULL;
124uint amdgpu_pp_feature_mask = 0xffffbfff; 125uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */
125int amdgpu_ngg = 0; 126int amdgpu_ngg = 0;
126int amdgpu_prim_buf_per_se = 0; 127int amdgpu_prim_buf_per_se = 0;
127int amdgpu_pos_buf_per_se = 0; 128int amdgpu_pos_buf_per_se = 0;
@@ -132,6 +133,7 @@ int amdgpu_lbpw = -1;
132int amdgpu_compute_multipipe = -1; 133int amdgpu_compute_multipipe = -1;
133int amdgpu_gpu_recovery = -1; /* auto */ 134int amdgpu_gpu_recovery = -1; /* auto */
134int amdgpu_emu_mode = 0; 135int amdgpu_emu_mode = 0;
136uint amdgpu_smu_memory_pool_size = 0;
135 137
136MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); 138MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
137module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); 139module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -316,6 +318,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)
316module_param_named(cik_support, amdgpu_cik_support, int, 0444); 318module_param_named(cik_support, amdgpu_cik_support, int, 0444);
317#endif 319#endif
318 320
321MODULE_PARM_DESC(smu_memory_pool_size,
322 "reserve gtt for smu debug usage, 0 = disable,"
323 "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
324module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
325
319static const struct pci_device_id pciidlist[] = { 326static const struct pci_device_id pciidlist[] = {
320#ifdef CONFIG_DRM_AMDGPU_SI 327#ifdef CONFIG_DRM_AMDGPU_SI
321 {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, 328 {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -534,6 +541,9 @@ static const struct pci_device_id pciidlist[] = {
534 {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, 541 {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
535 {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, 542 {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
536 {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, 543 {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
544 /* VEGAM */
545 {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
546 {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
537 /* Vega 10 */ 547 /* Vega 10 */
538 {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, 548 {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
539 {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, 549 {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
@@ -550,6 +560,13 @@ static const struct pci_device_id pciidlist[] = {
550 {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, 560 {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
551 {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, 561 {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
552 {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, 562 {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
563 /* Vega 20 */
564 {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
565 {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
566 {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
567 {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
568 {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
569 {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
553 /* Raven */ 570 /* Raven */
554 {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, 571 {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
555 572
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 12063019751b..bc5fd8ebab5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
137 /* need to align pitch with crtc limits */ 137 /* need to align pitch with crtc limits */
138 mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, 138 mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
139 fb_tiled); 139 fb_tiled);
140 domain = amdgpu_display_framebuffer_domains(adev); 140 domain = amdgpu_display_supported_domains(adev);
141 141
142 height = ALIGN(mode_cmd->height, 8); 142 height = ALIGN(mode_cmd->height, 8);
143 size = mode_cmd->pitches[0] * height; 143 size = mode_cmd->pitches[0] * height;
@@ -292,9 +292,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb
292 292
293 drm_fb_helper_unregister_fbi(&rfbdev->helper); 293 drm_fb_helper_unregister_fbi(&rfbdev->helper);
294 294
295 if (rfb->obj) { 295 if (rfb->base.obj[0]) {
296 amdgpufb_destroy_pinned_object(rfb->obj); 296 amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
297 rfb->obj = NULL; 297 rfb->base.obj[0] = NULL;
298 drm_framebuffer_unregister_private(&rfb->base); 298 drm_framebuffer_unregister_private(&rfb->base);
299 drm_framebuffer_cleanup(&rfb->base); 299 drm_framebuffer_cleanup(&rfb->base);
300 } 300 }
@@ -377,7 +377,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev)
377 if (!adev->mode_info.rfbdev) 377 if (!adev->mode_info.rfbdev)
378 return 0; 378 return 0;
379 379
380 robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj); 380 robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]);
381 size += amdgpu_bo_size(robj); 381 size += amdgpu_bo_size(robj);
382 return size; 382 return size;
383} 383}
@@ -386,7 +386,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
386{ 386{
387 if (!adev->mode_info.rfbdev) 387 if (!adev->mode_info.rfbdev)
388 return false; 388 return false;
389 if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj)) 389 if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]))
390 return true; 390 return true;
391 return false; 391 return false;
392} 392}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 97449e06a242..39ec6b8890a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
131 * Emits a fence command on the requested ring (all asics). 131 * Emits a fence command on the requested ring (all asics).
132 * Returns 0 on success, -ENOMEM on failure. 132 * Returns 0 on success, -ENOMEM on failure.
133 */ 133 */
134int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) 134int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
135 unsigned flags)
135{ 136{
136 struct amdgpu_device *adev = ring->adev; 137 struct amdgpu_device *adev = ring->adev;
137 struct amdgpu_fence *fence; 138 struct amdgpu_fence *fence;
@@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
149 adev->fence_context + ring->idx, 150 adev->fence_context + ring->idx,
150 seq); 151 seq);
151 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 152 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
152 seq, AMDGPU_FENCE_FLAG_INT); 153 seq, flags | AMDGPU_FENCE_FLAG_INT);
153 154
154 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; 155 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
155 /* This function can't be called concurrently anyway, otherwise 156 /* This function can't be called concurrently anyway, otherwise
@@ -375,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
375 struct amdgpu_device *adev = ring->adev; 376 struct amdgpu_device *adev = ring->adev;
376 uint64_t index; 377 uint64_t index;
377 378
378 if (ring != &adev->uvd.ring) { 379 if (ring != &adev->uvd.inst[ring->me].ring) {
379 ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs]; 380 ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
380 ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4); 381 ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
381 } else { 382 } else {
382 /* put fence directly behind firmware */ 383 /* put fence directly behind firmware */
383 index = ALIGN(adev->uvd.fw->size, 8); 384 index = ALIGN(adev->uvd.fw->size, 8);
384 ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; 385 ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
385 ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; 386 ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
386 } 387 }
387 amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); 388 amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
388 amdgpu_irq_get(adev, irq_src, irq_type); 389 amdgpu_irq_get(adev, irq_src, irq_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index cf0f186c6092..17d6b9fb6d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
113 int r; 113 int r;
114 114
115 if (adev->gart.robj == NULL) { 115 if (adev->gart.robj == NULL) {
116 r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE, 116 struct amdgpu_bo_param bp;
117 AMDGPU_GEM_DOMAIN_VRAM, 117
118 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 118 memset(&bp, 0, sizeof(bp));
119 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, 119 bp.size = adev->gart.table_size;
120 ttm_bo_type_kernel, NULL, 120 bp.byte_align = PAGE_SIZE;
121 &adev->gart.robj); 121 bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
122 bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
123 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
124 bp.type = ttm_bo_type_kernel;
125 bp.resv = NULL;
126 r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
122 if (r) { 127 if (r) {
123 return r; 128 return r;
124 } 129 }
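For reference, the allocation pattern introduced by this conversion looks as follows from a caller's point of view. This is a hedged sketch (a fragment inside a driver function; the size and flags are illustrative and mirror the amdgpu_gart_table_vram_alloc() hunk above rather than any particular new caller):

	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = PAGE_SIZE;				/* illustrative size */
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;		/* requested placement */
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;	/* illustrative flags */
	bp.type = ttm_bo_type_kernel;			/* kernel-owned BO */
	bp.resv = NULL;					/* let TTM allocate a reservation object */

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r)
		return r;

With every creation argument travelling through struct amdgpu_bo_param, adding a field (such as preferred_domain below) no longer requires touching every caller's argument list.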
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 46b9ea4e6103..2c8e27370284 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -48,17 +48,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
48 struct drm_gem_object **obj) 48 struct drm_gem_object **obj)
49{ 49{
50 struct amdgpu_bo *bo; 50 struct amdgpu_bo *bo;
51 struct amdgpu_bo_param bp;
51 int r; 52 int r;
52 53
54 memset(&bp, 0, sizeof(bp));
53 *obj = NULL; 55 *obj = NULL;
54 /* At least align on page size */ 56 /* At least align on page size */
55 if (alignment < PAGE_SIZE) { 57 if (alignment < PAGE_SIZE) {
56 alignment = PAGE_SIZE; 58 alignment = PAGE_SIZE;
57 } 59 }
58 60
61 bp.size = size;
62 bp.byte_align = alignment;
63 bp.type = type;
64 bp.resv = resv;
65 bp.preferred_domain = initial_domain;
59retry: 66retry:
60 r = amdgpu_bo_create(adev, size, alignment, initial_domain, 67 bp.flags = flags;
61 flags, type, resv, &bo); 68 bp.domain = initial_domain;
69 r = amdgpu_bo_create(adev, &bp, &bo);
62 if (r) { 70 if (r) {
63 if (r != -ERESTARTSYS) { 71 if (r != -ERESTARTSYS) {
64 if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { 72 if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@@ -221,12 +229,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
221 return -EINVAL; 229 return -EINVAL;
222 230
223 /* reject invalid gem domains */ 231 /* reject invalid gem domains */
224 if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | 232 if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
225 AMDGPU_GEM_DOMAIN_GTT |
226 AMDGPU_GEM_DOMAIN_VRAM |
227 AMDGPU_GEM_DOMAIN_GDS |
228 AMDGPU_GEM_DOMAIN_GWS |
229 AMDGPU_GEM_DOMAIN_OA))
230 return -EINVAL; 233 return -EINVAL;
231 234
232 /* create a gem object to contain this object in */ 235 /* create a gem object to contain this object in */
@@ -771,16 +774,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
771} 774}
772 775
773#if defined(CONFIG_DEBUG_FS) 776#if defined(CONFIG_DEBUG_FS)
777
778#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \
779 if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \
780 seq_printf((m), " " #flag); \
781 }
782
774static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) 783static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
775{ 784{
776 struct drm_gem_object *gobj = ptr; 785 struct drm_gem_object *gobj = ptr;
777 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); 786 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
778 struct seq_file *m = data; 787 struct seq_file *m = data;
779 788
789 struct dma_buf_attachment *attachment;
790 struct dma_buf *dma_buf;
780 unsigned domain; 791 unsigned domain;
781 const char *placement; 792 const char *placement;
782 unsigned pin_count; 793 unsigned pin_count;
783 uint64_t offset;
784 794
785 domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); 795 domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
786 switch (domain) { 796 switch (domain) {
@@ -798,13 +808,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
798 seq_printf(m, "\t0x%08x: %12ld byte %s", 808 seq_printf(m, "\t0x%08x: %12ld byte %s",
799 id, amdgpu_bo_size(bo), placement); 809 id, amdgpu_bo_size(bo), placement);
800 810
801 offset = READ_ONCE(bo->tbo.mem.start);
802 if (offset != AMDGPU_BO_INVALID_OFFSET)
803 seq_printf(m, " @ 0x%010Lx", offset);
804
805 pin_count = READ_ONCE(bo->pin_count); 811 pin_count = READ_ONCE(bo->pin_count);
806 if (pin_count) 812 if (pin_count)
807 seq_printf(m, " pin count %d", pin_count); 813 seq_printf(m, " pin count %d", pin_count);
814
815 dma_buf = READ_ONCE(bo->gem_base.dma_buf);
816 attachment = READ_ONCE(bo->gem_base.import_attach);
817
818 if (attachment)
819 seq_printf(m, " imported from %p", dma_buf);
820 else if (dma_buf)
821 seq_printf(m, " exported as %p", dma_buf);
822
823 amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
824 amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
825 amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
826 amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
827 amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
828 amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
829 amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
830 amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
831
808 seq_printf(m, "\n"); 832 seq_printf(m, "\n");
809 833
810 return 0; 834 return 0;
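One reading of the gem retry loop above (an interpretation of the hunk, not a statement from the patch): bp.preferred_domain is filled in once with the domain userspace requested, while bp.domain and bp.flags are re-assigned on every pass through the retry label, so a fallback placement tried after a failed allocation only changes that attempt and the BO still records the caller's original preference.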
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 311589e02d17..f70eeed9ed76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
127 struct amdgpu_vm *vm; 127 struct amdgpu_vm *vm;
128 uint64_t fence_ctx; 128 uint64_t fence_ctx;
129 uint32_t status = 0, alloc_size; 129 uint32_t status = 0, alloc_size;
130 unsigned fence_flags = 0;
130 131
131 unsigned i; 132 unsigned i;
132 int r = 0; 133 int r = 0;
@@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
227#endif 228#endif
228 amdgpu_asic_invalidate_hdp(adev, ring); 229 amdgpu_asic_invalidate_hdp(adev, ring);
229 230
230 r = amdgpu_fence_emit(ring, f); 231 if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
232 fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
233
234 r = amdgpu_fence_emit(ring, f, fence_flags);
231 if (r) { 235 if (r) {
232 dev_err(adev->dev, "failed to emit fence (%d)\n", r); 236 dev_err(adev->dev, "failed to emit fence (%d)\n", r);
233 if (job && job->vmid) 237 if (job && job->vmid)
@@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
242 /* wrap the last IB with fence */ 246 /* wrap the last IB with fence */
243 if (job && job->uf_addr) { 247 if (job && job->uf_addr) {
244 amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, 248 amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
245 AMDGPU_FENCE_FLAG_64BIT); 249 fence_flags | AMDGPU_FENCE_FLAG_64BIT);
246 } 250 }
247 251
248 if (patch_offset != ~0 && ring->funcs->patch_cond_exec) 252 if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
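For callers outside this file, the updated fence interface is used as in the following hedged fragment; the flag choice is illustrative and mirrors the AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE handling above, and passing 0 keeps the previous behaviour:

	struct dma_fence *fence;
	int r;

	r = amdgpu_fence_emit(ring, &fence, AMDGPU_FENCE_FLAG_TC_WB_ONLY);
	if (r)
		return r;

	/* hand the fence to whoever waits on it, then drop our reference */
	dma_fence_put(fence);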
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 4b7824d30e73..91517b166a3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -31,6 +31,7 @@
31#include "amdgpu_sched.h" 31#include "amdgpu_sched.h"
32#include "amdgpu_uvd.h" 32#include "amdgpu_uvd.h"
33#include "amdgpu_vce.h" 33#include "amdgpu_vce.h"
34#include "atom.h"
34 35
35#include <linux/vga_switcheroo.h> 36#include <linux/vga_switcheroo.h>
36#include <linux/slab.h> 37#include <linux/slab.h>
@@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
214 fw_info->ver = adev->gfx.rlc_fw_version; 215 fw_info->ver = adev->gfx.rlc_fw_version;
215 fw_info->feature = adev->gfx.rlc_feature_version; 216 fw_info->feature = adev->gfx.rlc_feature_version;
216 break; 217 break;
218 case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL:
219 fw_info->ver = adev->gfx.rlc_srlc_fw_version;
220 fw_info->feature = adev->gfx.rlc_srlc_feature_version;
221 break;
222 case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM:
223 fw_info->ver = adev->gfx.rlc_srlg_fw_version;
224 fw_info->feature = adev->gfx.rlc_srlg_feature_version;
225 break;
226 case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM:
227 fw_info->ver = adev->gfx.rlc_srls_fw_version;
228 fw_info->feature = adev->gfx.rlc_srls_feature_version;
229 break;
217 case AMDGPU_INFO_FW_GFX_MEC: 230 case AMDGPU_INFO_FW_GFX_MEC:
218 if (query_fw->index == 0) { 231 if (query_fw->index == 0) {
219 fw_info->ver = adev->gfx.mec_fw_version; 232 fw_info->ver = adev->gfx.mec_fw_version;
@@ -273,12 +286,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
273 struct drm_crtc *crtc; 286 struct drm_crtc *crtc;
274 uint32_t ui32 = 0; 287 uint32_t ui32 = 0;
275 uint64_t ui64 = 0; 288 uint64_t ui64 = 0;
276 int i, found; 289 int i, j, found;
277 int ui32_size = sizeof(ui32); 290 int ui32_size = sizeof(ui32);
278 291
279 if (!info->return_size || !info->return_pointer) 292 if (!info->return_size || !info->return_pointer)
280 return -EINVAL; 293 return -EINVAL;
281 294
295 /* Ensure IB tests are run on ring */
296 flush_delayed_work(&adev->late_init_work);
297
282 switch (info->query) { 298 switch (info->query) {
283 case AMDGPU_INFO_ACCEL_WORKING: 299 case AMDGPU_INFO_ACCEL_WORKING:
284 ui32 = adev->accel_working; 300 ui32 = adev->accel_working;
@@ -332,7 +348,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
332 break; 348 break;
333 case AMDGPU_HW_IP_UVD: 349 case AMDGPU_HW_IP_UVD:
334 type = AMD_IP_BLOCK_TYPE_UVD; 350 type = AMD_IP_BLOCK_TYPE_UVD;
335 ring_mask = adev->uvd.ring.ready ? 1 : 0; 351 for (i = 0; i < adev->uvd.num_uvd_inst; i++)
352 ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
336 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; 353 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
337 ib_size_alignment = 16; 354 ib_size_alignment = 16;
338 break; 355 break;
@@ -345,8 +362,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
345 break; 362 break;
346 case AMDGPU_HW_IP_UVD_ENC: 363 case AMDGPU_HW_IP_UVD_ENC:
347 type = AMD_IP_BLOCK_TYPE_UVD; 364 type = AMD_IP_BLOCK_TYPE_UVD;
348 for (i = 0; i < adev->uvd.num_enc_rings; i++) 365 for (i = 0; i < adev->uvd.num_uvd_inst; i++)
349 ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i); 366 for (j = 0; j < adev->uvd.num_enc_rings; j++)
367 ring_mask |=
368 ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
369 (j + i * adev->uvd.num_enc_rings));
350 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; 370 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
351 ib_size_alignment = 1; 371 ib_size_alignment = 1;
352 break; 372 break;
@@ -701,10 +721,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
701 } 721 }
702 } 722 }
703 case AMDGPU_INFO_SENSOR: { 723 case AMDGPU_INFO_SENSOR: {
704 struct pp_gpu_power query = {0}; 724 if (!adev->pm.dpm_enabled)
705 int query_size = sizeof(query);
706
707 if (amdgpu_dpm == 0)
708 return -ENOENT; 725 return -ENOENT;
709 726
710 switch (info->sensor_info.type) { 727 switch (info->sensor_info.type) {
@@ -746,10 +763,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
746 /* get average GPU power */ 763 /* get average GPU power */
747 if (amdgpu_dpm_read_sensor(adev, 764 if (amdgpu_dpm_read_sensor(adev,
748 AMDGPU_PP_SENSOR_GPU_POWER, 765 AMDGPU_PP_SENSOR_GPU_POWER,
749 (void *)&query, &query_size)) { 766 (void *)&ui32, &ui32_size)) {
750 return -EINVAL; 767 return -EINVAL;
751 } 768 }
752 ui32 = query.average_gpu_power >> 8; 769 ui32 >>= 8;
753 break; 770 break;
754 case AMDGPU_INFO_SENSOR_VDDNB: 771 case AMDGPU_INFO_SENSOR_VDDNB:
755 /* get VDDNB in millivolts */ 772 /* get VDDNB in millivolts */
@@ -913,8 +930,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
913 return; 930 return;
914 931
915 pm_runtime_get_sync(dev->dev); 932 pm_runtime_get_sync(dev->dev);
916 933 amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
917 amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
918 934
919 if (adev->asic_type != CHIP_RAVEN) { 935 if (adev->asic_type != CHIP_RAVEN) {
920 amdgpu_uvd_free_handles(adev, file_priv); 936 amdgpu_uvd_free_handles(adev, file_priv);
@@ -935,6 +951,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
935 pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); 951 pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
936 952
937 amdgpu_vm_fini(adev, &fpriv->vm); 953 amdgpu_vm_fini(adev, &fpriv->vm);
954 amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
955
938 if (pasid) 956 if (pasid)
939 amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); 957 amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
940 amdgpu_bo_unref(&pd); 958 amdgpu_bo_unref(&pd);
@@ -1088,6 +1106,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
1088 struct amdgpu_device *adev = dev->dev_private; 1106 struct amdgpu_device *adev = dev->dev_private;
1089 struct drm_amdgpu_info_firmware fw_info; 1107 struct drm_amdgpu_info_firmware fw_info;
1090 struct drm_amdgpu_query_fw query_fw; 1108 struct drm_amdgpu_query_fw query_fw;
1109 struct atom_context *ctx = adev->mode_info.atom_context;
1091 int ret, i; 1110 int ret, i;
1092 1111
1093 /* VCE */ 1112 /* VCE */
@@ -1146,6 +1165,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
1146 seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n", 1165 seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
1147 fw_info.feature, fw_info.ver); 1166 fw_info.feature, fw_info.ver);
1148 1167
1168 /* RLC SAVE RESTORE LIST CNTL */
1169 query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL;
1170 ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
1171 if (ret)
1172 return ret;
1173 seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n",
1174 fw_info.feature, fw_info.ver);
1175
1176 /* RLC SAVE RESTORE LIST GPM MEM */
1177 query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM;
1178 ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
1179 if (ret)
1180 return ret;
1181 seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n",
1182 fw_info.feature, fw_info.ver);
1183
1184 /* RLC SAVE RESTORE LIST SRM MEM */
1185 query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM;
1186 ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
1187 if (ret)
1188 return ret;
1189 seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
1190 fw_info.feature, fw_info.ver);
1191
1149 /* MEC */ 1192 /* MEC */
1150 query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; 1193 query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
1151 query_fw.index = 0; 1194 query_fw.index = 0;
@@ -1210,6 +1253,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
1210 seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n", 1253 seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
1211 fw_info.feature, fw_info.ver); 1254 fw_info.feature, fw_info.ver);
1212 1255
1256
1257 seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
1258
1213 return 0; 1259 return 0;
1214} 1260}
1215 1261
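The new query types are also reachable from userspace. A hedged sketch using libdrm's amdgpu_query_firmware_version() wrapper (the wrapper and its signature come from libdrm's amdgpu.h, not from this patch; dev is an amdgpu_device_handle obtained earlier from amdgpu_device_initialize(), and the fragment assumes <stdio.h> plus a uapi amdgpu_drm.h new enough to define these constants):

	uint32_t ver = 0, feature = 0;
	int r;

	r = amdgpu_query_firmware_version(dev,
					  AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL,
					  0 /* ip instance */, 0 /* index */,
					  &ver, &feature);
	if (r == 0)
		printf("RLC SRLC fw 0x%08x, feature %u\n", ver, feature);

The same call with AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM or _SRM_MEM returns the other two versions, matching the debugfs output added above.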
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index bd67f4cb8e6c..83e344fbb50a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -36,12 +36,14 @@
36#include <drm/drm.h> 36#include <drm/drm.h>
37 37
38#include "amdgpu.h" 38#include "amdgpu.h"
39#include "amdgpu_amdkfd.h"
39 40
40struct amdgpu_mn { 41struct amdgpu_mn {
41 /* constant after initialisation */ 42 /* constant after initialisation */
42 struct amdgpu_device *adev; 43 struct amdgpu_device *adev;
43 struct mm_struct *mm; 44 struct mm_struct *mm;
44 struct mmu_notifier mn; 45 struct mmu_notifier mn;
46 enum amdgpu_mn_type type;
45 47
46 /* only used on destruction */ 48 /* only used on destruction */
47 struct work_struct work; 49 struct work_struct work;
@@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
185} 187}
186 188
187/** 189/**
188 * amdgpu_mn_invalidate_range_start - callback to notify about mm change 190 * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
189 * 191 *
190 * @mn: our notifier 192 * @mn: our notifier
 191 * @mm: the mm this callback is about 193 * @mm: the mm this callback is about
@@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
195 * We block for all BOs between start and end to be idle and 197 * We block for all BOs between start and end to be idle and
 196 * unmap them by moving them into the system domain again. 198 * unmap them by moving them into the system domain again.
197 */ 199 */
198static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, 200static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
199 struct mm_struct *mm, 201 struct mm_struct *mm,
200 unsigned long start, 202 unsigned long start,
201 unsigned long end) 203 unsigned long end)
202{ 204{
203 struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); 205 struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
204 struct interval_tree_node *it; 206 struct interval_tree_node *it;
@@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
220} 222}
221 223
222/** 224/**
225 * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
226 *
227 * @mn: our notifier
 228 * @mm: the mm this callback is about
229 * @start: start of updated range
230 * @end: end of updated range
231 *
232 * We temporarily evict all BOs between start and end. This
233 * necessitates evicting all user-mode queues of the process. The BOs
 234 * are restored in amdgpu_mn_invalidate_range_end_hsa.
235 */
236static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
237 struct mm_struct *mm,
238 unsigned long start,
239 unsigned long end)
240{
241 struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
242 struct interval_tree_node *it;
243
244 /* notification is exclusive, but interval is inclusive */
245 end -= 1;
246
247 amdgpu_mn_read_lock(rmn);
248
249 it = interval_tree_iter_first(&rmn->objects, start, end);
250 while (it) {
251 struct amdgpu_mn_node *node;
252 struct amdgpu_bo *bo;
253
254 node = container_of(it, struct amdgpu_mn_node, it);
255 it = interval_tree_iter_next(it, start, end);
256
257 list_for_each_entry(bo, &node->bos, mn_list) {
258 struct kgd_mem *mem = bo->kfd_bo;
259
260 if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
261 start, end))
262 amdgpu_amdkfd_evict_userptr(mem, mm);
263 }
264 }
265}
266
267/**
223 * amdgpu_mn_invalidate_range_end - callback to notify about mm change 268 * amdgpu_mn_invalidate_range_end - callback to notify about mm change
224 * 269 *
225 * @mn: our notifier 270 * @mn: our notifier
@@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
239 amdgpu_mn_read_unlock(rmn); 284 amdgpu_mn_read_unlock(rmn);
240} 285}
241 286
242static const struct mmu_notifier_ops amdgpu_mn_ops = { 287static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
243 .release = amdgpu_mn_release, 288 [AMDGPU_MN_TYPE_GFX] = {
244 .invalidate_range_start = amdgpu_mn_invalidate_range_start, 289 .release = amdgpu_mn_release,
245 .invalidate_range_end = amdgpu_mn_invalidate_range_end, 290 .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
291 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
292 },
293 [AMDGPU_MN_TYPE_HSA] = {
294 .release = amdgpu_mn_release,
295 .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
296 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
297 },
246}; 298};
247 299
300/* Low bits of any reasonable mm pointer will be unused due to struct
301 * alignment. Use these bits to make a unique key from the mm pointer
302 * and notifier type.
303 */
304#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
305
248/** 306/**
249 * amdgpu_mn_get - create notifier context 307 * amdgpu_mn_get - create notifier context
250 * 308 *
251 * @adev: amdgpu device pointer 309 * @adev: amdgpu device pointer
310 * @type: type of MMU notifier context
252 * 311 *
253 * Creates a notifier context for current->mm. 312 * Creates a notifier context for current->mm.
254 */ 313 */
255struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) 314struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
315 enum amdgpu_mn_type type)
256{ 316{
257 struct mm_struct *mm = current->mm; 317 struct mm_struct *mm = current->mm;
258 struct amdgpu_mn *rmn; 318 struct amdgpu_mn *rmn;
319 unsigned long key = AMDGPU_MN_KEY(mm, type);
259 int r; 320 int r;
260 321
261 mutex_lock(&adev->mn_lock); 322 mutex_lock(&adev->mn_lock);
@@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
264 return ERR_PTR(-EINTR); 325 return ERR_PTR(-EINTR);
265 } 326 }
266 327
267 hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) 328 hash_for_each_possible(adev->mn_hash, rmn, node, key)
268 if (rmn->mm == mm) 329 if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
269 goto release_locks; 330 goto release_locks;
270 331
271 rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); 332 rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
276 337
277 rmn->adev = adev; 338 rmn->adev = adev;
278 rmn->mm = mm; 339 rmn->mm = mm;
279 rmn->mn.ops = &amdgpu_mn_ops;
280 init_rwsem(&rmn->lock); 340 init_rwsem(&rmn->lock);
341 rmn->type = type;
342 rmn->mn.ops = &amdgpu_mn_ops[type];
281 rmn->objects = RB_ROOT_CACHED; 343 rmn->objects = RB_ROOT_CACHED;
282 mutex_init(&rmn->read_lock); 344 mutex_init(&rmn->read_lock);
283 atomic_set(&rmn->recursion, 0); 345 atomic_set(&rmn->recursion, 0);
@@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
286 if (r) 348 if (r)
287 goto free_rmn; 349 goto free_rmn;
288 350
289 hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); 351 hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
290 352
291release_locks: 353release_locks:
292 up_write(&mm->mmap_sem); 354 up_write(&mm->mmap_sem);
@@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
315{ 377{
316 unsigned long end = addr + amdgpu_bo_size(bo) - 1; 378 unsigned long end = addr + amdgpu_bo_size(bo) - 1;
317 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 379 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
380 enum amdgpu_mn_type type =
381 bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
318 struct amdgpu_mn *rmn; 382 struct amdgpu_mn *rmn;
319 struct amdgpu_mn_node *node = NULL; 383 struct amdgpu_mn_node *node = NULL, *new_node;
320 struct list_head bos; 384 struct list_head bos;
321 struct interval_tree_node *it; 385 struct interval_tree_node *it;
322 386
323 rmn = amdgpu_mn_get(adev); 387 rmn = amdgpu_mn_get(adev, type);
324 if (IS_ERR(rmn)) 388 if (IS_ERR(rmn))
325 return PTR_ERR(rmn); 389 return PTR_ERR(rmn);
326 390
391 new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
392 if (!new_node)
393 return -ENOMEM;
394
327 INIT_LIST_HEAD(&bos); 395 INIT_LIST_HEAD(&bos);
328 396
329 down_write(&rmn->lock); 397 down_write(&rmn->lock);
@@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
337 list_splice(&node->bos, &bos); 405 list_splice(&node->bos, &bos);
338 } 406 }
339 407
340 if (!node) { 408 if (!node)
341 node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); 409 node = new_node;
342 if (!node) { 410 else
343 up_write(&rmn->lock); 411 kfree(new_node);
344 return -ENOMEM;
345 }
346 }
347 412
348 bo->mn = rmn; 413 bo->mn = rmn;
349 414
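Why the key used above cannot collide (an illustration of the comment in the hunk, not extra code in the patch): struct mm_struct pointers are at least word-aligned, so their low bits are zero, and adding the small enum value keeps the two notifier types of one process apart:

	/* AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_GFX) == (unsigned long)mm + 0
	 * AMDGPU_MN_KEY(mm, AMDGPU_MN_TYPE_HSA) == (unsigned long)mm + 1
	 */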
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index d0095a3793b8..eb0f432f78fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -29,16 +29,23 @@
29 */ 29 */
30struct amdgpu_mn; 30struct amdgpu_mn;
31 31
32enum amdgpu_mn_type {
33 AMDGPU_MN_TYPE_GFX,
34 AMDGPU_MN_TYPE_HSA,
35};
36
32#if defined(CONFIG_MMU_NOTIFIER) 37#if defined(CONFIG_MMU_NOTIFIER)
33void amdgpu_mn_lock(struct amdgpu_mn *mn); 38void amdgpu_mn_lock(struct amdgpu_mn *mn);
34void amdgpu_mn_unlock(struct amdgpu_mn *mn); 39void amdgpu_mn_unlock(struct amdgpu_mn *mn);
35struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); 40struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
41 enum amdgpu_mn_type type);
36int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); 42int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
37void amdgpu_mn_unregister(struct amdgpu_bo *bo); 43void amdgpu_mn_unregister(struct amdgpu_bo *bo);
38#else 44#else
39static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} 45static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
40static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} 46static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
41static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) 47static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
48 enum amdgpu_mn_type type)
42{ 49{
43 return NULL; 50 return NULL;
44} 51}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index d6416ee52e32..b9e9e8b02fb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -308,7 +308,6 @@ struct amdgpu_display_funcs {
308 308
309struct amdgpu_framebuffer { 309struct amdgpu_framebuffer {
310 struct drm_framebuffer base; 310 struct drm_framebuffer base;
311 struct drm_gem_object *obj;
312 311
313 /* caching for later use */ 312 /* caching for later use */
314 uint64_t address; 313 uint64_t address;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6d08cde8443c..6a9e46ae7f0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -191,14 +191,21 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
191 u32 domain, struct amdgpu_bo **bo_ptr, 191 u32 domain, struct amdgpu_bo **bo_ptr,
192 u64 *gpu_addr, void **cpu_addr) 192 u64 *gpu_addr, void **cpu_addr)
193{ 193{
194 struct amdgpu_bo_param bp;
194 bool free = false; 195 bool free = false;
195 int r; 196 int r;
196 197
198 memset(&bp, 0, sizeof(bp));
199 bp.size = size;
200 bp.byte_align = align;
201 bp.domain = domain;
202 bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
203 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
204 bp.type = ttm_bo_type_kernel;
205 bp.resv = NULL;
206
197 if (!*bo_ptr) { 207 if (!*bo_ptr) {
198 r = amdgpu_bo_create(adev, size, align, domain, 208 r = amdgpu_bo_create(adev, &bp, bo_ptr);
199 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
200 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
201 ttm_bo_type_kernel, NULL, bo_ptr);
202 if (r) { 209 if (r) {
203 dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", 210 dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
204 r); 211 r);
@@ -341,27 +348,25 @@ fail:
341 return false; 348 return false;
342} 349}
343 350
344static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, 351static int amdgpu_bo_do_create(struct amdgpu_device *adev,
345 int byte_align, u32 domain, 352 struct amdgpu_bo_param *bp,
346 u64 flags, enum ttm_bo_type type,
347 struct reservation_object *resv,
348 struct amdgpu_bo **bo_ptr) 353 struct amdgpu_bo **bo_ptr)
349{ 354{
350 struct ttm_operation_ctx ctx = { 355 struct ttm_operation_ctx ctx = {
351 .interruptible = (type != ttm_bo_type_kernel), 356 .interruptible = (bp->type != ttm_bo_type_kernel),
352 .no_wait_gpu = false, 357 .no_wait_gpu = false,
353 .resv = resv, 358 .resv = bp->resv,
354 .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT 359 .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
355 }; 360 };
356 struct amdgpu_bo *bo; 361 struct amdgpu_bo *bo;
357 unsigned long page_align; 362 unsigned long page_align, size = bp->size;
358 size_t acc_size; 363 size_t acc_size;
359 int r; 364 int r;
360 365
361 page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; 366 page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
362 size = ALIGN(size, PAGE_SIZE); 367 size = ALIGN(size, PAGE_SIZE);
363 368
364 if (!amdgpu_bo_validate_size(adev, size, domain)) 369 if (!amdgpu_bo_validate_size(adev, size, bp->domain))
365 return -ENOMEM; 370 return -ENOMEM;
366 371
367 *bo_ptr = NULL; 372 *bo_ptr = NULL;
@@ -375,18 +380,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
375 drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); 380 drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
376 INIT_LIST_HEAD(&bo->shadow_list); 381 INIT_LIST_HEAD(&bo->shadow_list);
377 INIT_LIST_HEAD(&bo->va); 382 INIT_LIST_HEAD(&bo->va);
378 bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | 383 bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
379 AMDGPU_GEM_DOMAIN_GTT | 384 bp->domain;
380 AMDGPU_GEM_DOMAIN_CPU |
381 AMDGPU_GEM_DOMAIN_GDS |
382 AMDGPU_GEM_DOMAIN_GWS |
383 AMDGPU_GEM_DOMAIN_OA);
384 bo->allowed_domains = bo->preferred_domains; 385 bo->allowed_domains = bo->preferred_domains;
385 if (type != ttm_bo_type_kernel && 386 if (bp->type != ttm_bo_type_kernel &&
386 bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) 387 bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
387 bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; 388 bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
388 389
389 bo->flags = flags; 390 bo->flags = bp->flags;
390 391
391#ifdef CONFIG_X86_32 392#ifdef CONFIG_X86_32
392 /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit 393 /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
@@ -417,11 +418,13 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
417#endif 418#endif
418 419
419 bo->tbo.bdev = &adev->mman.bdev; 420 bo->tbo.bdev = &adev->mman.bdev;
420 amdgpu_ttm_placement_from_domain(bo, domain); 421 amdgpu_ttm_placement_from_domain(bo, bp->domain);
422 if (bp->type == ttm_bo_type_kernel)
423 bo->tbo.priority = 1;
421 424
422 r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, 425 r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
423 &bo->placement, page_align, &ctx, acc_size, 426 &bo->placement, page_align, &ctx, acc_size,
424 NULL, resv, &amdgpu_ttm_bo_destroy); 427 NULL, bp->resv, &amdgpu_ttm_bo_destroy);
425 if (unlikely(r != 0)) 428 if (unlikely(r != 0))
426 return r; 429 return r;
427 430
@@ -433,10 +436,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
433 else 436 else
434 amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); 437 amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
435 438
436 if (type == ttm_bo_type_kernel) 439 if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
437 bo->tbo.priority = 1;
438
439 if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
440 bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { 440 bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
441 struct dma_fence *fence; 441 struct dma_fence *fence;
442 442
@@ -449,20 +449,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
449 bo->tbo.moving = dma_fence_get(fence); 449 bo->tbo.moving = dma_fence_get(fence);
450 dma_fence_put(fence); 450 dma_fence_put(fence);
451 } 451 }
452 if (!resv) 452 if (!bp->resv)
453 amdgpu_bo_unreserve(bo); 453 amdgpu_bo_unreserve(bo);
454 *bo_ptr = bo; 454 *bo_ptr = bo;
455 455
456 trace_amdgpu_bo_create(bo); 456 trace_amdgpu_bo_create(bo);
457 457
458 /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ 458 /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
459 if (type == ttm_bo_type_device) 459 if (bp->type == ttm_bo_type_device)
460 bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 460 bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
461 461
462 return 0; 462 return 0;
463 463
464fail_unreserve: 464fail_unreserve:
465 if (!resv) 465 if (!bp->resv)
466 ww_mutex_unlock(&bo->tbo.resv->lock); 466 ww_mutex_unlock(&bo->tbo.resv->lock);
467 amdgpu_bo_unref(&bo); 467 amdgpu_bo_unref(&bo);
468 return r; 468 return r;
@@ -472,16 +472,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
472 unsigned long size, int byte_align, 472 unsigned long size, int byte_align,
473 struct amdgpu_bo *bo) 473 struct amdgpu_bo *bo)
474{ 474{
475 struct amdgpu_bo_param bp;
475 int r; 476 int r;
476 477
477 if (bo->shadow) 478 if (bo->shadow)
478 return 0; 479 return 0;
479 480
480 r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT, 481 memset(&bp, 0, sizeof(bp));
481 AMDGPU_GEM_CREATE_CPU_GTT_USWC | 482 bp.size = size;
482 AMDGPU_GEM_CREATE_SHADOW, 483 bp.byte_align = byte_align;
483 ttm_bo_type_kernel, 484 bp.domain = AMDGPU_GEM_DOMAIN_GTT;
484 bo->tbo.resv, &bo->shadow); 485 bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
486 AMDGPU_GEM_CREATE_SHADOW;
487 bp.type = ttm_bo_type_kernel;
488 bp.resv = bo->tbo.resv;
489
490 r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
485 if (!r) { 491 if (!r) {
486 bo->shadow->parent = amdgpu_bo_ref(bo); 492 bo->shadow->parent = amdgpu_bo_ref(bo);
487 mutex_lock(&adev->shadow_list_lock); 493 mutex_lock(&adev->shadow_list_lock);
@@ -492,28 +498,26 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
492 return r; 498 return r;
493} 499}
494 500
495int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, 501int amdgpu_bo_create(struct amdgpu_device *adev,
496 int byte_align, u32 domain, 502 struct amdgpu_bo_param *bp,
497 u64 flags, enum ttm_bo_type type,
498 struct reservation_object *resv,
499 struct amdgpu_bo **bo_ptr) 503 struct amdgpu_bo **bo_ptr)
500{ 504{
501 uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; 505 u64 flags = bp->flags;
502 int r; 506 int r;
503 507
504 r = amdgpu_bo_do_create(adev, size, byte_align, domain, 508 bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
505 parent_flags, type, resv, bo_ptr); 509 r = amdgpu_bo_do_create(adev, bp, bo_ptr);
506 if (r) 510 if (r)
507 return r; 511 return r;
508 512
509 if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { 513 if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
510 if (!resv) 514 if (!bp->resv)
511 WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, 515 WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
512 NULL)); 516 NULL));
513 517
514 r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); 518 r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
515 519
516 if (!resv) 520 if (!bp->resv)
517 reservation_object_unlock((*bo_ptr)->tbo.resv); 521 reservation_object_unlock((*bo_ptr)->tbo.resv);
518 522
519 if (r) 523 if (r)
@@ -689,8 +693,21 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
689 return -EINVAL; 693 return -EINVAL;
690 694
691 /* A shared bo cannot be migrated to VRAM */ 695 /* A shared bo cannot be migrated to VRAM */
692 if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM)) 696 if (bo->prime_shared_count) {
693 return -EINVAL; 697 if (domain & AMDGPU_GEM_DOMAIN_GTT)
698 domain = AMDGPU_GEM_DOMAIN_GTT;
699 else
700 return -EINVAL;
701 }
702
703 /* This assumes only APU display buffers are pinned with (VRAM|GTT).
704 * See function amdgpu_display_supported_domains()
705 */
706 if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
707 domain = AMDGPU_GEM_DOMAIN_VRAM;
708 if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
709 domain = AMDGPU_GEM_DOMAIN_GTT;
710 }
694 711
695 if (bo->pin_count) { 712 if (bo->pin_count) {
696 uint32_t mem_type = bo->tbo.mem.mem_type; 713 uint32_t mem_type = bo->tbo.mem.mem_type;
@@ -838,6 +855,13 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
838 return amdgpu_ttm_init(adev); 855 return amdgpu_ttm_init(adev);
839} 856}
840 857
858int amdgpu_bo_late_init(struct amdgpu_device *adev)
859{
860 amdgpu_ttm_late_init(adev);
861
862 return 0;
863}
864
841void amdgpu_bo_fini(struct amdgpu_device *adev) 865void amdgpu_bo_fini(struct amdgpu_device *adev)
842{ 866{
843 amdgpu_ttm_fini(adev); 867 amdgpu_ttm_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 546f77cb7882..540e03fa159f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -33,6 +33,16 @@
33 33
34#define AMDGPU_BO_INVALID_OFFSET LONG_MAX 34#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
35 35
36struct amdgpu_bo_param {
37 unsigned long size;
38 int byte_align;
39 u32 domain;
40 u32 preferred_domain;
41 u64 flags;
42 enum ttm_bo_type type;
43 struct reservation_object *resv;
44};
45
36/* bo virtual addresses in a vm */ 46/* bo virtual addresses in a vm */
37struct amdgpu_bo_va_mapping { 47struct amdgpu_bo_va_mapping {
38 struct amdgpu_bo_va *bo_va; 48 struct amdgpu_bo_va *bo_va;
@@ -196,6 +206,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
196} 206}
197 207
198/** 208/**
209 * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
210 */
211static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
212{
213 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
214 unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
215 struct drm_mm_node *node = bo->tbo.mem.mm_node;
216 unsigned long pages_left;
217
218 if (bo->tbo.mem.mem_type != TTM_PL_VRAM)
219 return false;
220
221 for (pages_left = bo->tbo.mem.num_pages; pages_left;
222 pages_left -= node->size, node++)
223 if (node->start < fpfn)
224 return true;
225
226 return false;
227}
228
229/**
199 * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced 230 * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
200 */ 231 */
201static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) 232static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
@@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
203 return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; 234 return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
204} 235}
205 236
206int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, 237int amdgpu_bo_create(struct amdgpu_device *adev,
207 int byte_align, u32 domain, 238 struct amdgpu_bo_param *bp,
208 u64 flags, enum ttm_bo_type type,
209 struct reservation_object *resv,
210 struct amdgpu_bo **bo_ptr); 239 struct amdgpu_bo **bo_ptr);
211int amdgpu_bo_create_reserved(struct amdgpu_device *adev, 240int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
212 unsigned long size, int align, 241 unsigned long size, int align,
@@ -230,6 +259,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
230int amdgpu_bo_unpin(struct amdgpu_bo *bo); 259int amdgpu_bo_unpin(struct amdgpu_bo *bo);
231int amdgpu_bo_evict_vram(struct amdgpu_device *adev); 260int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
232int amdgpu_bo_init(struct amdgpu_device *adev); 261int amdgpu_bo_init(struct amdgpu_device *adev);
262int amdgpu_bo_late_init(struct amdgpu_device *adev);
233void amdgpu_bo_fini(struct amdgpu_device *adev); 263void amdgpu_bo_fini(struct amdgpu_device *adev);
234int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, 264int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
235 struct vm_area_struct *vma); 265 struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 361975cf45a9..b455da487782 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -77,6 +77,37 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
77 } 77 }
78} 78}
79 79
80/**
81 * DOC: power_dpm_state
82 *
83 * This is a legacy interface and is only provided for backwards compatibility.
84 * The amdgpu driver provides a sysfs API for adjusting certain power
85 * related parameters. The file power_dpm_state is used for this.
86 * It accepts the following arguments:
87 * - battery
88 * - balanced
89 * - performance
90 *
91 * battery
92 *
93 * On older GPUs, the vbios provided a special power state for battery
94 * operation. Selecting battery switched to this state. This is no
95 * longer provided on newer GPUs so the option does nothing in that case.
96 *
97 * balanced
98 *
99 * On older GPUs, the vbios provided a special power state for balanced
100 * operation. Selecting balanced switched to this state. This is no
101 * longer provided on newer GPUs so the option does nothing in that case.
102 *
103 * performance
104 *
105 * On older GPUs, the vbios provided a special power state for performance
106 * operation. Selecting performance switched to this state. This is no
107 * longer provided on newer GPUs so the option does nothing in that case.
108 *
109 */
110
80static ssize_t amdgpu_get_dpm_state(struct device *dev, 111static ssize_t amdgpu_get_dpm_state(struct device *dev,
81 struct device_attribute *attr, 112 struct device_attribute *attr,
82 char *buf) 113 char *buf)
@@ -131,6 +162,59 @@ fail:
131 return count; 162 return count;
132} 163}
133 164
165
166/**
167 * DOC: power_dpm_force_performance_level
168 *
169 * The amdgpu driver provides a sysfs API for adjusting certain power
170 * related parameters. The file power_dpm_force_performance_level is
171 * used for this. It accepts the following arguments:
172 * - auto
173 * - low
174 * - high
175 * - manual
177 * - profile_standard
178 * - profile_min_sclk
179 * - profile_min_mclk
180 * - profile_peak
181 *
182 * auto
183 *
184 * When auto is selected, the driver will attempt to dynamically select
185 * the optimal power profile for current conditions in the driver.
186 *
187 * low
188 *
189 * When low is selected, the clocks are forced to the lowest power state.
190 *
191 * high
192 *
193 * When high is selected, the clocks are forced to the highest power state.
194 *
195 * manual
196 *
197 * When manual is selected, the user can manually adjust which power states
198 * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk,
199 * and pp_dpm_pcie files and adjust the power state transition heuristics
200 * via the pp_power_profile_mode sysfs file.
201 *
202 * profile_standard
203 * profile_min_sclk
204 * profile_min_mclk
205 * profile_peak
206 *
207 * When the profiling modes are selected, clock and power gating are
208 * disabled and the clocks are set for different profiling cases. This
209 * mode is recommended for profiling specific work loads where you do
210 * not want clock or power gating for clock fluctuation to interfere
211 * with your results. profile_standard sets the clocks to a fixed clock
212 * level which varies from asic to asic. profile_min_sclk forces the sclk
213 * to the lowest level. profile_min_mclk forces the mclk to the lowest level.
214 * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels.
215 *
216 */
217
134static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, 218static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
135 struct device_attribute *attr, 219 struct device_attribute *attr,
136 char *buf) 220 char *buf)
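As a usage illustration for the file documented above, a minimal userspace sketch (the card0 path is an assumption; pick the correct card for the device in question):

	#include <stdio.h>

	int main(void)
	{
		const char *path =
			"/sys/class/drm/card0/device/power_dpm_force_performance_level";
		FILE *f = fopen(path, "w");

		if (!f)
			return 1;
		/* switch to manual so pp_dpm_* and pp_power_profile_mode take effect */
		fputs("manual\n", f);
		fclose(f);
		return 0;
	}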
@@ -324,6 +408,17 @@ fail:
324 return count; 408 return count;
325} 409}
326 410
411/**
412 * DOC: pp_table
413 *
414 * The amdgpu driver provides a sysfs API for uploading new powerplay
415 * tables. The file pp_table is used for this. Reading the file
416 * will dump the current power play table. Writing to the file
417 * will attempt to upload a new powerplay table and re-initialize
418 * powerplay using that new table.
419 *
420 */
421
327static ssize_t amdgpu_get_pp_table(struct device *dev, 422static ssize_t amdgpu_get_pp_table(struct device *dev,
328 struct device_attribute *attr, 423 struct device_attribute *attr,
329 char *buf) 424 char *buf)
@@ -360,6 +455,29 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
360 return count; 455 return count;
361} 456}
362 457
458/**
459 * DOC: pp_od_clk_voltage
460 *
461 * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages
462 * in each power level within a power state. The pp_od_clk_voltage is used for
463 * this.
464 *
465 * Reading the file will display:
466 * - a list of engine clock levels and voltages labeled OD_SCLK
467 * - a list of memory clock levels and voltages labeled OD_MCLK
468 * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
469 *
470 * To manually adjust these settings, first select manual using
471 * power_dpm_force_performance_level. Enter a new value for each
472 * level by writing a string that contains "s/m level clock voltage" to
473 * the file. E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz
474 * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at
475 * 810 mV. When you have edited all of the states as needed, write
476 * "c" (commit) to the file to commit your changes. If you want to reset to the
477 * default power levels, write "r" (reset) to the file to reset them.
478 *
479 */
480
363static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, 481static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
364 struct device_attribute *attr, 482 struct device_attribute *attr,
365 const char *buf, 483 const char *buf,
@@ -437,6 +555,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
437 if (adev->powerplay.pp_funcs->print_clock_levels) { 555 if (adev->powerplay.pp_funcs->print_clock_levels) {
438 size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); 556 size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
439 size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); 557 size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
558 size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
440 return size; 559 return size;
441 } else { 560 } else {
442 return snprintf(buf, PAGE_SIZE, "\n"); 561 return snprintf(buf, PAGE_SIZE, "\n");
@@ -444,6 +563,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
444 563
445} 564}
446 565
566/**
567 * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
568 *
569 * The amdgpu driver provides a sysfs API for adjusting what power levels
570 * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk,
571 * and pp_dpm_pcie are used for this.
572 *
573 * Reading back the files will show you the available power levels within
574 * the power state and the clock information for those levels.
575 *
576 * To manually adjust these states, first select manual using
577 * power_dpm_force_performance_level.
 578 * Then write a space-separated list of the level numbers to enable to the
 579 * relevant file, e.g. "echo 4 5 6 > pp_dpm_sclk" will enable sclk levels
 580 * 4, 5, and 6.
581 */
582
447static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, 583static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
448 struct device_attribute *attr, 584 struct device_attribute *attr,
449 char *buf) 585 char *buf)
@@ -466,23 +602,27 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
466 struct amdgpu_device *adev = ddev->dev_private; 602 struct amdgpu_device *adev = ddev->dev_private;
467 int ret; 603 int ret;
468 long level; 604 long level;
469 uint32_t i, mask = 0; 605 uint32_t mask = 0;
470 char sub_str[2]; 606 char *sub_str = NULL;
607 char *tmp;
608 char buf_cpy[count];
609 const char delimiter[3] = {' ', '\n', '\0'};
471 610
472 for (i = 0; i < strlen(buf); i++) { 611 memcpy(buf_cpy, buf, count+1);
473 if (*(buf + i) == '\n') 612 tmp = buf_cpy;
474 continue; 613 while (tmp[0]) {
475 sub_str[0] = *(buf + i); 614 sub_str = strsep(&tmp, delimiter);
476 sub_str[1] = '\0'; 615 if (strlen(sub_str)) {
477 ret = kstrtol(sub_str, 0, &level); 616 ret = kstrtol(sub_str, 0, &level);
478 617
479 if (ret) { 618 if (ret) {
480 count = -EINVAL; 619 count = -EINVAL;
481 goto fail; 620 goto fail;
482 } 621 }
483 mask |= 1 << level; 622 mask |= 1 << level;
623 } else
624 break;
484 } 625 }
485
486 if (adev->powerplay.pp_funcs->force_clock_level) 626 if (adev->powerplay.pp_funcs->force_clock_level)
487 amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); 627 amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
488 628
@@ -512,21 +652,26 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
512 struct amdgpu_device *adev = ddev->dev_private; 652 struct amdgpu_device *adev = ddev->dev_private;
513 int ret; 653 int ret;
514 long level; 654 long level;
515 uint32_t i, mask = 0; 655 uint32_t mask = 0;
516 char sub_str[2]; 656 char *sub_str = NULL;
657 char *tmp;
658 char buf_cpy[count];
659 const char delimiter[3] = {' ', '\n', '\0'};
517 660
518 for (i = 0; i < strlen(buf); i++) { 661 memcpy(buf_cpy, buf, count+1);
519 if (*(buf + i) == '\n') 662 tmp = buf_cpy;
520 continue; 663 while (tmp[0]) {
521 sub_str[0] = *(buf + i); 664 sub_str = strsep(&tmp, delimiter);
522 sub_str[1] = '\0'; 665 if (strlen(sub_str)) {
523 ret = kstrtol(sub_str, 0, &level); 666 ret = kstrtol(sub_str, 0, &level);
524 667
525 if (ret) { 668 if (ret) {
526 count = -EINVAL; 669 count = -EINVAL;
527 goto fail; 670 goto fail;
528 } 671 }
529 mask |= 1 << level; 672 mask |= 1 << level;
673 } else
674 break;
530 } 675 }
531 if (adev->powerplay.pp_funcs->force_clock_level) 676 if (adev->powerplay.pp_funcs->force_clock_level)
532 amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); 677 amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
@@ -557,21 +702,27 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
557 struct amdgpu_device *adev = ddev->dev_private; 702 struct amdgpu_device *adev = ddev->dev_private;
558 int ret; 703 int ret;
559 long level; 704 long level;
560 uint32_t i, mask = 0; 705 uint32_t mask = 0;
561 char sub_str[2]; 706 char *sub_str = NULL;
707 char *tmp;
708 char buf_cpy[count];
709 const char delimiter[3] = {' ', '\n', '\0'};
562 710
563 for (i = 0; i < strlen(buf); i++) { 711 memcpy(buf_cpy, buf, count+1);
564 if (*(buf + i) == '\n') 712 tmp = buf_cpy;
565 continue;
566 sub_str[0] = *(buf + i);
567 sub_str[1] = '\0';
568 ret = kstrtol(sub_str, 0, &level);
569 713
570 if (ret) { 714 while (tmp[0]) {
571 count = -EINVAL; 715 sub_str = strsep(&tmp, delimiter);
572 goto fail; 716 if (strlen(sub_str)) {
573 } 717 ret = kstrtol(sub_str, 0, &level);
574 mask |= 1 << level; 718
719 if (ret) {
720 count = -EINVAL;
721 goto fail;
722 }
723 mask |= 1 << level;
724 } else
725 break;
575 } 726 }
576 if (adev->powerplay.pp_funcs->force_clock_level) 727 if (adev->powerplay.pp_funcs->force_clock_level)
577 amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); 728 amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
@@ -668,6 +819,26 @@ fail:
668 return count; 819 return count;
669} 820}
670 821
822/**
823 * DOC: pp_power_profile_mode
824 *
825 * The amdgpu driver provides a sysfs API for adjusting the heuristics
826 * related to switching between power levels in a power state. The file
827 * pp_power_profile_mode is used for this.
828 *
829 * Reading this file outputs a list of all of the predefined power profiles
830 * and the relevant heuristics settings for that profile.
831 *
832 * To select a profile or create a custom profile, first select manual using
833 * power_dpm_force_performance_level. Writing the number of a predefined
834 * profile to pp_power_profile_mode will enable those heuristics. To
835 * create a custom set of heuristics, write a string of numbers to the file
836 * starting with the number of the custom profile along with a setting
 837 * for each heuristic parameter. The heuristic parameters vary from
 838 * one ASIC family to another.
839 *
840 */
841
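As a concrete illustration of the flow described above, a profile can be selected from userspace with two writes. A minimal sketch, assuming the files live under /sys/class/drm/card0/device (the card index varies per system), that profile index 1 is one of the predefined profiles listed when reading the file, and that the caller has the required privileges:

    #include <stdio.h>

    static int write_str(const char *path, const char *val)
    {
            FILE *f = fopen(path, "w");

            if (!f)
                    return -1;
            fputs(val, f);
            return fclose(f);
    }

    int main(void)
    {
            const char *base = "/sys/class/drm/card0/device";
            char path[256];

            /* 1) switch to manual so the profile selection takes effect */
            snprintf(path, sizeof(path), "%s/power_dpm_force_performance_level", base);
            if (write_str(path, "manual\n"))
                    return 1;

            /* 2) select predefined profile number 1 */
            snprintf(path, sizeof(path), "%s/pp_power_profile_mode", base);
            return write_str(path, "1\n") ? 1 : 0;
    }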
671static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, 842static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
672 struct device_attribute *attr, 843 struct device_attribute *attr,
673 char *buf) 844 char *buf)
@@ -1020,8 +1191,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
1020{ 1191{
1021 struct amdgpu_device *adev = dev_get_drvdata(dev); 1192 struct amdgpu_device *adev = dev_get_drvdata(dev);
1022 struct drm_device *ddev = adev->ddev; 1193 struct drm_device *ddev = adev->ddev;
1023 struct pp_gpu_power query = {0}; 1194 u32 query = 0;
1024 int r, size = sizeof(query); 1195 int r, size = sizeof(u32);
1025 unsigned uw; 1196 unsigned uw;
1026 1197
1027 /* Can't get power when the card is off */ 1198 /* Can't get power when the card is off */
@@ -1041,7 +1212,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
1041 return r; 1212 return r;
1042 1213
1043 /* convert to microwatts */ 1214 /* convert to microwatts */
1044 uw = (query.average_gpu_power >> 8) * 1000000; 1215 uw = (query >> 8) * 1000000 + (query & 0xff) * 1000;
1045 1216
1046 return snprintf(buf, PAGE_SIZE, "%u\n", uw); 1217 return snprintf(buf, PAGE_SIZE, "%u\n", uw);
1047} 1218}
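The raw GPU_POWER sensor word is now a single u32: the upper bits carry whole watts and the low byte carries a sub-watt part that the driver scales by 1000. A small standalone illustration of exactly the conversion used above (the sample value is arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t query = 0x2a05;   /* example raw sensor reading */
            unsigned int uw = (query >> 8) * 1000000 + (query & 0xff) * 1000;

            printf("%u uW\n", uw);     /* 42005000, i.e. about 42.0 W */
            return 0;
    }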
@@ -1109,6 +1280,46 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
1109 return count; 1280 return count;
1110} 1281}
1111 1282
1283
1284/**
1285 * DOC: hwmon
1286 *
1287 * The amdgpu driver exposes the following sensor interfaces:
1288 * - GPU temperature (via the on-die sensor)
1289 * - GPU voltage
1290 * - Northbridge voltage (APUs only)
1291 * - GPU power
1292 * - GPU fan
1293 *
1294 * hwmon interfaces for GPU temperature:
 1295 * - temp1_input: the on-die GPU temperature in millidegrees Celsius
1296 * - temp1_crit: temperature critical max value in millidegrees Celsius
1297 * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
1298 *
1299 * hwmon interfaces for GPU voltage:
1300 * - in0_input: the voltage on the GPU in millivolts
1301 * - in1_input: the voltage on the Northbridge in millivolts
1302 *
1303 * hwmon interfaces for GPU power:
1304 * - power1_average: average power used by the GPU in microWatts
1305 * - power1_cap_min: minimum cap supported in microWatts
1306 * - power1_cap_max: maximum cap supported in microWatts
1307 * - power1_cap: selected power cap in microWatts
1308 *
1309 * hwmon interfaces for GPU fan:
1310 * - pwm1: pulse width modulation fan level (0-255)
1311 * - pwm1_enable: pulse width modulation fan control method
1312 * 0: no fan speed control
1313 * 1: manual fan speed control using pwm interface
1314 * 2: automatic fan speed control
1315 * - pwm1_min: pulse width modulation fan control minimum level (0)
1316 * - pwm1_max: pulse width modulation fan control maximum level (255)
1317 * - fan1_input: fan speed in RPM
1318 *
1319 * You can use hwmon tools like sensors to view this information on your system.
1320 *
1321 */
1322
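As a concrete illustration of the units listed above, a small userspace reader might look like the sketch below. The hwmon0 index is an assumption (it is assigned at probe time), so real code should walk /sys/class/hwmon and match the device name:

    #include <stdio.h>

    static long read_long(const char *path)
    {
            FILE *f = fopen(path, "r");
            long val = -1;

            if (f) {
                    if (fscanf(f, "%ld", &val) != 1)
                            val = -1;
                    fclose(f);
            }
            return val;
    }

    int main(void)
    {
            long temp = read_long("/sys/class/hwmon/hwmon0/temp1_input");
            long power = read_long("/sys/class/hwmon/hwmon0/power1_average");

            printf("GPU temp:  %.1f degC\n", temp / 1000.0);   /* millidegrees -> degrees */
            printf("GPU power: %.2f W\n", power / 1000000.0);  /* microwatts -> watts */
            return 0;
    }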
1112static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); 1323static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
1113static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); 1324static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
1114static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); 1325static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@@ -1153,19 +1364,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
1153 struct amdgpu_device *adev = dev_get_drvdata(dev); 1364 struct amdgpu_device *adev = dev_get_drvdata(dev);
1154 umode_t effective_mode = attr->mode; 1365 umode_t effective_mode = attr->mode;
1155 1366
1156 /* handle non-powerplay limitations */ 1367
1157 if (!adev->powerplay.pp_handle) { 1368 /* Skip fan attributes if fan is not present */
1158 /* Skip fan attributes if fan is not present */ 1369 if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
1159 if (adev->pm.no_fan && 1370 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
1160 (attr == &sensor_dev_attr_pwm1.dev_attr.attr || 1371 attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
1161 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || 1372 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
1162 attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || 1373 attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
1163 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) 1374 return 0;
1164 return 0;
1165 /* requires powerplay */
1166 if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
1167 return 0;
1168 }
1169 1375
1170 /* Skip limit attributes if DPM is not enabled */ 1376 /* Skip limit attributes if DPM is not enabled */
1171 if (!adev->pm.dpm_enabled && 1377 if (!adev->pm.dpm_enabled &&
@@ -1658,9 +1864,6 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
1658 1864
1659void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) 1865void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
1660{ 1866{
1661 struct drm_device *ddev = adev->ddev;
1662 struct drm_crtc *crtc;
1663 struct amdgpu_crtc *amdgpu_crtc;
1664 int i = 0; 1867 int i = 0;
1665 1868
1666 if (!adev->pm.dpm_enabled) 1869 if (!adev->pm.dpm_enabled)
@@ -1676,21 +1879,25 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
1676 } 1879 }
1677 1880
1678 if (adev->powerplay.pp_funcs->dispatch_tasks) { 1881 if (adev->powerplay.pp_funcs->dispatch_tasks) {
1882 if (!amdgpu_device_has_dc_support(adev)) {
1883 mutex_lock(&adev->pm.mutex);
1884 amdgpu_dpm_get_active_displays(adev);
1885 adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs;
1886 adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
1887 adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev);
 1888 /* we have issues with mclk switching with refresh rates over 120 Hz on the non-DC code path. */
1889 if (adev->pm.pm_display_cfg.vrefresh > 120)
1890 adev->pm.pm_display_cfg.min_vblank_time = 0;
1891 if (adev->powerplay.pp_funcs->display_configuration_change)
1892 adev->powerplay.pp_funcs->display_configuration_change(
1893 adev->powerplay.pp_handle,
1894 &adev->pm.pm_display_cfg);
1895 mutex_unlock(&adev->pm.mutex);
1896 }
1679 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); 1897 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
1680 } else { 1898 } else {
1681 mutex_lock(&adev->pm.mutex); 1899 mutex_lock(&adev->pm.mutex);
1682 adev->pm.dpm.new_active_crtcs = 0; 1900 amdgpu_dpm_get_active_displays(adev);
1683 adev->pm.dpm.new_active_crtc_count = 0;
1684 if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
1685 list_for_each_entry(crtc,
1686 &ddev->mode_config.crtc_list, head) {
1687 amdgpu_crtc = to_amdgpu_crtc(crtc);
1688 if (amdgpu_crtc->enabled) {
1689 adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
1690 adev->pm.dpm.new_active_crtc_count++;
1691 }
1692 }
1693 }
1694 /* update battery/ac status */ 1901 /* update battery/ac status */
1695 if (power_supply_is_system_supplied() > 0) 1902 if (power_supply_is_system_supplied() > 0)
1696 adev->pm.dpm.ac_power = true; 1903 adev->pm.dpm.ac_power = true;
@@ -1711,7 +1918,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
1711static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) 1918static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
1712{ 1919{
1713 uint32_t value; 1920 uint32_t value;
1714 struct pp_gpu_power query = {0}; 1921 uint32_t query = 0;
1715 int size; 1922 int size;
1716 1923
1717 /* sanity check PP is enabled */ 1924 /* sanity check PP is enabled */
@@ -1734,17 +1941,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
1734 seq_printf(m, "\t%u mV (VDDGFX)\n", value); 1941 seq_printf(m, "\t%u mV (VDDGFX)\n", value);
1735 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) 1942 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
1736 seq_printf(m, "\t%u mV (VDDNB)\n", value); 1943 seq_printf(m, "\t%u mV (VDDNB)\n", value);
1737 size = sizeof(query); 1944 size = sizeof(uint32_t);
1738 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) { 1945 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size))
1739 seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8, 1946 seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
1740 query.vddc_power & 0xff);
1741 seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8,
1742 query.vddci_power & 0xff);
1743 seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8,
1744 query.max_gpu_power & 0xff);
1745 seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8,
1746 query.average_gpu_power & 0xff);
1747 }
1748 size = sizeof(value); 1947 size = sizeof(value);
1749 seq_printf(m, "\n"); 1948 seq_printf(m, "\n");
1750 1949
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 4b584cb75bf4..4683626b065f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -102,12 +102,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
102 struct reservation_object *resv = attach->dmabuf->resv; 102 struct reservation_object *resv = attach->dmabuf->resv;
103 struct amdgpu_device *adev = dev->dev_private; 103 struct amdgpu_device *adev = dev->dev_private;
104 struct amdgpu_bo *bo; 104 struct amdgpu_bo *bo;
105 struct amdgpu_bo_param bp;
105 int ret; 106 int ret;
106 107
108 memset(&bp, 0, sizeof(bp));
109 bp.size = attach->dmabuf->size;
110 bp.byte_align = PAGE_SIZE;
111 bp.domain = AMDGPU_GEM_DOMAIN_CPU;
112 bp.flags = 0;
113 bp.type = ttm_bo_type_sg;
114 bp.resv = resv;
107 ww_mutex_lock(&resv->lock, NULL); 115 ww_mutex_lock(&resv->lock, NULL);
108 ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, 116 ret = amdgpu_bo_create(adev, &bp, &bo);
109 AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
110 resv, &bo);
111 if (ret) 117 if (ret)
112 goto error; 118 goto error;
113 119
@@ -209,7 +215,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
209 struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); 215 struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
210 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 216 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
211 struct ttm_operation_ctx ctx = { true, false }; 217 struct ttm_operation_ctx ctx = { true, false };
212 u32 domain = amdgpu_display_framebuffer_domains(adev); 218 u32 domain = amdgpu_display_supported_domains(adev);
213 int ret; 219 int ret;
214 bool reads = (direction == DMA_BIDIRECTIONAL || 220 bool reads = (direction == DMA_BIDIRECTIONAL ||
215 direction == DMA_FROM_DEVICE); 221 direction == DMA_FROM_DEVICE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index c7d43e064fc7..9f1a5bd39ae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -52,6 +52,7 @@ static int psp_sw_init(void *handle)
52 switch (adev->asic_type) { 52 switch (adev->asic_type) {
53 case CHIP_VEGA10: 53 case CHIP_VEGA10:
54 case CHIP_VEGA12: 54 case CHIP_VEGA12:
55 case CHIP_VEGA20:
55 psp_v3_1_set_psp_funcs(psp); 56 psp_v3_1_set_psp_funcs(psp);
56 break; 57 break;
57 case CHIP_RAVEN: 58 case CHIP_RAVEN:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 262c1267249e..8af16e81c7d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -66,6 +66,8 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
66 u32 ring, 66 u32 ring,
67 struct amdgpu_ring **out_ring) 67 struct amdgpu_ring **out_ring)
68{ 68{
69 u32 instance;
70
69 switch (mapper->hw_ip) { 71 switch (mapper->hw_ip) {
70 case AMDGPU_HW_IP_GFX: 72 case AMDGPU_HW_IP_GFX:
71 *out_ring = &adev->gfx.gfx_ring[ring]; 73 *out_ring = &adev->gfx.gfx_ring[ring];
@@ -77,13 +79,16 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
77 *out_ring = &adev->sdma.instance[ring].ring; 79 *out_ring = &adev->sdma.instance[ring].ring;
78 break; 80 break;
79 case AMDGPU_HW_IP_UVD: 81 case AMDGPU_HW_IP_UVD:
80 *out_ring = &adev->uvd.ring; 82 instance = ring;
83 *out_ring = &adev->uvd.inst[instance].ring;
81 break; 84 break;
82 case AMDGPU_HW_IP_VCE: 85 case AMDGPU_HW_IP_VCE:
83 *out_ring = &adev->vce.ring[ring]; 86 *out_ring = &adev->vce.ring[ring];
84 break; 87 break;
85 case AMDGPU_HW_IP_UVD_ENC: 88 case AMDGPU_HW_IP_UVD_ENC:
86 *out_ring = &adev->uvd.ring_enc[ring]; 89 instance = ring / adev->uvd.num_enc_rings;
90 *out_ring =
91 &adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings];
87 break; 92 break;
88 case AMDGPU_HW_IP_VCN_DEC: 93 case AMDGPU_HW_IP_VCN_DEC:
89 *out_ring = &adev->vcn.ring_dec; 94 *out_ring = &adev->vcn.ring_dec;
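The UVD_ENC change above folds a flat ring index into an (instance, sub-ring) pair by dividing and taking the remainder against num_enc_rings. A tiny standalone illustration of that arithmetic (num_enc_rings = 2 is an assumed example value; the real counts come from adev->uvd):

    #include <stdio.h>

    int main(void)
    {
            unsigned int num_enc_rings = 2;
            unsigned int ring;

            for (ring = 0; ring < 4; ring++)
                    printf("UVD_ENC ring %u -> inst[%u].ring_enc[%u]\n",
                           ring, ring / num_enc_rings, ring % num_enc_rings);
            return 0;
    }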
@@ -240,13 +245,14 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
240 ip_num_rings = adev->sdma.num_instances; 245 ip_num_rings = adev->sdma.num_instances;
241 break; 246 break;
242 case AMDGPU_HW_IP_UVD: 247 case AMDGPU_HW_IP_UVD:
243 ip_num_rings = 1; 248 ip_num_rings = adev->uvd.num_uvd_inst;
244 break; 249 break;
245 case AMDGPU_HW_IP_VCE: 250 case AMDGPU_HW_IP_VCE:
246 ip_num_rings = adev->vce.num_rings; 251 ip_num_rings = adev->vce.num_rings;
247 break; 252 break;
248 case AMDGPU_HW_IP_UVD_ENC: 253 case AMDGPU_HW_IP_UVD_ENC:
249 ip_num_rings = adev->uvd.num_enc_rings; 254 ip_num_rings =
255 adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst;
250 break; 256 break;
251 case AMDGPU_HW_IP_VCN_DEC: 257 case AMDGPU_HW_IP_VCN_DEC:
252 ip_num_rings = 1; 258 ip_num_rings = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d5f526f38e50..c6850b629d0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
362 362
363 dma_fence_put(ring->vmid_wait); 363 dma_fence_put(ring->vmid_wait);
364 ring->vmid_wait = NULL; 364 ring->vmid_wait = NULL;
365 ring->me = 0;
365 366
366 ring->adev->rings[ring->idx] = NULL; 367 ring->adev->rings[ring->idx] = NULL;
367} 368}
@@ -459,6 +460,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
459 spin_unlock(&adev->ring_lru_list_lock); 460 spin_unlock(&adev->ring_lru_list_lock);
460} 461}
461 462
463/**
464 * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
465 *
466 * @adev: amdgpu_device pointer
467 * @reg0: register to write
468 * @reg1: register to wait on
469 * @ref: reference value to write/wait on
470 * @mask: mask to wait on
471 *
472 * Helper for rings that don't support write and wait in a
473 * single oneshot packet.
474 */
475void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
476 uint32_t reg0, uint32_t reg1,
477 uint32_t ref, uint32_t mask)
478{
479 amdgpu_ring_emit_wreg(ring, reg0, ref);
480 amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
481}
482
462/* 483/*
463 * Debugfs info 484 * Debugfs info
464 */ 485 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1a5911882657..1513124c5659 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -29,7 +29,7 @@
29#include <drm/drm_print.h> 29#include <drm/drm_print.h>
30 30
31/* max number of rings */ 31/* max number of rings */
32#define AMDGPU_MAX_RINGS 18 32#define AMDGPU_MAX_RINGS 21
33#define AMDGPU_MAX_GFX_RINGS 1 33#define AMDGPU_MAX_GFX_RINGS 1
34#define AMDGPU_MAX_COMPUTE_RINGS 8 34#define AMDGPU_MAX_COMPUTE_RINGS 8
35#define AMDGPU_MAX_VCE_RINGS 3 35#define AMDGPU_MAX_VCE_RINGS 3
@@ -42,6 +42,7 @@
42 42
43#define AMDGPU_FENCE_FLAG_64BIT (1 << 0) 43#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
44#define AMDGPU_FENCE_FLAG_INT (1 << 1) 44#define AMDGPU_FENCE_FLAG_INT (1 << 1)
45#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
45 46
46enum amdgpu_ring_type { 47enum amdgpu_ring_type {
47 AMDGPU_RING_TYPE_GFX, 48 AMDGPU_RING_TYPE_GFX,
@@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
90 unsigned irq_type); 91 unsigned irq_type);
91void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); 92void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
92void amdgpu_fence_driver_resume(struct amdgpu_device *adev); 93void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
93int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); 94int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
95 unsigned flags);
94int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); 96int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
95void amdgpu_fence_process(struct amdgpu_ring *ring); 97void amdgpu_fence_process(struct amdgpu_ring *ring);
96int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); 98int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
@@ -154,6 +156,9 @@ struct amdgpu_ring_funcs {
154 void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); 156 void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
155 void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, 157 void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
156 uint32_t val, uint32_t mask); 158 uint32_t val, uint32_t mask);
159 void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
160 uint32_t reg0, uint32_t reg1,
161 uint32_t ref, uint32_t mask);
157 void (*emit_tmz)(struct amdgpu_ring *ring, bool start); 162 void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
158 /* priority functions */ 163 /* priority functions */
159 void (*set_priority) (struct amdgpu_ring *ring, 164 void (*set_priority) (struct amdgpu_ring *ring,
@@ -228,6 +233,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
228 int *blacklist, int num_blacklist, 233 int *blacklist, int num_blacklist,
229 bool lru_pipe_order, struct amdgpu_ring **ring); 234 bool lru_pipe_order, struct amdgpu_ring **ring);
230void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); 235void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
236void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
237 uint32_t reg0, uint32_t val0,
238 uint32_t reg1, uint32_t val1);
239
231static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) 240static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
232{ 241{
233 int i = 0; 242 int i = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 2dbe87591f81..d167e8ab76d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -33,6 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
33 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; 33 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
34 struct amdgpu_bo *vram_obj = NULL; 34 struct amdgpu_bo *vram_obj = NULL;
35 struct amdgpu_bo **gtt_obj = NULL; 35 struct amdgpu_bo **gtt_obj = NULL;
36 struct amdgpu_bo_param bp;
36 uint64_t gart_addr, vram_addr; 37 uint64_t gart_addr, vram_addr;
37 unsigned n, size; 38 unsigned n, size;
38 int i, r; 39 int i, r;
@@ -58,9 +59,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
58 r = 1; 59 r = 1;
59 goto out_cleanup; 60 goto out_cleanup;
60 } 61 }
61 62 memset(&bp, 0, sizeof(bp));
62 r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0, 63 bp.size = size;
63 ttm_bo_type_kernel, NULL, &vram_obj); 64 bp.byte_align = PAGE_SIZE;
65 bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
66 bp.flags = 0;
67 bp.type = ttm_bo_type_kernel;
68 bp.resv = NULL;
69
70 r = amdgpu_bo_create(adev, &bp, &vram_obj);
64 if (r) { 71 if (r) {
65 DRM_ERROR("Failed to create VRAM object\n"); 72 DRM_ERROR("Failed to create VRAM object\n");
66 goto out_cleanup; 73 goto out_cleanup;
@@ -79,9 +86,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
79 void **vram_start, **vram_end; 86 void **vram_start, **vram_end;
80 struct dma_fence *fence = NULL; 87 struct dma_fence *fence = NULL;
81 88
82 r = amdgpu_bo_create(adev, size, PAGE_SIZE, 89 bp.domain = AMDGPU_GEM_DOMAIN_GTT;
83 AMDGPU_GEM_DOMAIN_GTT, 0, 90 r = amdgpu_bo_create(adev, &bp, gtt_obj + i);
84 ttm_bo_type_kernel, NULL, gtt_obj + i);
85 if (r) { 91 if (r) {
86 DRM_ERROR("Failed to create GTT object %d\n", i); 92 DRM_ERROR("Failed to create GTT object %d\n", i);
87 goto out_lclean; 93 goto out_lclean;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 532263ab6e16..e96e26d3f3b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
275 ), 275 ),
276 276
277 TP_fast_assign( 277 TP_fast_assign(
278 __entry->bo = bo_va->base.bo; 278 __entry->bo = bo_va ? bo_va->base.bo : NULL;
279 __entry->start = mapping->start; 279 __entry->start = mapping->start;
280 __entry->last = mapping->last; 280 __entry->last = mapping->last;
281 __entry->offset = mapping->offset; 281 __entry->offset = mapping->offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 205da3ff9cd0..e93a0a237dc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -63,16 +63,44 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
63/* 63/*
64 * Global memory. 64 * Global memory.
65 */ 65 */
66
67/**
68 * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
69 * memory object
70 *
71 * @ref: Object for initialization.
72 *
73 * This is called by drm_global_item_ref() when an object is being
74 * initialized.
75 */
66static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) 76static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
67{ 77{
68 return ttm_mem_global_init(ref->object); 78 return ttm_mem_global_init(ref->object);
69} 79}
70 80
81/**
82 * amdgpu_ttm_mem_global_release - Drop reference to a memory object
83 *
84 * @ref: Object being removed
85 *
86 * This is called by drm_global_item_unref() when an object is being
87 * released.
88 */
71static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) 89static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
72{ 90{
73 ttm_mem_global_release(ref->object); 91 ttm_mem_global_release(ref->object);
74} 92}
75 93
94/**
95 * amdgpu_ttm_global_init - Initialize global TTM memory reference
96 * structures.
97 *
98 * @adev: AMDGPU device for which the global structures need to be
99 * registered.
100 *
101 * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
102 * during bring up.
103 */
76static int amdgpu_ttm_global_init(struct amdgpu_device *adev) 104static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
77{ 105{
78 struct drm_global_reference *global_ref; 106 struct drm_global_reference *global_ref;
@@ -80,7 +108,9 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
80 struct drm_sched_rq *rq; 108 struct drm_sched_rq *rq;
81 int r; 109 int r;
82 110
111 /* ensure reference is false in case init fails */
83 adev->mman.mem_global_referenced = false; 112 adev->mman.mem_global_referenced = false;
113
84 global_ref = &adev->mman.mem_global_ref; 114 global_ref = &adev->mman.mem_global_ref;
85 global_ref->global_type = DRM_GLOBAL_TTM_MEM; 115 global_ref->global_type = DRM_GLOBAL_TTM_MEM;
86 global_ref->size = sizeof(struct ttm_mem_global); 116 global_ref->size = sizeof(struct ttm_mem_global);
@@ -111,7 +141,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
111 ring = adev->mman.buffer_funcs_ring; 141 ring = adev->mman.buffer_funcs_ring;
112 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; 142 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
113 r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, 143 r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
114 rq, amdgpu_sched_jobs, NULL); 144 rq, NULL);
115 if (r) { 145 if (r) {
116 DRM_ERROR("Failed setting up TTM BO move run queue.\n"); 146 DRM_ERROR("Failed setting up TTM BO move run queue.\n");
117 goto error_entity; 147 goto error_entity;
@@ -146,6 +176,18 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
146 return 0; 176 return 0;
147} 177}
148 178
179/**
180 * amdgpu_init_mem_type - Initialize a memory manager for a specific
181 * type of memory request.
182 *
183 * @bdev: The TTM BO device object (contains a reference to
184 * amdgpu_device)
185 * @type: The type of memory requested
 186 * @man: The memory type manager to initialize for this type
187 *
188 * This is called by ttm_bo_init_mm() when a buffer object is being
189 * initialized.
190 */
149static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, 191static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
150 struct ttm_mem_type_manager *man) 192 struct ttm_mem_type_manager *man)
151{ 193{
@@ -161,6 +203,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
161 man->default_caching = TTM_PL_FLAG_CACHED; 203 man->default_caching = TTM_PL_FLAG_CACHED;
162 break; 204 break;
163 case TTM_PL_TT: 205 case TTM_PL_TT:
206 /* GTT memory */
164 man->func = &amdgpu_gtt_mgr_func; 207 man->func = &amdgpu_gtt_mgr_func;
165 man->gpu_offset = adev->gmc.gart_start; 208 man->gpu_offset = adev->gmc.gart_start;
166 man->available_caching = TTM_PL_MASK_CACHING; 209 man->available_caching = TTM_PL_MASK_CACHING;
@@ -193,6 +236,14 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
193 return 0; 236 return 0;
194} 237}
195 238
239/**
240 * amdgpu_evict_flags - Compute placement flags
241 *
242 * @bo: The buffer object to evict
243 * @placement: Possible destination(s) for evicted BO
244 *
245 * Fill in placement data when ttm_bo_evict() is called
246 */
196static void amdgpu_evict_flags(struct ttm_buffer_object *bo, 247static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
197 struct ttm_placement *placement) 248 struct ttm_placement *placement)
198{ 249{
@@ -204,12 +255,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
204 .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM 255 .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
205 }; 256 };
206 257
258 /* Don't handle scatter gather BOs */
207 if (bo->type == ttm_bo_type_sg) { 259 if (bo->type == ttm_bo_type_sg) {
208 placement->num_placement = 0; 260 placement->num_placement = 0;
209 placement->num_busy_placement = 0; 261 placement->num_busy_placement = 0;
210 return; 262 return;
211 } 263 }
212 264
265 /* Object isn't an AMDGPU object so ignore */
213 if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { 266 if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
214 placement->placement = &placements; 267 placement->placement = &placements;
215 placement->busy_placement = &placements; 268 placement->busy_placement = &placements;
@@ -217,26 +270,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
217 placement->num_busy_placement = 1; 270 placement->num_busy_placement = 1;
218 return; 271 return;
219 } 272 }
273
220 abo = ttm_to_amdgpu_bo(bo); 274 abo = ttm_to_amdgpu_bo(bo);
221 switch (bo->mem.mem_type) { 275 switch (bo->mem.mem_type) {
222 case TTM_PL_VRAM: 276 case TTM_PL_VRAM:
223 if (!adev->mman.buffer_funcs_enabled) { 277 if (!adev->mman.buffer_funcs_enabled) {
278 /* Move to system memory */
224 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); 279 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
225 } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && 280 } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
226 !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { 281 !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
227 unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; 282 amdgpu_bo_in_cpu_visible_vram(abo)) {
228 struct drm_mm_node *node = bo->mem.mm_node;
229 unsigned long pages_left;
230
231 for (pages_left = bo->mem.num_pages;
232 pages_left;
233 pages_left -= node->size, node++) {
234 if (node->start < fpfn)
235 break;
236 }
237
238 if (!pages_left)
239 goto gtt;
240 283
241 /* Try evicting to the CPU inaccessible part of VRAM 284 /* Try evicting to the CPU inaccessible part of VRAM
242 * first, but only set GTT as busy placement, so this 285 * first, but only set GTT as busy placement, so this
@@ -245,12 +288,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
245 */ 288 */
246 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | 289 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
247 AMDGPU_GEM_DOMAIN_GTT); 290 AMDGPU_GEM_DOMAIN_GTT);
248 abo->placements[0].fpfn = fpfn; 291 abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
249 abo->placements[0].lpfn = 0; 292 abo->placements[0].lpfn = 0;
250 abo->placement.busy_placement = &abo->placements[1]; 293 abo->placement.busy_placement = &abo->placements[1];
251 abo->placement.num_busy_placement = 1; 294 abo->placement.num_busy_placement = 1;
252 } else { 295 } else {
253gtt: 296 /* Move to GTT memory */
254 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); 297 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
255 } 298 }
256 break; 299 break;
@@ -261,6 +304,15 @@ gtt:
261 *placement = abo->placement; 304 *placement = abo->placement;
262} 305}
263 306
307/**
308 * amdgpu_verify_access - Verify access for a mmap call
309 *
310 * @bo: The buffer object to map
311 * @filp: The file pointer from the process performing the mmap
312 *
313 * This is called by ttm_bo_mmap() to verify whether a process
314 * has the right to mmap a BO to their process space.
315 */
264static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) 316static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
265{ 317{
266 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); 318 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
@@ -278,6 +330,15 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
278 filp->private_data); 330 filp->private_data);
279} 331}
280 332
333/**
334 * amdgpu_move_null - Register memory for a buffer object
335 *
336 * @bo: The bo to assign the memory to
337 * @new_mem: The memory to be assigned.
338 *
339 * Assign the memory from new_mem to the memory of the buffer object
340 * bo.
341 */
281static void amdgpu_move_null(struct ttm_buffer_object *bo, 342static void amdgpu_move_null(struct ttm_buffer_object *bo,
282 struct ttm_mem_reg *new_mem) 343 struct ttm_mem_reg *new_mem)
283{ 344{
@@ -288,6 +349,10 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
288 new_mem->mm_node = NULL; 349 new_mem->mm_node = NULL;
289} 350}
290 351
352/**
353 * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT
354 * buffer.
355 */
291static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, 356static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
292 struct drm_mm_node *mm_node, 357 struct drm_mm_node *mm_node,
293 struct ttm_mem_reg *mem) 358 struct ttm_mem_reg *mem)
@@ -302,9 +367,10 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
302} 367}
303 368
304/** 369/**
305 * amdgpu_find_mm_node - Helper function finds the drm_mm_node 370 * amdgpu_find_mm_node - Helper function finds the drm_mm_node
306 * corresponding to @offset. It also modifies the offset to be 371 * corresponding to @offset. It also modifies
307 * within the drm_mm_node returned 372 * the offset to be within the drm_mm_node
373 * returned
308 */ 374 */
309static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, 375static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
310 unsigned long *offset) 376 unsigned long *offset)
@@ -443,7 +509,12 @@ error:
443 return r; 509 return r;
444} 510}
445 511
446 512/**
513 * amdgpu_move_blit - Copy an entire buffer to another buffer
514 *
515 * This is a helper called by amdgpu_bo_move() and
516 * amdgpu_move_vram_ram() to help move buffers to and from VRAM.
517 */
447static int amdgpu_move_blit(struct ttm_buffer_object *bo, 518static int amdgpu_move_blit(struct ttm_buffer_object *bo,
448 bool evict, bool no_wait_gpu, 519 bool evict, bool no_wait_gpu,
449 struct ttm_mem_reg *new_mem, 520 struct ttm_mem_reg *new_mem,
@@ -478,6 +549,11 @@ error:
478 return r; 549 return r;
479} 550}
480 551
552/**
553 * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
554 *
555 * Called by amdgpu_bo_move().
556 */
481static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, 557static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
482 struct ttm_operation_ctx *ctx, 558 struct ttm_operation_ctx *ctx,
483 struct ttm_mem_reg *new_mem) 559 struct ttm_mem_reg *new_mem)
@@ -490,6 +566,8 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
490 int r; 566 int r;
491 567
492 adev = amdgpu_ttm_adev(bo->bdev); 568 adev = amdgpu_ttm_adev(bo->bdev);
569
570 /* create space/pages for new_mem in GTT space */
493 tmp_mem = *new_mem; 571 tmp_mem = *new_mem;
494 tmp_mem.mm_node = NULL; 572 tmp_mem.mm_node = NULL;
495 placement.num_placement = 1; 573 placement.num_placement = 1;
@@ -504,25 +582,36 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
504 return r; 582 return r;
505 } 583 }
506 584
585 /* set caching flags */
507 r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); 586 r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
508 if (unlikely(r)) { 587 if (unlikely(r)) {
509 goto out_cleanup; 588 goto out_cleanup;
510 } 589 }
511 590
591 /* Bind the memory to the GTT space */
512 r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); 592 r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
513 if (unlikely(r)) { 593 if (unlikely(r)) {
514 goto out_cleanup; 594 goto out_cleanup;
515 } 595 }
596
597 /* blit VRAM to GTT */
516 r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); 598 r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
517 if (unlikely(r)) { 599 if (unlikely(r)) {
518 goto out_cleanup; 600 goto out_cleanup;
519 } 601 }
602
603 /* move BO (in tmp_mem) to new_mem */
520 r = ttm_bo_move_ttm(bo, ctx, new_mem); 604 r = ttm_bo_move_ttm(bo, ctx, new_mem);
521out_cleanup: 605out_cleanup:
522 ttm_bo_mem_put(bo, &tmp_mem); 606 ttm_bo_mem_put(bo, &tmp_mem);
523 return r; 607 return r;
524} 608}
525 609
610/**
611 * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
612 *
613 * Called by amdgpu_bo_move().
614 */
526static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, 615static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
527 struct ttm_operation_ctx *ctx, 616 struct ttm_operation_ctx *ctx,
528 struct ttm_mem_reg *new_mem) 617 struct ttm_mem_reg *new_mem)
@@ -535,6 +624,8 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
535 int r; 624 int r;
536 625
537 adev = amdgpu_ttm_adev(bo->bdev); 626 adev = amdgpu_ttm_adev(bo->bdev);
627
628 /* make space in GTT for old_mem buffer */
538 tmp_mem = *new_mem; 629 tmp_mem = *new_mem;
539 tmp_mem.mm_node = NULL; 630 tmp_mem.mm_node = NULL;
540 placement.num_placement = 1; 631 placement.num_placement = 1;
@@ -548,10 +639,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
548 if (unlikely(r)) { 639 if (unlikely(r)) {
549 return r; 640 return r;
550 } 641 }
642
643 /* move/bind old memory to GTT space */
551 r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); 644 r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
552 if (unlikely(r)) { 645 if (unlikely(r)) {
553 goto out_cleanup; 646 goto out_cleanup;
554 } 647 }
648
649 /* copy to VRAM */
555 r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); 650 r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
556 if (unlikely(r)) { 651 if (unlikely(r)) {
557 goto out_cleanup; 652 goto out_cleanup;
@@ -561,6 +656,11 @@ out_cleanup:
561 return r; 656 return r;
562} 657}
563 658
659/**
660 * amdgpu_bo_move - Move a buffer object to a new memory location
661 *
662 * Called by ttm_bo_handle_move_mem()
663 */
564static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, 664static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
565 struct ttm_operation_ctx *ctx, 665 struct ttm_operation_ctx *ctx,
566 struct ttm_mem_reg *new_mem) 666 struct ttm_mem_reg *new_mem)
@@ -626,6 +726,11 @@ memcpy:
626 return 0; 726 return 0;
627} 727}
628 728
729/**
730 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
731 *
732 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
733 */
629static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) 734static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
630{ 735{
631 struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; 736 struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
@@ -695,7 +800,7 @@ struct amdgpu_ttm_tt {
695 struct ttm_dma_tt ttm; 800 struct ttm_dma_tt ttm;
696 u64 offset; 801 u64 offset;
697 uint64_t userptr; 802 uint64_t userptr;
698 struct mm_struct *usermm; 803 struct task_struct *usertask;
699 uint32_t userflags; 804 uint32_t userflags;
700 spinlock_t guptasklock; 805 spinlock_t guptasklock;
701 struct list_head guptasks; 806 struct list_head guptasks;
@@ -703,17 +808,29 @@ struct amdgpu_ttm_tt {
703 uint32_t last_set_pages; 808 uint32_t last_set_pages;
704}; 809};
705 810
811/**
 812 * amdgpu_ttm_tt_get_user_pages - Pin the pages of memory pointed to
 813 * by a USERPTR
814 *
815 * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
816 * This provides a wrapper around the get_user_pages() call to provide
817 * device accessible pages that back user memory.
818 */
706int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) 819int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
707{ 820{
708 struct amdgpu_ttm_tt *gtt = (void *)ttm; 821 struct amdgpu_ttm_tt *gtt = (void *)ttm;
822 struct mm_struct *mm = gtt->usertask->mm;
709 unsigned int flags = 0; 823 unsigned int flags = 0;
710 unsigned pinned = 0; 824 unsigned pinned = 0;
711 int r; 825 int r;
712 826
827 if (!mm) /* Happens during process shutdown */
828 return -ESRCH;
829
713 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) 830 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
714 flags |= FOLL_WRITE; 831 flags |= FOLL_WRITE;
715 832
716 down_read(&current->mm->mmap_sem); 833 down_read(&mm->mmap_sem);
717 834
718 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { 835 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
719 /* check that we only use anonymous memory 836 /* check that we only use anonymous memory
@@ -721,13 +838,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
721 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; 838 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
722 struct vm_area_struct *vma; 839 struct vm_area_struct *vma;
723 840
724 vma = find_vma(gtt->usermm, gtt->userptr); 841 vma = find_vma(mm, gtt->userptr);
725 if (!vma || vma->vm_file || vma->vm_end < end) { 842 if (!vma || vma->vm_file || vma->vm_end < end) {
726 up_read(&current->mm->mmap_sem); 843 up_read(&mm->mmap_sem);
727 return -EPERM; 844 return -EPERM;
728 } 845 }
729 } 846 }
730 847
848 /* loop enough times using contiguous pages of memory */
731 do { 849 do {
732 unsigned num_pages = ttm->num_pages - pinned; 850 unsigned num_pages = ttm->num_pages - pinned;
733 uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; 851 uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
@@ -739,7 +857,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
739 list_add(&guptask.list, &gtt->guptasks); 857 list_add(&guptask.list, &gtt->guptasks);
740 spin_unlock(&gtt->guptasklock); 858 spin_unlock(&gtt->guptasklock);
741 859
742 r = get_user_pages(userptr, num_pages, flags, p, NULL); 860 if (mm == current->mm)
861 r = get_user_pages(userptr, num_pages, flags, p, NULL);
862 else
863 r = get_user_pages_remote(gtt->usertask,
864 mm, userptr, num_pages,
865 flags, p, NULL, NULL);
743 866
744 spin_lock(&gtt->guptasklock); 867 spin_lock(&gtt->guptasklock);
745 list_del(&guptask.list); 868 list_del(&guptask.list);
@@ -752,15 +875,23 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
752 875
753 } while (pinned < ttm->num_pages); 876 } while (pinned < ttm->num_pages);
754 877
755 up_read(&current->mm->mmap_sem); 878 up_read(&mm->mmap_sem);
756 return 0; 879 return 0;
757 880
758release_pages: 881release_pages:
759 release_pages(pages, pinned); 882 release_pages(pages, pinned);
760 up_read(&current->mm->mmap_sem); 883 up_read(&mm->mmap_sem);
761 return r; 884 return r;
762} 885}
763 886
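The pinning path above is reached when userspace registers a user pointer through the AMDGPU_GEM_USERPTR ioctl. A minimal caller-side sketch, assuming the UAPI from include/uapi/drm/amdgpu_drm.h (struct drm_amdgpu_gem_userptr, DRM_IOCTL_AMDGPU_GEM_USERPTR and the AMDGPU_GEM_USERPTR_* flags), that the header is installed as <drm/amdgpu_drm.h>, and that the amdgpu card is reachable through /dev/dri/renderD128:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <drm/amdgpu_drm.h>

    int main(void)
    {
            struct drm_amdgpu_gem_userptr args;
            size_t size = 4096;
            void *ptr;
            int fd;

            fd = open("/dev/dri/renderD128", O_RDWR);
            if (fd < 0)
                    return 1;

            /* page-aligned anonymous memory, as required for ANONONLY userptrs */
            ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (ptr == MAP_FAILED)
                    return 1;

            memset(&args, 0, sizeof(args));
            args.addr = (uintptr_t)ptr;
            args.size = size;
            args.flags = AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE;

            if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_USERPTR, &args))
                    perror("GEM_USERPTR");
            else
                    printf("userptr BO handle %u\n", args.handle);

            close(fd);
            return 0;
    }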
887/**
888 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages
889 * as necessary.
890 *
891 * Called by amdgpu_cs_list_validate(). This creates the page list
892 * that backs user memory and will ultimately be mapped into the device
893 * address space.
894 */
764void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) 895void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
765{ 896{
766 struct amdgpu_ttm_tt *gtt = (void *)ttm; 897 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -775,6 +906,11 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
775 } 906 }
776} 907}
777 908
909/**
 910 * amdgpu_ttm_tt_mark_user_pages - Mark pages as dirty
911 *
912 * Called while unpinning userptr pages
913 */
778void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) 914void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
779{ 915{
780 struct amdgpu_ttm_tt *gtt = (void *)ttm; 916 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -793,7 +929,12 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
793 } 929 }
794} 930}
795 931
796/* prepare the sg table with the user pages */ 932/**
933 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the
934 * user pages
935 *
936 * Called by amdgpu_ttm_backend_bind()
937 **/
797static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) 938static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
798{ 939{
799 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 940 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -805,17 +946,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
805 enum dma_data_direction direction = write ? 946 enum dma_data_direction direction = write ?
806 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 947 DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
807 948
949 /* Allocate an SG array and squash pages into it */
808 r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, 950 r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
809 ttm->num_pages << PAGE_SHIFT, 951 ttm->num_pages << PAGE_SHIFT,
810 GFP_KERNEL); 952 GFP_KERNEL);
811 if (r) 953 if (r)
812 goto release_sg; 954 goto release_sg;
813 955
956 /* Map SG to device */
814 r = -ENOMEM; 957 r = -ENOMEM;
815 nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 958 nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
816 if (nents != ttm->sg->nents) 959 if (nents != ttm->sg->nents)
817 goto release_sg; 960 goto release_sg;
818 961
962 /* convert SG to linear array of pages and dma addresses */
819 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, 963 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
820 gtt->ttm.dma_address, ttm->num_pages); 964 gtt->ttm.dma_address, ttm->num_pages);
821 965
@@ -826,6 +970,9 @@ release_sg:
826 return r; 970 return r;
827} 971}
828 972
973/**
974 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
975 */
829static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) 976static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
830{ 977{
831 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 978 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -839,14 +986,60 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
839 if (!ttm->sg->sgl) 986 if (!ttm->sg->sgl)
840 return; 987 return;
841 988
842 /* free the sg table and pages again */ 989 /* unmap the pages mapped to the device */
843 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 990 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
844 991
992 /* mark the pages as dirty */
845 amdgpu_ttm_tt_mark_user_pages(ttm); 993 amdgpu_ttm_tt_mark_user_pages(ttm);
846 994
847 sg_free_table(ttm->sg); 995 sg_free_table(ttm->sg);
848} 996}
849 997
998int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
999 struct ttm_buffer_object *tbo,
1000 uint64_t flags)
1001{
1002 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
1003 struct ttm_tt *ttm = tbo->ttm;
1004 struct amdgpu_ttm_tt *gtt = (void *)ttm;
1005 int r;
1006
1007 if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
1008 uint64_t page_idx = 1;
1009
1010 r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
1011 ttm->pages, gtt->ttm.dma_address, flags);
1012 if (r)
1013 goto gart_bind_fail;
1014
1015 /* Patch mtype of the second part BO */
1016 flags &= ~AMDGPU_PTE_MTYPE_MASK;
1017 flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
1018
1019 r = amdgpu_gart_bind(adev,
1020 gtt->offset + (page_idx << PAGE_SHIFT),
1021 ttm->num_pages - page_idx,
1022 &ttm->pages[page_idx],
1023 &(gtt->ttm.dma_address[page_idx]), flags);
1024 } else {
1025 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
1026 ttm->pages, gtt->ttm.dma_address, flags);
1027 }
1028
1029gart_bind_fail:
1030 if (r)
1031 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
1032 ttm->num_pages, gtt->offset);
1033
1034 return r;
1035}
1036
1037/**
1038 * amdgpu_ttm_backend_bind - Bind GTT memory
1039 *
1040 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
1041 * This handles binding GTT memory to the device address space.
1042 */
850static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, 1043static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
851 struct ttm_mem_reg *bo_mem) 1044 struct ttm_mem_reg *bo_mem)
852{ 1045{
@@ -877,7 +1070,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
877 return 0; 1070 return 0;
878 } 1071 }
879 1072
1073 /* compute PTE flags relevant to this BO memory */
880 flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); 1074 flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
1075
1076 /* bind pages into GART page tables */
881 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; 1077 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
882 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, 1078 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
883 ttm->pages, gtt->ttm.dma_address, flags); 1079 ttm->pages, gtt->ttm.dma_address, flags);
@@ -888,6 +1084,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
888 return r; 1084 return r;
889} 1085}
890 1086
1087/**
1088 * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
1089 */
891int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) 1090int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
892{ 1091{
893 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); 1092 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@@ -903,6 +1102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
903 amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) 1102 amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
904 return 0; 1103 return 0;
905 1104
1105 /* allocate GTT space */
906 tmp = bo->mem; 1106 tmp = bo->mem;
907 tmp.mm_node = NULL; 1107 tmp.mm_node = NULL;
908 placement.num_placement = 1; 1108 placement.num_placement = 1;
@@ -918,10 +1118,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
918 if (unlikely(r)) 1118 if (unlikely(r))
919 return r; 1119 return r;
920 1120
1121 /* compute PTE flags for this buffer object */
921 flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); 1122 flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
1123
1124 /* Bind pages */
922 gtt->offset = (u64)tmp.start << PAGE_SHIFT; 1125 gtt->offset = (u64)tmp.start << PAGE_SHIFT;
923 r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, 1126 r = amdgpu_ttm_gart_bind(adev, bo, flags);
924 bo->ttm->pages, gtt->ttm.dma_address, flags);
925 if (unlikely(r)) { 1127 if (unlikely(r)) {
926 ttm_bo_mem_put(bo, &tmp); 1128 ttm_bo_mem_put(bo, &tmp);
927 return r; 1129 return r;
@@ -935,31 +1137,40 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
935 return 0; 1137 return 0;
936} 1138}
937 1139
1140/**
1141 * amdgpu_ttm_recover_gart - Rebind GTT pages
1142 *
1143 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
1144 * rebind GTT pages during a GPU reset.
1145 */
938int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) 1146int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
939{ 1147{
940 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); 1148 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
941 struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
942 uint64_t flags; 1149 uint64_t flags;
943 int r; 1150 int r;
944 1151
945 if (!gtt) 1152 if (!tbo->ttm)
946 return 0; 1153 return 0;
947 1154
948 flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem); 1155 flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
949 r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, 1156 r = amdgpu_ttm_gart_bind(adev, tbo, flags);
950 gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); 1157
951 if (r)
952 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
953 gtt->ttm.ttm.num_pages, gtt->offset);
954 return r; 1158 return r;
955} 1159}
956 1160
1161/**
1162 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
1163 *
1164 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
1165 * ttm_tt_destroy().
1166 */
957static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) 1167static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
958{ 1168{
959 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 1169 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
960 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1170 struct amdgpu_ttm_tt *gtt = (void *)ttm;
961 int r; 1171 int r;
962 1172
1173 /* if the pages have userptr pinning then clear that first */
963 if (gtt->userptr) 1174 if (gtt->userptr)
964 amdgpu_ttm_tt_unpin_userptr(ttm); 1175 amdgpu_ttm_tt_unpin_userptr(ttm);
965 1176
@@ -978,6 +1189,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
978{ 1189{
979 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1190 struct amdgpu_ttm_tt *gtt = (void *)ttm;
980 1191
1192 if (gtt->usertask)
1193 put_task_struct(gtt->usertask);
1194
981 ttm_dma_tt_fini(&gtt->ttm); 1195 ttm_dma_tt_fini(&gtt->ttm);
982 kfree(gtt); 1196 kfree(gtt);
983} 1197}
@@ -988,6 +1202,13 @@ static struct ttm_backend_func amdgpu_backend_func = {
988 .destroy = &amdgpu_ttm_backend_destroy, 1202 .destroy = &amdgpu_ttm_backend_destroy,
989}; 1203};
990 1204
1205/**
1206 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
1207 *
1208 * @bo: The buffer object to create a GTT ttm_tt object around
1209 *
1210 * Called by ttm_tt_create().
1211 */
991static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, 1212static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
992 uint32_t page_flags) 1213 uint32_t page_flags)
993{ 1214{
@@ -1001,6 +1222,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
1001 return NULL; 1222 return NULL;
1002 } 1223 }
1003 gtt->ttm.ttm.func = &amdgpu_backend_func; 1224 gtt->ttm.ttm.func = &amdgpu_backend_func;
1225
1226 /* allocate space for the uninitialized page entries */
1004 if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) { 1227 if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
1005 kfree(gtt); 1228 kfree(gtt);
1006 return NULL; 1229 return NULL;
@@ -1008,6 +1231,12 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
1008 return &gtt->ttm.ttm; 1231 return &gtt->ttm.ttm;
1009} 1232}
1010 1233
1234/**
1235 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
1236 *
1237 * Map the pages of a ttm_tt object to an address space visible
1238 * to the underlying device.
1239 */
1011static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, 1240static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1012 struct ttm_operation_ctx *ctx) 1241 struct ttm_operation_ctx *ctx)
1013{ 1242{
@@ -1015,6 +1244,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1015 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1244 struct amdgpu_ttm_tt *gtt = (void *)ttm;
1016 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); 1245 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
1017 1246
1247 /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
1018 if (gtt && gtt->userptr) { 1248 if (gtt && gtt->userptr) {
1019 ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); 1249 ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1020 if (!ttm->sg) 1250 if (!ttm->sg)
@@ -1039,9 +1269,17 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1039 } 1269 }
1040#endif 1270#endif
1041 1271
1272 /* fall back to generic helper to populate the page array
1273 * and map them to the device */
1042 return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx); 1274 return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
1043} 1275}
1044 1276
1277/**
1278 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
1279 *
1280 * Unmaps pages of a ttm_tt object from the device address space and
1281 * unpopulates the page array backing it.
1282 */
1045static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) 1283static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1046{ 1284{
1047 struct amdgpu_device *adev; 1285 struct amdgpu_device *adev;
@@ -1067,9 +1305,21 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1067 } 1305 }
1068#endif 1306#endif
1069 1307
1308 /* fall back to generic helper to unmap and unpopulate array */
1070 ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm); 1309 ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
1071} 1310}
1072 1311
1312/**
1313 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt
1314 * for the current task
1315 *
1316 * @ttm: The ttm_tt object to bind this userptr object to
 1317 * @addr: The address in the current task's VM space to use
1318 * @flags: Requirements of userptr object.
1319 *
1320 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
 1321 * to the current task
1322 */
1073int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, 1323int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1074 uint32_t flags) 1324 uint32_t flags)
1075{ 1325{
@@ -1079,8 +1329,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1079 return -EINVAL; 1329 return -EINVAL;
1080 1330
1081 gtt->userptr = addr; 1331 gtt->userptr = addr;
1082 gtt->usermm = current->mm;
1083 gtt->userflags = flags; 1332 gtt->userflags = flags;
1333
1334 if (gtt->usertask)
1335 put_task_struct(gtt->usertask);
1336 gtt->usertask = current->group_leader;
1337 get_task_struct(gtt->usertask);
1338
1084 spin_lock_init(&gtt->guptasklock); 1339 spin_lock_init(&gtt->guptasklock);
1085 INIT_LIST_HEAD(&gtt->guptasks); 1340 INIT_LIST_HEAD(&gtt->guptasks);
1086 atomic_set(&gtt->mmu_invalidations, 0); 1341 atomic_set(&gtt->mmu_invalidations, 0);
@@ -1089,6 +1344,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1089 return 0; 1344 return 0;
1090} 1345}
1091 1346
1347/**
 1348 * amdgpu_ttm_tt_get_usermm - Return the mm_struct backing a userptr ttm_tt object
1349 */
1092struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) 1350struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1093{ 1351{
1094 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1352 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1096,9 +1354,18 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
1096 if (gtt == NULL) 1354 if (gtt == NULL)
1097 return NULL; 1355 return NULL;
1098 1356
1099 return gtt->usermm; 1357 if (gtt->usertask == NULL)
1358 return NULL;
1359
1360 return gtt->usertask->mm;
1100} 1361}
1101 1362
1363/**
 1364 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies
1365 * inside an address range for the
1366 * current task.
1367 *
1368 */
1102bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, 1369bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1103 unsigned long end) 1370 unsigned long end)
1104{ 1371{
@@ -1109,10 +1376,16 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1109 if (gtt == NULL || !gtt->userptr) 1376 if (gtt == NULL || !gtt->userptr)
1110 return false; 1377 return false;
1111 1378
1379 /* Return false if no part of the ttm_tt object lies within
1380 * the range
1381 */
1112 size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; 1382 size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
1113 if (gtt->userptr > end || gtt->userptr + size <= start) 1383 if (gtt->userptr > end || gtt->userptr + size <= start)
1114 return false; 1384 return false;
1115 1385
1386 /* Search the lists of tasks that hold this mapping and see
 1387 * if current is one of them. If it is, return false.
1388 */
1116 spin_lock(&gtt->guptasklock); 1389 spin_lock(&gtt->guptasklock);
1117 list_for_each_entry(entry, &gtt->guptasks, list) { 1390 list_for_each_entry(entry, &gtt->guptasks, list) {
1118 if (entry->task == current) { 1391 if (entry->task == current) {
@@ -1127,6 +1400,10 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1127 return true; 1400 return true;
1128} 1401}
1129 1402
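The two comparisons above are the usual interval-overlap test. A standalone sketch of the same check (a hypothetical helper in plain C):

#include <stdbool.h>

/* Returns true when [ptr, ptr + size) overlaps the invalidated range.
 * The object is unaffected only when it starts after the range or ends
 * before it, mirroring the test in amdgpu_ttm_tt_affect_userptr().
 */
static bool sketch_range_overlaps(unsigned long ptr, unsigned long size,
				  unsigned long start, unsigned long end)
{
	return !(ptr > end || ptr + size <= start);
}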
1403/**
1404 * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been
1405 * invalidated?
1406 */
1130bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, 1407bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1131 int *last_invalidated) 1408 int *last_invalidated)
1132{ 1409{
@@ -1137,6 +1414,12 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1137 return prev_invalidated != *last_invalidated; 1414 return prev_invalidated != *last_invalidated;
1138} 1415}
1139 1416
1417/**
1418 * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this
1419 * ttm_tt object been invalidated
1420 * since the last time they've
1421 * been set?
1422 */
1140bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) 1423bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
1141{ 1424{
1142 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1425 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1147,6 +1430,9 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
1147 return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; 1430 return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
1148} 1431}
1149 1432
1433/**
1434 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
1435 */
1150bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) 1436bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1151{ 1437{
1152 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1438 struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1157,6 +1443,12 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1157 return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); 1443 return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
1158} 1444}
1159 1445
1446/**
1447 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
1448 *
1449 * @ttm: The ttm_tt object to compute the flags for
1450 * @mem: The memory registry backing this ttm_tt object
1451 */
1160uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, 1452uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1161 struct ttm_mem_reg *mem) 1453 struct ttm_mem_reg *mem)
1162{ 1454{
@@ -1181,6 +1473,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1181 return flags; 1473 return flags;
1182} 1474}
1183 1475
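For orientation, the flag computation amounts to combining a few AMDGPU_PTE_* bits from the placement and the userptr flags. A trimmed-down sketch of the idea, assuming the amdgpu/ttm headers are available (the real function also handles the system, snooped and execute bits per memory type and ASIC):

static uint64_t sketch_pte_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
	uint64_t flags = 0;

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;	/* backed by GTT or VRAM */

	flags |= AMDGPU_PTE_READABLE;		/* reads are always allowed */
	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;	/* honour read-only userptrs */

	return flags;
}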
1476/**
1477 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict
1478 * a buffer object.
1479 *
1480 * Return true if eviction is sensible. Called by
1481 * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space()
1482 * which tries to evict buffer objects until it can find space
1483 * for a new object and by ttm_bo_force_list_clean() which is
1484 * used to clean out a memory space.
1485 */
1184static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, 1486static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1185 const struct ttm_place *place) 1487 const struct ttm_place *place)
1186{ 1488{
@@ -1227,6 +1529,19 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1227 return ttm_bo_eviction_valuable(bo, place); 1529 return ttm_bo_eviction_valuable(bo, place);
1228} 1530}
1229 1531
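In other words, the callback only decides whether evicting this BO would actually help satisfy the requested placement, and it can always defer to the generic TTM test. A hypothetical, minimal callback shape (not the driver's implementation):

static bool sketch_eviction_valuable(struct ttm_buffer_object *bo,
				     const struct ttm_place *place)
{
	/* Evicting a VRAM BO that lies entirely outside the wanted
	 * pfn window frees nothing useful, so skip it.
	 */
	if (bo->mem.mem_type == TTM_PL_VRAM && place->lpfn &&
	    (bo->mem.start >= place->lpfn ||
	     bo->mem.start + bo->mem.num_pages <= place->fpfn))
		return false;

	return ttm_bo_eviction_valuable(bo, place);
}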
1532/**
 1533 * amdgpu_ttm_access_memory - Read or write memory that backs a
1534 * buffer object.
1535 *
1536 * @bo: The buffer object to read/write
1537 * @offset: Offset into buffer object
1538 * @buf: Secondary buffer to write/read from
1539 * @len: Length in bytes of access
1540 * @write: true if writing
1541 *
 1542 * This is used to access the VRAM backing a buffer object via MMIO
 1543 * for debugging purposes.
1544 */
1230static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, 1545static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1231 unsigned long offset, 1546 unsigned long offset,
1232 void *buf, int len, int write) 1547 void *buf, int len, int write)
@@ -1329,6 +1644,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
1329static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) 1644static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1330{ 1645{
1331 struct ttm_operation_ctx ctx = { false, false }; 1646 struct ttm_operation_ctx ctx = { false, false };
1647 struct amdgpu_bo_param bp;
1332 int r = 0; 1648 int r = 0;
1333 int i; 1649 int i;
1334 u64 vram_size = adev->gmc.visible_vram_size; 1650 u64 vram_size = adev->gmc.visible_vram_size;
@@ -1336,17 +1652,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1336 u64 size = adev->fw_vram_usage.size; 1652 u64 size = adev->fw_vram_usage.size;
1337 struct amdgpu_bo *bo; 1653 struct amdgpu_bo *bo;
1338 1654
1655 memset(&bp, 0, sizeof(bp));
1656 bp.size = adev->fw_vram_usage.size;
1657 bp.byte_align = PAGE_SIZE;
1658 bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
1659 bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1660 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1661 bp.type = ttm_bo_type_kernel;
1662 bp.resv = NULL;
1339 adev->fw_vram_usage.va = NULL; 1663 adev->fw_vram_usage.va = NULL;
1340 adev->fw_vram_usage.reserved_bo = NULL; 1664 adev->fw_vram_usage.reserved_bo = NULL;
1341 1665
1342 if (adev->fw_vram_usage.size > 0 && 1666 if (adev->fw_vram_usage.size > 0 &&
1343 adev->fw_vram_usage.size <= vram_size) { 1667 adev->fw_vram_usage.size <= vram_size) {
1344 1668
1345 r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, 1669 r = amdgpu_bo_create(adev, &bp,
1346 AMDGPU_GEM_DOMAIN_VRAM,
1347 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1348 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1349 ttm_bo_type_kernel, NULL,
1350 &adev->fw_vram_usage.reserved_bo); 1670 &adev->fw_vram_usage.reserved_bo);
1351 if (r) 1671 if (r)
1352 goto error_create; 1672 goto error_create;
@@ -1398,13 +1718,22 @@ error_create:
1398 adev->fw_vram_usage.reserved_bo = NULL; 1718 adev->fw_vram_usage.reserved_bo = NULL;
1399 return r; 1719 return r;
1400} 1720}
1401 1721/**
1722 * amdgpu_ttm_init - Init the memory management (ttm) as well as
1723 * various gtt/vram related fields.
1724 *
1725 * This initializes all of the memory space pools that the TTM layer
1726 * will need such as the GTT space (system memory mapped to the device),
1727 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
1728 * can be mapped per VMID.
1729 */
1402int amdgpu_ttm_init(struct amdgpu_device *adev) 1730int amdgpu_ttm_init(struct amdgpu_device *adev)
1403{ 1731{
1404 uint64_t gtt_size; 1732 uint64_t gtt_size;
1405 int r; 1733 int r;
1406 u64 vis_vram_limit; 1734 u64 vis_vram_limit;
1407 1735
1736 /* initialize global references for vram/gtt */
1408 r = amdgpu_ttm_global_init(adev); 1737 r = amdgpu_ttm_global_init(adev);
1409 if (r) { 1738 if (r) {
1410 return r; 1739 return r;
@@ -1425,6 +1754,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1425 /* We opt to avoid OOM on system pages allocations */ 1754 /* We opt to avoid OOM on system pages allocations */
1426 adev->mman.bdev.no_retry = true; 1755 adev->mman.bdev.no_retry = true;
1427 1756
1757 /* Initialize VRAM pool with all of VRAM divided into pages */
1428 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, 1758 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1429 adev->gmc.real_vram_size >> PAGE_SHIFT); 1759 adev->gmc.real_vram_size >> PAGE_SHIFT);
1430 if (r) { 1760 if (r) {
@@ -1454,15 +1784,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1454 return r; 1784 return r;
1455 } 1785 }
1456 1786
1457 r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, 1787 /* allocate memory as required for VGA
1458 AMDGPU_GEM_DOMAIN_VRAM, 1788 * This is used for VGA emulation and pre-OS scanout buffers to
1459 &adev->stolen_vga_memory, 1789 * avoid display artifacts while transitioning between pre-OS
1460 NULL, NULL); 1790 * and driver. */
1461 if (r) 1791 if (adev->gmc.stolen_size) {
1462 return r; 1792 r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
1793 AMDGPU_GEM_DOMAIN_VRAM,
1794 &adev->stolen_vga_memory,
1795 NULL, NULL);
1796 if (r)
1797 return r;
1798 }
1463 DRM_INFO("amdgpu: %uM of VRAM memory ready\n", 1799 DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1464 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); 1800 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1465 1801
 1802 /* Compute GTT size, either based on 3/4 of the RAM size
 1803 * or whatever the user passed on module init */
1466 if (amdgpu_gtt_size == -1) { 1804 if (amdgpu_gtt_size == -1) {
1467 struct sysinfo si; 1805 struct sysinfo si;
1468 1806
@@ -1473,6 +1811,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1473 } 1811 }
1474 else 1812 else
1475 gtt_size = (uint64_t)amdgpu_gtt_size << 20; 1813 gtt_size = (uint64_t)amdgpu_gtt_size << 20;
1814
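The default sizing picks at least a fixed minimum and at least the VRAM size, but never more than three quarters of system RAM. A hedged sketch of that computation (AMDGPU_DEFAULT_GTT_SIZE_MB and the exact expression are assumptions, not quoted from this patch):

static uint64_t sketch_default_gtt_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	uint64_t gtt_size;

	si_meminfo(&si);
	/* at least the fixed default and at least as large as VRAM ... */
	gtt_size = max((uint64_t)AMDGPU_DEFAULT_GTT_SIZE_MB << 20,
		       (uint64_t)adev->gmc.mc_vram_size);
	/* ... but capped at 3/4 of system memory */
	return min(gtt_size, (uint64_t)si.totalram * si.mem_unit * 3 / 4);
}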
1815 /* Initialize GTT memory pool */
1476 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); 1816 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
1477 if (r) { 1817 if (r) {
1478 DRM_ERROR("Failed initializing GTT heap.\n"); 1818 DRM_ERROR("Failed initializing GTT heap.\n");
@@ -1481,6 +1821,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1481 DRM_INFO("amdgpu: %uM of GTT memory ready.\n", 1821 DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
1482 (unsigned)(gtt_size / (1024 * 1024))); 1822 (unsigned)(gtt_size / (1024 * 1024)));
1483 1823
1824 /* Initialize various on-chip memory pools */
1484 adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; 1825 adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
1485 adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; 1826 adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
1486 adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; 1827 adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
@@ -1520,6 +1861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1520 } 1861 }
1521 } 1862 }
1522 1863
1864 /* Register debugfs entries for amdgpu_ttm */
1523 r = amdgpu_ttm_debugfs_init(adev); 1865 r = amdgpu_ttm_debugfs_init(adev);
1524 if (r) { 1866 if (r) {
1525 DRM_ERROR("Failed to init debugfs\n"); 1867 DRM_ERROR("Failed to init debugfs\n");
@@ -1528,13 +1870,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1528 return 0; 1870 return 0;
1529} 1871}
1530 1872
1873/**
1874 * amdgpu_ttm_late_init - Handle any late initialization for
1875 * amdgpu_ttm
1876 */
1877void amdgpu_ttm_late_init(struct amdgpu_device *adev)
1878{
1879 /* return the VGA stolen memory (if any) back to VRAM */
1880 amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
1881}
1882
1883/**
1884 * amdgpu_ttm_fini - De-initialize the TTM memory pools
1885 */
1531void amdgpu_ttm_fini(struct amdgpu_device *adev) 1886void amdgpu_ttm_fini(struct amdgpu_device *adev)
1532{ 1887{
1533 if (!adev->mman.initialized) 1888 if (!adev->mman.initialized)
1534 return; 1889 return;
1535 1890
1536 amdgpu_ttm_debugfs_fini(adev); 1891 amdgpu_ttm_debugfs_fini(adev);
1537 amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
1538 amdgpu_ttm_fw_reserve_vram_fini(adev); 1892 amdgpu_ttm_fw_reserve_vram_fini(adev);
1539 if (adev->mman.aper_base_kaddr) 1893 if (adev->mman.aper_base_kaddr)
1540 iounmap(adev->mman.aper_base_kaddr); 1894 iounmap(adev->mman.aper_base_kaddr);
@@ -1856,6 +2210,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
1856#endif 2210#endif
1857}; 2211};
1858 2212
2213/**
2214 * amdgpu_ttm_vram_read - Linear read access to VRAM
2215 *
2216 * Accesses VRAM via MMIO for debugging purposes.
2217 */
1859static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, 2218static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1860 size_t size, loff_t *pos) 2219 size_t size, loff_t *pos)
1861{ 2220{
@@ -1895,6 +2254,11 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1895 return result; 2254 return result;
1896} 2255}
1897 2256
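For illustration, the VRAM file exposed through debugfs can be read from user space like any regular file. The path below is an assumption based on the usual DRM debugfs layout, not something this patch defines:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* assumed path; adjust the DRM minor number as needed */
	int fd = open("/sys/kernel/debug/dri/0/amdgpu_vram", O_RDONLY);
	uint32_t dword;

	if (fd < 0)
		return 1;
	/* read one dword at VRAM offset 0x1000 */
	if (pread(fd, &dword, sizeof(dword), 0x1000) == sizeof(dword))
		printf("VRAM[0x1000] = 0x%08x\n", dword);
	close(fd);
	return 0;
}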
2257/**
2258 * amdgpu_ttm_vram_write - Linear write access to VRAM
2259 *
2260 * Accesses VRAM via MMIO for debugging purposes.
2261 */
1898static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, 2262static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
1899 size_t size, loff_t *pos) 2263 size_t size, loff_t *pos)
1900{ 2264{
@@ -1943,6 +2307,9 @@ static const struct file_operations amdgpu_ttm_vram_fops = {
1943 2307
1944#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS 2308#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1945 2309
2310/**
2311 * amdgpu_ttm_gtt_read - Linear read access to GTT memory
2312 */
1946static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, 2313static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
1947 size_t size, loff_t *pos) 2314 size_t size, loff_t *pos)
1948{ 2315{
@@ -1990,6 +2357,13 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
1990 2357
1991#endif 2358#endif
1992 2359
2360/**
2361 * amdgpu_iomem_read - Virtual read access to GPU mapped memory
2362 *
 2363 * This function is used to read memory that has been mapped to the
 2364 * GPU. The addresses are bus addresses (e.g., what you would put in
 2365 * an IB or ring buffer), not physical addresses.
2366 */
1993static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, 2367static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
1994 size_t size, loff_t *pos) 2368 size_t size, loff_t *pos)
1995{ 2369{
@@ -1998,6 +2372,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
1998 ssize_t result = 0; 2372 ssize_t result = 0;
1999 int r; 2373 int r;
2000 2374
2375 /* retrieve the IOMMU domain if any for this device */
2001 dom = iommu_get_domain_for_dev(adev->dev); 2376 dom = iommu_get_domain_for_dev(adev->dev);
2002 2377
2003 while (size) { 2378 while (size) {
@@ -2010,6 +2385,10 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
2010 2385
2011 bytes = bytes < size ? bytes : size; 2386 bytes = bytes < size ? bytes : size;
2012 2387
2388 /* Translate the bus address to a physical address. If
2389 * the domain is NULL it means there is no IOMMU active
2390 * and the address translation is the identity
2391 */
2013 addr = dom ? iommu_iova_to_phys(dom, addr) : addr; 2392 addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2014 2393
2015 pfn = addr >> PAGE_SHIFT; 2394 pfn = addr >> PAGE_SHIFT;
@@ -2034,6 +2413,13 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
2034 return result; 2413 return result;
2035} 2414}
2036 2415
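The translation step follows the generic IOMMU API: look up the domain attached to the struct device, then map each bus (DMA) address back to a physical address, with a missing domain meaning the identity mapping. A small sketch of that pattern (hypothetical helper):

#include <linux/iommu.h>

static phys_addr_t sketch_bus_to_phys(struct device *dev, dma_addr_t addr)
{
	struct iommu_domain *dom = iommu_get_domain_for_dev(dev);

	/* no IOMMU domain: bus address equals physical address */
	return dom ? iommu_iova_to_phys(dom, addr) : (phys_addr_t)addr;
}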
2416/**
2417 * amdgpu_iomem_write - Virtual write access to GPU mapped memory
2418 *
 2419 * This function is used to write memory that has been mapped to the
 2420 * GPU. The addresses are bus addresses (e.g., what you would put in
 2421 * an IB or ring buffer), not physical addresses.
2422 */
2037static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, 2423static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
2038 size_t size, loff_t *pos) 2424 size_t size, loff_t *pos)
2039{ 2425{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 6ea7de863041..e969c879d87e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -77,6 +77,7 @@ uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
77uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); 77uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
78 78
79int amdgpu_ttm_init(struct amdgpu_device *adev); 79int amdgpu_ttm_init(struct amdgpu_device *adev);
80void amdgpu_ttm_late_init(struct amdgpu_device *adev);
80void amdgpu_ttm_fini(struct amdgpu_device *adev); 81void amdgpu_ttm_fini(struct amdgpu_device *adev);
81void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, 82void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
82 bool enable); 83 bool enable);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5916cc25e28b..f55f72a37ca8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
161 le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); 161 le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
162 DRM_DEBUG("reg_list_separate_size_bytes: %u\n", 162 DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
163 le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); 163 le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
164 DRM_DEBUG("reg_list_separate_size_bytes: %u\n", 164 DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
165 le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); 165 le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
166 if (version_minor == 1) {
167 const struct rlc_firmware_header_v2_1 *v2_1 =
168 container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
169 DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
170 le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
171 DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
172 le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
173 DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
174 le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
175 DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
176 le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
177 DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
178 le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
179 DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
180 le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
181 DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
182 le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
183 DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
184 le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
185 DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
186 le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
187 DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
188 le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
189 DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
190 le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
191 DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
192 le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
193 DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
194 le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
195 }
166 } else { 196 } else {
167 DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor); 197 DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
168 } 198 }
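The version_minor == 1 branch works because the v2.1 header embeds the v2.0 header as its first member, so container_of() can recover the extended layout from a pointer to the embedded struct. A standalone sketch of the idiom with hypothetical types:

#include <linux/kernel.h>	/* container_of() */

struct hdr_v2_0 { unsigned int ucode_size; };

struct hdr_v2_1 {
	struct hdr_v2_0 v2_0;	/* must remain the first member */
	unsigned int extra_size;
};

static unsigned int read_extra(const struct hdr_v2_0 *base)
{
	/* only valid when the caller knows the blob really is v2.1 */
	const struct hdr_v2_1 *v2_1 =
		container_of(base, struct hdr_v2_1, v2_0);

	return v2_1->extra_size;
}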
@@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
265 case CHIP_POLARIS10: 295 case CHIP_POLARIS10:
266 case CHIP_POLARIS11: 296 case CHIP_POLARIS11:
267 case CHIP_POLARIS12: 297 case CHIP_POLARIS12:
298 case CHIP_VEGAM:
268 if (!load_type) 299 if (!load_type)
269 return AMDGPU_FW_LOAD_DIRECT; 300 return AMDGPU_FW_LOAD_DIRECT;
270 else 301 else
@@ -276,6 +307,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
276 return AMDGPU_FW_LOAD_DIRECT; 307 return AMDGPU_FW_LOAD_DIRECT;
277 else 308 else
278 return AMDGPU_FW_LOAD_PSP; 309 return AMDGPU_FW_LOAD_PSP;
310 case CHIP_VEGA20:
311 return AMDGPU_FW_LOAD_DIRECT;
279 default: 312 default:
280 DRM_ERROR("Unknown firmware load type\n"); 313 DRM_ERROR("Unknown firmware load type\n");
281 } 314 }
@@ -307,7 +340,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
307 (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && 340 (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
308 ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && 341 ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
309 ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && 342 ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
310 ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) { 343 ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
344 ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
345 ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
346 ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
311 ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); 347 ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
312 348
313 memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + 349 memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -329,6 +365,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
329 le32_to_cpu(header->ucode_array_offset_bytes) + 365 le32_to_cpu(header->ucode_array_offset_bytes) +
330 le32_to_cpu(cp_hdr->jt_offset) * 4), 366 le32_to_cpu(cp_hdr->jt_offset) * 4),
331 ucode->ucode_size); 367 ucode->ucode_size);
368 } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
369 ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
370 memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
371 ucode->ucode_size);
372 } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
373 ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
374 memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
375 ucode->ucode_size);
376 } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
377 ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
378 memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
379 ucode->ucode_size);
332 } 380 }
333 381
334 return 0; 382 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 30b5500dc152..08e38579af24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 {
98 uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */ 98 uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
99}; 99};
100 100
101/* version_major=2, version_minor=1 */
102struct rlc_firmware_header_v2_1 {
103 struct rlc_firmware_header_v2_0 v2_0;
104 uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */
105 uint32_t save_restore_list_cntl_ucode_ver;
106 uint32_t save_restore_list_cntl_feature_ver;
107 uint32_t save_restore_list_cntl_size_bytes;
108 uint32_t save_restore_list_cntl_offset_bytes;
109 uint32_t save_restore_list_gpm_ucode_ver;
110 uint32_t save_restore_list_gpm_feature_ver;
111 uint32_t save_restore_list_gpm_size_bytes;
112 uint32_t save_restore_list_gpm_offset_bytes;
113 uint32_t save_restore_list_srm_ucode_ver;
114 uint32_t save_restore_list_srm_feature_ver;
115 uint32_t save_restore_list_srm_size_bytes;
116 uint32_t save_restore_list_srm_offset_bytes;
117};
118
101/* version_major=1, version_minor=0 */ 119/* version_major=1, version_minor=0 */
102struct sdma_firmware_header_v1_0 { 120struct sdma_firmware_header_v1_0 {
103 struct common_firmware_header header; 121 struct common_firmware_header header;
@@ -148,6 +166,7 @@ union amdgpu_firmware_header {
148 struct gfx_firmware_header_v1_0 gfx; 166 struct gfx_firmware_header_v1_0 gfx;
149 struct rlc_firmware_header_v1_0 rlc; 167 struct rlc_firmware_header_v1_0 rlc;
150 struct rlc_firmware_header_v2_0 rlc_v2_0; 168 struct rlc_firmware_header_v2_0 rlc_v2_0;
169 struct rlc_firmware_header_v2_1 rlc_v2_1;
151 struct sdma_firmware_header_v1_0 sdma; 170 struct sdma_firmware_header_v1_0 sdma;
152 struct sdma_firmware_header_v1_1 sdma_v1_1; 171 struct sdma_firmware_header_v1_1 sdma_v1_1;
153 struct gpu_info_firmware_header_v1_0 gpu_info; 172 struct gpu_info_firmware_header_v1_0 gpu_info;
@@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID {
168 AMDGPU_UCODE_ID_CP_MEC2, 187 AMDGPU_UCODE_ID_CP_MEC2,
169 AMDGPU_UCODE_ID_CP_MEC2_JT, 188 AMDGPU_UCODE_ID_CP_MEC2_JT,
170 AMDGPU_UCODE_ID_RLC_G, 189 AMDGPU_UCODE_ID_RLC_G,
190 AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
191 AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
192 AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
171 AMDGPU_UCODE_ID_STORAGE, 193 AMDGPU_UCODE_ID_STORAGE,
172 AMDGPU_UCODE_ID_SMC, 194 AMDGPU_UCODE_ID_SMC,
173 AMDGPU_UCODE_ID_UVD, 195 AMDGPU_UCODE_ID_UVD,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 627542b22ae4..bcf68f80bbf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -66,15 +66,18 @@
66#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin" 66#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin"
67#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" 67#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
68#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" 68#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
69#define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin"
69 70
70#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" 71#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
71#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin" 72#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
73#define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin"
72 74
73#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00) 75/* These are common relative offsets for all asics, from uvd_7_0_offset.h, */
74#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00) 76#define UVD_GPCOM_VCPU_CMD 0x03c3
75#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00) 77#define UVD_GPCOM_VCPU_DATA0 0x03c4
76#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00) 78#define UVD_GPCOM_VCPU_DATA1 0x03c5
77#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00) 79#define UVD_NO_OP 0x03ff
80#define UVD_BASE_SI 0x3800
78 81
79/** 82/**
80 * amdgpu_uvd_cs_ctx - Command submission parser context 83 * amdgpu_uvd_cs_ctx - Command submission parser context
@@ -109,9 +112,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
109MODULE_FIRMWARE(FIRMWARE_POLARIS10); 112MODULE_FIRMWARE(FIRMWARE_POLARIS10);
110MODULE_FIRMWARE(FIRMWARE_POLARIS11); 113MODULE_FIRMWARE(FIRMWARE_POLARIS11);
111MODULE_FIRMWARE(FIRMWARE_POLARIS12); 114MODULE_FIRMWARE(FIRMWARE_POLARIS12);
115MODULE_FIRMWARE(FIRMWARE_VEGAM);
112 116
113MODULE_FIRMWARE(FIRMWARE_VEGA10); 117MODULE_FIRMWARE(FIRMWARE_VEGA10);
114MODULE_FIRMWARE(FIRMWARE_VEGA12); 118MODULE_FIRMWARE(FIRMWARE_VEGA12);
119MODULE_FIRMWARE(FIRMWARE_VEGA20);
115 120
116static void amdgpu_uvd_idle_work_handler(struct work_struct *work); 121static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
117 122
@@ -123,9 +128,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
123 const char *fw_name; 128 const char *fw_name;
124 const struct common_firmware_header *hdr; 129 const struct common_firmware_header *hdr;
125 unsigned version_major, version_minor, family_id; 130 unsigned version_major, version_minor, family_id;
126 int i, r; 131 int i, j, r;
127 132
128 INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); 133 INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
129 134
130 switch (adev->asic_type) { 135 switch (adev->asic_type) {
131#ifdef CONFIG_DRM_AMDGPU_CIK 136#ifdef CONFIG_DRM_AMDGPU_CIK
@@ -172,6 +177,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
172 case CHIP_VEGA12: 177 case CHIP_VEGA12:
173 fw_name = FIRMWARE_VEGA12; 178 fw_name = FIRMWARE_VEGA12;
174 break; 179 break;
180 case CHIP_VEGAM:
181 fw_name = FIRMWARE_VEGAM;
182 break;
183 case CHIP_VEGA20:
184 fw_name = FIRMWARE_VEGA20;
185 break;
175 default: 186 default:
176 return -EINVAL; 187 return -EINVAL;
177 } 188 }
@@ -226,28 +237,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
226 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 237 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
227 bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); 238 bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
228 239
229 r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, 240 for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
230 AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
231 &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
232 if (r) {
233 dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
234 return r;
235 }
236 241
237 ring = &adev->uvd.ring; 242 r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
238 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; 243 AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
239 r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity, 244 &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
240 rq, amdgpu_sched_jobs, NULL); 245 if (r) {
241 if (r != 0) { 246 dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
242 DRM_ERROR("Failed setting up UVD run queue.\n"); 247 return r;
243 return r; 248 }
244 }
245 249
246 for (i = 0; i < adev->uvd.max_handles; ++i) { 250 ring = &adev->uvd.inst[j].ring;
247 atomic_set(&adev->uvd.handles[i], 0); 251 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
248 adev->uvd.filp[i] = NULL; 252 r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
249 } 253 rq, NULL);
254 if (r != 0) {
255 DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
256 return r;
257 }
250 258
259 for (i = 0; i < adev->uvd.max_handles; ++i) {
260 atomic_set(&adev->uvd.inst[j].handles[i], 0);
261 adev->uvd.inst[j].filp[i] = NULL;
262 }
263 }
251 /* from uvd v5.0 HW addressing capacity increased to 64 bits */ 264 /* from uvd v5.0 HW addressing capacity increased to 64 bits */
252 if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) 265 if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
253 adev->uvd.address_64_bit = true; 266 adev->uvd.address_64_bit = true;
@@ -274,20 +287,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
274 287
275int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) 288int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
276{ 289{
277 int i; 290 int i, j;
278 kfree(adev->uvd.saved_bo);
279 291
280 drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); 292 for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
293 kfree(adev->uvd.inst[j].saved_bo);
281 294
282 amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo, 295 drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
283 &adev->uvd.gpu_addr,
284 (void **)&adev->uvd.cpu_addr);
285 296
286 amdgpu_ring_fini(&adev->uvd.ring); 297 amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
298 &adev->uvd.inst[j].gpu_addr,
299 (void **)&adev->uvd.inst[j].cpu_addr);
287 300
288 for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) 301 amdgpu_ring_fini(&adev->uvd.inst[j].ring);
289 amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
290 302
303 for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
304 amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
305 }
291 release_firmware(adev->uvd.fw); 306 release_firmware(adev->uvd.fw);
292 307
293 return 0; 308 return 0;
@@ -297,32 +312,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
297{ 312{
298 unsigned size; 313 unsigned size;
299 void *ptr; 314 void *ptr;
300 int i; 315 int i, j;
301
302 if (adev->uvd.vcpu_bo == NULL)
303 return 0;
304 316
305 cancel_delayed_work_sync(&adev->uvd.idle_work); 317 for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
318 if (adev->uvd.inst[j].vcpu_bo == NULL)
319 continue;
306 320
307 /* only valid for physical mode */ 321 cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
308 if (adev->asic_type < CHIP_POLARIS10) {
309 for (i = 0; i < adev->uvd.max_handles; ++i)
310 if (atomic_read(&adev->uvd.handles[i]))
311 break;
312 322
313 if (i == adev->uvd.max_handles) 323 /* only valid for physical mode */
314 return 0; 324 if (adev->asic_type < CHIP_POLARIS10) {
315 } 325 for (i = 0; i < adev->uvd.max_handles; ++i)
326 if (atomic_read(&adev->uvd.inst[j].handles[i]))
327 break;
316 328
317 size = amdgpu_bo_size(adev->uvd.vcpu_bo); 329 if (i == adev->uvd.max_handles)
318 ptr = adev->uvd.cpu_addr; 330 continue;
331 }
319 332
320 adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); 333 size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
321 if (!adev->uvd.saved_bo) 334 ptr = adev->uvd.inst[j].cpu_addr;
322 return -ENOMEM;
323 335
324 memcpy_fromio(adev->uvd.saved_bo, ptr, size); 336 adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
337 if (!adev->uvd.inst[j].saved_bo)
338 return -ENOMEM;
325 339
340 memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
341 }
326 return 0; 342 return 0;
327} 343}
328 344
@@ -330,59 +346,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
330{ 346{
331 unsigned size; 347 unsigned size;
332 void *ptr; 348 void *ptr;
349 int i;
333 350
334 if (adev->uvd.vcpu_bo == NULL) 351 for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
335 return -EINVAL; 352 if (adev->uvd.inst[i].vcpu_bo == NULL)
353 return -EINVAL;
336 354
337 size = amdgpu_bo_size(adev->uvd.vcpu_bo); 355 size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
338 ptr = adev->uvd.cpu_addr; 356 ptr = adev->uvd.inst[i].cpu_addr;
339 357
340 if (adev->uvd.saved_bo != NULL) { 358 if (adev->uvd.inst[i].saved_bo != NULL) {
341 memcpy_toio(ptr, adev->uvd.saved_bo, size); 359 memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
342 kfree(adev->uvd.saved_bo); 360 kfree(adev->uvd.inst[i].saved_bo);
343 adev->uvd.saved_bo = NULL; 361 adev->uvd.inst[i].saved_bo = NULL;
344 } else { 362 } else {
345 const struct common_firmware_header *hdr; 363 const struct common_firmware_header *hdr;
346 unsigned offset; 364 unsigned offset;
347 365
348 hdr = (const struct common_firmware_header *)adev->uvd.fw->data; 366 hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
349 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 367 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
350 offset = le32_to_cpu(hdr->ucode_array_offset_bytes); 368 offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
351 memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset, 369 memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
352 le32_to_cpu(hdr->ucode_size_bytes)); 370 le32_to_cpu(hdr->ucode_size_bytes));
353 size -= le32_to_cpu(hdr->ucode_size_bytes); 371 size -= le32_to_cpu(hdr->ucode_size_bytes);
354 ptr += le32_to_cpu(hdr->ucode_size_bytes); 372 ptr += le32_to_cpu(hdr->ucode_size_bytes);
373 }
374 memset_io(ptr, 0, size);
375 /* to restore uvd fence seq */
376 amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
355 } 377 }
356 memset_io(ptr, 0, size);
357 /* to restore uvd fence seq */
358 amdgpu_fence_driver_force_completion(&adev->uvd.ring);
359 } 378 }
360
361 return 0; 379 return 0;
362} 380}
363 381
364void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) 382void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
365{ 383{
366 struct amdgpu_ring *ring = &adev->uvd.ring; 384 struct amdgpu_ring *ring;
367 int i, r; 385 int i, j, r;
368 386
369 for (i = 0; i < adev->uvd.max_handles; ++i) { 387 for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
370 uint32_t handle = atomic_read(&adev->uvd.handles[i]); 388 ring = &adev->uvd.inst[j].ring;
371 if (handle != 0 && adev->uvd.filp[i] == filp) { 389
372 struct dma_fence *fence; 390 for (i = 0; i < adev->uvd.max_handles; ++i) {
373 391 uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
374 r = amdgpu_uvd_get_destroy_msg(ring, handle, 392 if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
375 false, &fence); 393 struct dma_fence *fence;
376 if (r) { 394
377 DRM_ERROR("Error destroying UVD (%d)!\n", r); 395 r = amdgpu_uvd_get_destroy_msg(ring, handle,
378 continue; 396 false, &fence);
379 } 397 if (r) {
398 DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
399 continue;
400 }
380 401
381 dma_fence_wait(fence, false); 402 dma_fence_wait(fence, false);
382 dma_fence_put(fence); 403 dma_fence_put(fence);
383 404
384 adev->uvd.filp[i] = NULL; 405 adev->uvd.inst[j].filp[i] = NULL;
385 atomic_set(&adev->uvd.handles[i], 0); 406 atomic_set(&adev->uvd.inst[j].handles[i], 0);
407 }
386 } 408 }
387 } 409 }
388} 410}
@@ -657,15 +679,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
657 void *ptr; 679 void *ptr;
658 long r; 680 long r;
659 int i; 681 int i;
682 uint32_t ip_instance = ctx->parser->job->ring->me;
660 683
661 if (offset & 0x3F) { 684 if (offset & 0x3F) {
662 DRM_ERROR("UVD messages must be 64 byte aligned!\n"); 685 DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
663 return -EINVAL; 686 return -EINVAL;
664 } 687 }
665 688
666 r = amdgpu_bo_kmap(bo, &ptr); 689 r = amdgpu_bo_kmap(bo, &ptr);
667 if (r) { 690 if (r) {
668 DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); 691 DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
669 return r; 692 return r;
670 } 693 }
671 694
@@ -675,7 +698,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
675 handle = msg[2]; 698 handle = msg[2];
676 699
677 if (handle == 0) { 700 if (handle == 0) {
678 DRM_ERROR("Invalid UVD handle!\n"); 701 DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
679 return -EINVAL; 702 return -EINVAL;
680 } 703 }
681 704
@@ -686,18 +709,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
686 709
687 /* try to alloc a new handle */ 710 /* try to alloc a new handle */
688 for (i = 0; i < adev->uvd.max_handles; ++i) { 711 for (i = 0; i < adev->uvd.max_handles; ++i) {
689 if (atomic_read(&adev->uvd.handles[i]) == handle) { 712 if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
690 DRM_ERROR("Handle 0x%x already in use!\n", handle); 713 DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
691 return -EINVAL; 714 return -EINVAL;
692 } 715 }
693 716
694 if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) { 717 if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
695 adev->uvd.filp[i] = ctx->parser->filp; 718 adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
696 return 0; 719 return 0;
697 } 720 }
698 } 721 }
699 722
700 DRM_ERROR("No more free UVD handles!\n"); 723 DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
701 return -ENOSPC; 724 return -ENOSPC;
702 725
703 case 1: 726 case 1:
@@ -709,27 +732,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
709 732
710 /* validate the handle */ 733 /* validate the handle */
711 for (i = 0; i < adev->uvd.max_handles; ++i) { 734 for (i = 0; i < adev->uvd.max_handles; ++i) {
712 if (atomic_read(&adev->uvd.handles[i]) == handle) { 735 if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
713 if (adev->uvd.filp[i] != ctx->parser->filp) { 736 if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
714 DRM_ERROR("UVD handle collision detected!\n"); 737 DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
715 return -EINVAL; 738 return -EINVAL;
716 } 739 }
717 return 0; 740 return 0;
718 } 741 }
719 } 742 }
720 743
721 DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); 744 DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
722 return -ENOENT; 745 return -ENOENT;
723 746
724 case 2: 747 case 2:
725 /* it's a destroy msg, free the handle */ 748 /* it's a destroy msg, free the handle */
726 for (i = 0; i < adev->uvd.max_handles; ++i) 749 for (i = 0; i < adev->uvd.max_handles; ++i)
727 atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); 750 atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
728 amdgpu_bo_kunmap(bo); 751 amdgpu_bo_kunmap(bo);
729 return 0; 752 return 0;
730 753
731 default: 754 default:
732 DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); 755 DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
733 return -EINVAL; 756 return -EINVAL;
734 } 757 }
735 BUG(); 758 BUG();
@@ -800,7 +823,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
800 } 823 }
801 824
802 if ((cmd == 0 || cmd == 0x3) && 825 if ((cmd == 0 || cmd == 0x3) &&
803 (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) { 826 (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
804 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", 827 DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
805 start, end); 828 start, end);
806 return -EINVAL; 829 return -EINVAL;
@@ -968,6 +991,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
968 uint64_t addr; 991 uint64_t addr;
969 long r; 992 long r;
970 int i; 993 int i;
994 unsigned offset_idx = 0;
995 unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
971 996
972 amdgpu_bo_kunmap(bo); 997 amdgpu_bo_kunmap(bo);
973 amdgpu_bo_unpin(bo); 998 amdgpu_bo_unpin(bo);
@@ -987,17 +1012,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
987 goto err; 1012 goto err;
988 1013
989 if (adev->asic_type >= CHIP_VEGA10) { 1014 if (adev->asic_type >= CHIP_VEGA10) {
990 data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0); 1015 offset_idx = 1 + ring->me;
991 data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0); 1016 offset[1] = adev->reg_offset[UVD_HWIP][0][1];
992 data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0); 1017 offset[2] = adev->reg_offset[UVD_HWIP][1][1];
993 data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0);
994 } else {
995 data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
996 data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
997 data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
998 data[3] = PACKET0(mmUVD_NO_OP, 0);
999 } 1018 }
1000 1019
1020 data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
1021 data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
1022 data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
1023 data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
1024
1001 ib = &job->ibs[0]; 1025 ib = &job->ibs[0];
1002 addr = amdgpu_bo_gpu_offset(bo); 1026 addr = amdgpu_bo_gpu_offset(bo);
1003 ib->ptr[0] = data[0]; 1027 ib->ptr[0] = data[0];
@@ -1033,7 +1057,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
1033 if (r) 1057 if (r)
1034 goto err_free; 1058 goto err_free;
1035 1059
1036 r = amdgpu_job_submit(job, ring, &adev->uvd.entity, 1060 r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
1037 AMDGPU_FENCE_OWNER_UNDEFINED, &f); 1061 AMDGPU_FENCE_OWNER_UNDEFINED, &f);
1038 if (r) 1062 if (r)
1039 goto err_free; 1063 goto err_free;
@@ -1121,8 +1145,15 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
1121static void amdgpu_uvd_idle_work_handler(struct work_struct *work) 1145static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
1122{ 1146{
1123 struct amdgpu_device *adev = 1147 struct amdgpu_device *adev =
1124 container_of(work, struct amdgpu_device, uvd.idle_work.work); 1148 container_of(work, struct amdgpu_device, uvd.inst->idle_work.work);
1125 unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); 1149 unsigned fences = 0, i, j;
1150
1151 for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
1152 fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
1153 for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
1154 fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
1155 }
1156 }
1126 1157
1127 if (fences == 0) { 1158 if (fences == 0) {
1128 if (adev->pm.dpm_enabled) { 1159 if (adev->pm.dpm_enabled) {
@@ -1136,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
1136 AMD_CG_STATE_GATE); 1167 AMD_CG_STATE_GATE);
1137 } 1168 }
1138 } else { 1169 } else {
1139 schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT); 1170 schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
1140 } 1171 }
1141} 1172}
1142 1173
@@ -1148,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
1148 if (amdgpu_sriov_vf(adev)) 1179 if (amdgpu_sriov_vf(adev))
1149 return; 1180 return;
1150 1181
1151 set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); 1182 set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
1152 if (set_clocks) { 1183 if (set_clocks) {
1153 if (adev->pm.dpm_enabled) { 1184 if (adev->pm.dpm_enabled) {
1154 amdgpu_dpm_enable_uvd(adev, true); 1185 amdgpu_dpm_enable_uvd(adev, true);
@@ -1165,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
1165void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) 1196void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
1166{ 1197{
1167 if (!amdgpu_sriov_vf(ring->adev)) 1198 if (!amdgpu_sriov_vf(ring->adev))
1168 schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); 1199 schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
1169} 1200}
1170 1201
1171/** 1202/**
@@ -1179,27 +1210,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1179{ 1210{
1180 struct dma_fence *fence; 1211 struct dma_fence *fence;
1181 long r; 1212 long r;
1213 uint32_t ip_instance = ring->me;
1182 1214
1183 r = amdgpu_uvd_get_create_msg(ring, 1, NULL); 1215 r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
1184 if (r) { 1216 if (r) {
1185 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); 1217 DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
1186 goto error; 1218 goto error;
1187 } 1219 }
1188 1220
1189 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); 1221 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
1190 if (r) { 1222 if (r) {
1191 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); 1223 DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
1192 goto error; 1224 goto error;
1193 } 1225 }
1194 1226
1195 r = dma_fence_wait_timeout(fence, false, timeout); 1227 r = dma_fence_wait_timeout(fence, false, timeout);
1196 if (r == 0) { 1228 if (r == 0) {
1197 DRM_ERROR("amdgpu: IB test timed out.\n"); 1229 DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
1198 r = -ETIMEDOUT; 1230 r = -ETIMEDOUT;
1199 } else if (r < 0) { 1231 } else if (r < 0) {
1200 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 1232 DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
1201 } else { 1233 } else {
1202 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 1234 DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
1203 r = 0; 1235 r = 0;
1204 } 1236 }
1205 1237
@@ -1227,7 +1259,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
1227 * necessarily linear. So we need to count 1259 * necessarily linear. So we need to count
1228 * all non-zero handles. 1260 * all non-zero handles.
1229 */ 1261 */
1230 if (atomic_read(&adev->uvd.handles[i])) 1262 if (atomic_read(&adev->uvd.inst->handles[i]))
1231 used_handles++; 1263 used_handles++;
1232 } 1264 }
1233 1265
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 32ea20b99e53..b1579fba134c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -31,30 +31,37 @@
31#define AMDGPU_UVD_SESSION_SIZE (50*1024) 31#define AMDGPU_UVD_SESSION_SIZE (50*1024)
32#define AMDGPU_UVD_FIRMWARE_OFFSET 256 32#define AMDGPU_UVD_FIRMWARE_OFFSET 256
33 33
34#define AMDGPU_MAX_UVD_INSTANCES 2
35
34#define AMDGPU_UVD_FIRMWARE_SIZE(adev) \ 36#define AMDGPU_UVD_FIRMWARE_SIZE(adev) \
35 (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \ 37 (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
36 8) - AMDGPU_UVD_FIRMWARE_OFFSET) 38 8) - AMDGPU_UVD_FIRMWARE_OFFSET)
37 39
38struct amdgpu_uvd { 40struct amdgpu_uvd_inst {
39 struct amdgpu_bo *vcpu_bo; 41 struct amdgpu_bo *vcpu_bo;
40 void *cpu_addr; 42 void *cpu_addr;
41 uint64_t gpu_addr; 43 uint64_t gpu_addr;
42 unsigned fw_version;
43 void *saved_bo; 44 void *saved_bo;
44 unsigned max_handles;
45 atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; 45 atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
46 struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; 46 struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
47 struct delayed_work idle_work; 47 struct delayed_work idle_work;
48 const struct firmware *fw; /* UVD firmware */
49 struct amdgpu_ring ring; 48 struct amdgpu_ring ring;
50 struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS]; 49 struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
51 struct amdgpu_irq_src irq; 50 struct amdgpu_irq_src irq;
52 bool address_64_bit;
53 bool use_ctx_buf;
54 struct drm_sched_entity entity; 51 struct drm_sched_entity entity;
55 struct drm_sched_entity entity_enc; 52 struct drm_sched_entity entity_enc;
56 uint32_t srbm_soft_reset; 53 uint32_t srbm_soft_reset;
54};
55
56struct amdgpu_uvd {
57 const struct firmware *fw; /* UVD firmware */
58 unsigned fw_version;
59 unsigned max_handles;
57 unsigned num_enc_rings; 60 unsigned num_enc_rings;
61 uint8_t num_uvd_inst;
62 bool address_64_bit;
63 bool use_ctx_buf;
64 struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
58}; 65};
59 66
60int amdgpu_uvd_sw_init(struct amdgpu_device *adev); 67int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
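With the per-instance split above, code that used to touch adev->uvd.<field> directly now loops over adev->uvd.inst[]. A hedged sketch of the resulting access pattern, modelled on the idle-work handler in this patch:

static unsigned sketch_uvd_fence_count(struct amdgpu_device *adev)
{
	unsigned fences = 0, i, j;

	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
		fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
		for (j = 0; j < adev->uvd.num_enc_rings; ++j)
			fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
	}

	return fences;
}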
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a33804bd3314..23d960ec1cf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -51,11 +51,13 @@
51#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" 51#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
52#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" 52#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
53#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" 53#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
54#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" 54#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
55#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" 55#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
56#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin"
56 57
57#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" 58#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
58#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin" 59#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
60#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
59 61
60#ifdef CONFIG_DRM_AMDGPU_CIK 62#ifdef CONFIG_DRM_AMDGPU_CIK
61MODULE_FIRMWARE(FIRMWARE_BONAIRE); 63MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -71,9 +73,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
71MODULE_FIRMWARE(FIRMWARE_POLARIS10); 73MODULE_FIRMWARE(FIRMWARE_POLARIS10);
72MODULE_FIRMWARE(FIRMWARE_POLARIS11); 74MODULE_FIRMWARE(FIRMWARE_POLARIS11);
73MODULE_FIRMWARE(FIRMWARE_POLARIS12); 75MODULE_FIRMWARE(FIRMWARE_POLARIS12);
76MODULE_FIRMWARE(FIRMWARE_VEGAM);
74 77
75MODULE_FIRMWARE(FIRMWARE_VEGA10); 78MODULE_FIRMWARE(FIRMWARE_VEGA10);
76MODULE_FIRMWARE(FIRMWARE_VEGA12); 79MODULE_FIRMWARE(FIRMWARE_VEGA12);
80MODULE_FIRMWARE(FIRMWARE_VEGA20);
77 81
78static void amdgpu_vce_idle_work_handler(struct work_struct *work); 82static void amdgpu_vce_idle_work_handler(struct work_struct *work);
79 83
@@ -132,12 +136,18 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
132 case CHIP_POLARIS12: 136 case CHIP_POLARIS12:
133 fw_name = FIRMWARE_POLARIS12; 137 fw_name = FIRMWARE_POLARIS12;
134 break; 138 break;
139 case CHIP_VEGAM:
140 fw_name = FIRMWARE_VEGAM;
141 break;
135 case CHIP_VEGA10: 142 case CHIP_VEGA10:
136 fw_name = FIRMWARE_VEGA10; 143 fw_name = FIRMWARE_VEGA10;
137 break; 144 break;
138 case CHIP_VEGA12: 145 case CHIP_VEGA12:
139 fw_name = FIRMWARE_VEGA12; 146 fw_name = FIRMWARE_VEGA12;
140 break; 147 break;
148 case CHIP_VEGA20:
149 fw_name = FIRMWARE_VEGA20;
150 break;
141 151
142 default: 152 default:
143 return -EINVAL; 153 return -EINVAL;
@@ -181,7 +191,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
181 ring = &adev->vce.ring[0]; 191 ring = &adev->vce.ring[0];
182 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; 192 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
183 r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, 193 r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
184 rq, amdgpu_sched_jobs, NULL); 194 rq, NULL);
185 if (r != 0) { 195 if (r != 0) {
186 DRM_ERROR("Failed setting up VCE run queue.\n"); 196 DRM_ERROR("Failed setting up VCE run queue.\n");
187 return r; 197 return r;
@@ -755,6 +765,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
755 if (r) 765 if (r)
756 goto out; 766 goto out;
757 break; 767 break;
768
769 case 0x0500000d: /* MV buffer */
770 r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
771 idx + 2, 0, 0);
772 if (r)
773 goto out;
774
775 r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
776 idx + 7, 0, 0);
777 if (r)
778 goto out;
779 break;
758 } 780 }
759 781
760 idx += len / 4; 782 idx += len / 4;
@@ -860,6 +882,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
860 goto out; 882 goto out;
861 break; 883 break;
862 884
885 case 0x0500000d: /* MV buffer */
886 r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
887 idx + 2, *size, 0);
888 if (r)
889 goto out;
890
891 r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
892 idx + 7, *size / 12, 0);
893 if (r)
894 goto out;
895 break;
896
863 default: 897 default:
864 DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); 898 DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
865 r = -EINVAL; 899 r = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 58e495330b38..8851bcdfc260 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -105,7 +105,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
105 ring = &adev->vcn.ring_dec; 105 ring = &adev->vcn.ring_dec;
106 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; 106 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
107 r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, 107 r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
108 rq, amdgpu_sched_jobs, NULL); 108 rq, NULL);
109 if (r != 0) { 109 if (r != 0) {
110 DRM_ERROR("Failed setting up VCN dec run queue.\n"); 110 DRM_ERROR("Failed setting up VCN dec run queue.\n");
111 return r; 111 return r;
@@ -114,7 +114,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
114 ring = &adev->vcn.ring_enc[0]; 114 ring = &adev->vcn.ring_enc[0];
115 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; 115 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
116 r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, 116 r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
117 rq, amdgpu_sched_jobs, NULL); 117 rq, NULL);
118 if (r != 0) { 118 if (r != 0) {
119 DRM_ERROR("Failed setting up VCN enc run queue.\n"); 119 DRM_ERROR("Failed setting up VCN enc run queue.\n");
120 return r; 120 return r;
@@ -205,13 +205,18 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
205 struct amdgpu_device *adev = 205 struct amdgpu_device *adev =
206 container_of(work, struct amdgpu_device, vcn.idle_work.work); 206 container_of(work, struct amdgpu_device, vcn.idle_work.work);
207 unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); 207 unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
208 unsigned i;
209
210 for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
211 fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
212 }
208 213
209 if (fences == 0) { 214 if (fences == 0) {
210 if (adev->pm.dpm_enabled) { 215 if (adev->pm.dpm_enabled)
211 /* might be used when with pg/cg
212 amdgpu_dpm_enable_uvd(adev, false); 216 amdgpu_dpm_enable_uvd(adev, false);
213 */ 217 else
214 } 218 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
219 AMD_PG_STATE_GATE);
215 } else { 220 } else {
216 schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); 221 schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
217 } 222 }
@@ -223,9 +228,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
223 bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); 228 bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
224 229
225 if (set_clocks && adev->pm.dpm_enabled) { 230 if (set_clocks && adev->pm.dpm_enabled) {
226 /* might be used when with pg/cg 231 if (adev->pm.dpm_enabled)
227 amdgpu_dpm_enable_uvd(adev, true); 232 amdgpu_dpm_enable_uvd(adev, true);
228 */ 233 else
234 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
235 AMD_PG_STATE_UNGATE);
229 } 236 }
230} 237}
231 238
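The amdgpu_vcn.c hunks above extend the idle handler so it counts fences outstanding on every encode ring as well as the decode ring, and gates or ungates the block once the count drops to zero (through DPM where available, otherwise through the IP powergating call). A simplified stand-alone sketch of that idle-accounting pattern (hypothetical ring/idle_work types, not the driver's API):

#include <stdbool.h>
#include <stdio.h>

#define NUM_ENC_RINGS 2

struct ring { unsigned emitted, completed; };

static unsigned fences_outstanding(const struct ring *r)
{
	return r->emitted - r->completed;
}

static void idle_work(struct ring *dec, struct ring enc[NUM_ENC_RINGS], bool dpm)
{
	unsigned fences = fences_outstanding(dec);
	unsigned i;

	for (i = 0; i < NUM_ENC_RINGS; ++i)
		fences += fences_outstanding(&enc[i]);

	if (fences == 0)
		puts(dpm ? "gate via dpm" : "gate via powergating state");
	else
		puts("still busy, reschedule idle work");
}

int main(void)
{
	struct ring dec = { 4, 4 }, enc[NUM_ENC_RINGS] = { { 2, 2 }, { 1, 0 } };

	idle_work(&dec, enc, false);    /* one encode fence pending -> reschedule */
	return 0;
}
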
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 2fd7db891689..181e6afa9847 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -45,6 +45,17 @@
45#define VCN_ENC_CMD_REG_WRITE 0x0000000b 45#define VCN_ENC_CMD_REG_WRITE 0x0000000b
46#define VCN_ENC_CMD_REG_WAIT 0x0000000c 46#define VCN_ENC_CMD_REG_WAIT 0x0000000c
47 47
48enum engine_status_constants {
49 UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
50 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
51 UVD_STATUS__UVD_BUSY = 0x00000004,
52 GB_ADDR_CONFIG_DEFAULT = 0x26010011,
53 UVD_STATUS__IDLE = 0x2,
54 UVD_STATUS__BUSY = 0x5,
55 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
56 UVD_STATUS__RBC_BUSY = 0x1,
57};
58
48struct amdgpu_vcn { 59struct amdgpu_vcn {
49 struct amdgpu_bo *vcpu_bo; 60 struct amdgpu_bo *vcpu_bo;
50 void *cpu_addr; 61 void *cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index da55a78d7380..ccba88cc8c54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -94,6 +94,34 @@ struct amdgpu_prt_cb {
94 struct dma_fence_cb cb; 94 struct dma_fence_cb cb;
95}; 95};
96 96
97static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
98 struct amdgpu_vm *vm,
99 struct amdgpu_bo *bo)
100{
101 base->vm = vm;
102 base->bo = bo;
103 INIT_LIST_HEAD(&base->bo_list);
104 INIT_LIST_HEAD(&base->vm_status);
105
106 if (!bo)
107 return;
108 list_add_tail(&base->bo_list, &bo->va);
109
110 if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
111 return;
112
113 if (bo->preferred_domains &
114 amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
115 return;
116
117 /*
118 * we checked all the prerequisites, but it looks like this per vm bo
119 * is currently evicted. add the bo to the evicted list to make sure it
120 * is validated on next vm use to avoid fault.
121 * */
122 list_move_tail(&base->vm_status, &vm->evicted);
123}
124
97/** 125/**
98 * amdgpu_vm_level_shift - return the addr shift for each level 126 * amdgpu_vm_level_shift - return the addr shift for each level
99 * 127 *
@@ -196,24 +224,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
196 void *param) 224 void *param)
197{ 225{
198 struct ttm_bo_global *glob = adev->mman.bdev.glob; 226 struct ttm_bo_global *glob = adev->mman.bdev.glob;
199 int r; 227 struct amdgpu_vm_bo_base *bo_base, *tmp;
228 int r = 0;
200 229
201 spin_lock(&vm->status_lock); 230 list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
202 while (!list_empty(&vm->evicted)) { 231 struct amdgpu_bo *bo = bo_base->bo;
203 struct amdgpu_vm_bo_base *bo_base;
204 struct amdgpu_bo *bo;
205 232
206 bo_base = list_first_entry(&vm->evicted,
207 struct amdgpu_vm_bo_base,
208 vm_status);
209 spin_unlock(&vm->status_lock);
210
211 bo = bo_base->bo;
212 BUG_ON(!bo);
213 if (bo->parent) { 233 if (bo->parent) {
214 r = validate(param, bo); 234 r = validate(param, bo);
215 if (r) 235 if (r)
216 return r; 236 break;
217 237
218 spin_lock(&glob->lru_lock); 238 spin_lock(&glob->lru_lock);
219 ttm_bo_move_to_lru_tail(&bo->tbo); 239 ttm_bo_move_to_lru_tail(&bo->tbo);
@@ -222,22 +242,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
222 spin_unlock(&glob->lru_lock); 242 spin_unlock(&glob->lru_lock);
223 } 243 }
224 244
225 if (bo->tbo.type == ttm_bo_type_kernel && 245 if (bo->tbo.type != ttm_bo_type_kernel) {
226 vm->use_cpu_for_update) { 246 spin_lock(&vm->moved_lock);
227 r = amdgpu_bo_kmap(bo, NULL);
228 if (r)
229 return r;
230 }
231
232 spin_lock(&vm->status_lock);
233 if (bo->tbo.type != ttm_bo_type_kernel)
234 list_move(&bo_base->vm_status, &vm->moved); 247 list_move(&bo_base->vm_status, &vm->moved);
235 else 248 spin_unlock(&vm->moved_lock);
249 } else {
236 list_move(&bo_base->vm_status, &vm->relocated); 250 list_move(&bo_base->vm_status, &vm->relocated);
251 }
237 } 252 }
238 spin_unlock(&vm->status_lock);
239 253
240 return 0; 254 spin_lock(&glob->lru_lock);
255 list_for_each_entry(bo_base, &vm->idle, vm_status) {
256 struct amdgpu_bo *bo = bo_base->bo;
257
258 if (!bo->parent)
259 continue;
260
261 ttm_bo_move_to_lru_tail(&bo->tbo);
262 if (bo->shadow)
263 ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
264 }
265 spin_unlock(&glob->lru_lock);
266
267 return r;
241} 268}
242 269
243/** 270/**
@@ -249,13 +276,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
249 */ 276 */
250bool amdgpu_vm_ready(struct amdgpu_vm *vm) 277bool amdgpu_vm_ready(struct amdgpu_vm *vm)
251{ 278{
252 bool ready; 279 return list_empty(&vm->evicted);
253
254 spin_lock(&vm->status_lock);
255 ready = list_empty(&vm->evicted);
256 spin_unlock(&vm->status_lock);
257
258 return ready;
259} 280}
260 281
261/** 282/**
@@ -412,11 +433,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
412 struct amdgpu_bo *pt; 433 struct amdgpu_bo *pt;
413 434
414 if (!entry->base.bo) { 435 if (!entry->base.bo) {
415 r = amdgpu_bo_create(adev, 436 struct amdgpu_bo_param bp;
416 amdgpu_vm_bo_size(adev, level), 437
417 AMDGPU_GPU_PAGE_SIZE, 438 memset(&bp, 0, sizeof(bp));
418 AMDGPU_GEM_DOMAIN_VRAM, flags, 439 bp.size = amdgpu_vm_bo_size(adev, level);
419 ttm_bo_type_kernel, resv, &pt); 440 bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
441 bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
442 bp.flags = flags;
443 bp.type = ttm_bo_type_kernel;
444 bp.resv = resv;
445 r = amdgpu_bo_create(adev, &bp, &pt);
420 if (r) 446 if (r)
421 return r; 447 return r;
422 448
@@ -441,12 +467,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
441 */ 467 */
442 pt->parent = amdgpu_bo_ref(parent->base.bo); 468 pt->parent = amdgpu_bo_ref(parent->base.bo);
443 469
444 entry->base.vm = vm; 470 amdgpu_vm_bo_base_init(&entry->base, vm, pt);
445 entry->base.bo = pt; 471 list_move(&entry->base.vm_status, &vm->relocated);
446 list_add_tail(&entry->base.bo_list, &pt->va);
447 spin_lock(&vm->status_lock);
448 list_add(&entry->base.vm_status, &vm->relocated);
449 spin_unlock(&vm->status_lock);
450 } 472 }
451 473
452 if (level < AMDGPU_VM_PTB) { 474 if (level < AMDGPU_VM_PTB) {
@@ -628,7 +650,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
628 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); 650 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
629 651
630 if (vm_flush_needed || pasid_mapping_needed) { 652 if (vm_flush_needed || pasid_mapping_needed) {
631 r = amdgpu_fence_emit(ring, &fence); 653 r = amdgpu_fence_emit(ring, &fence, 0);
632 if (r) 654 if (r)
633 return r; 655 return r;
634 } 656 }
@@ -893,10 +915,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
893 if (!entry->base.bo) 915 if (!entry->base.bo)
894 continue; 916 continue;
895 917
896 spin_lock(&vm->status_lock); 918 if (!entry->base.moved)
897 if (list_empty(&entry->base.vm_status)) 919 list_move(&entry->base.vm_status, &vm->relocated);
898 list_add(&entry->base.vm_status, &vm->relocated);
899 spin_unlock(&vm->status_lock);
900 amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); 920 amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
901 } 921 }
902} 922}
@@ -926,6 +946,14 @@ restart:
926 params.adev = adev; 946 params.adev = adev;
927 947
928 if (vm->use_cpu_for_update) { 948 if (vm->use_cpu_for_update) {
949 struct amdgpu_vm_bo_base *bo_base;
950
951 list_for_each_entry(bo_base, &vm->relocated, vm_status) {
952 r = amdgpu_bo_kmap(bo_base->bo, NULL);
953 if (unlikely(r))
954 return r;
955 }
956
929 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); 957 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
930 if (unlikely(r)) 958 if (unlikely(r))
931 return r; 959 return r;
@@ -941,7 +969,6 @@ restart:
941 params.func = amdgpu_vm_do_set_ptes; 969 params.func = amdgpu_vm_do_set_ptes;
942 } 970 }
943 971
944 spin_lock(&vm->status_lock);
945 while (!list_empty(&vm->relocated)) { 972 while (!list_empty(&vm->relocated)) {
946 struct amdgpu_vm_bo_base *bo_base, *parent; 973 struct amdgpu_vm_bo_base *bo_base, *parent;
947 struct amdgpu_vm_pt *pt, *entry; 974 struct amdgpu_vm_pt *pt, *entry;
@@ -950,14 +977,12 @@ restart:
950 bo_base = list_first_entry(&vm->relocated, 977 bo_base = list_first_entry(&vm->relocated,
951 struct amdgpu_vm_bo_base, 978 struct amdgpu_vm_bo_base,
952 vm_status); 979 vm_status);
953 list_del_init(&bo_base->vm_status); 980 bo_base->moved = false;
954 spin_unlock(&vm->status_lock); 981 list_move(&bo_base->vm_status, &vm->idle);
955 982
956 bo = bo_base->bo->parent; 983 bo = bo_base->bo->parent;
957 if (!bo) { 984 if (!bo)
958 spin_lock(&vm->status_lock);
959 continue; 985 continue;
960 }
961 986
962 parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, 987 parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
963 bo_list); 988 bo_list);
@@ -966,12 +991,10 @@ restart:
966 991
967 amdgpu_vm_update_pde(&params, vm, pt, entry); 992 amdgpu_vm_update_pde(&params, vm, pt, entry);
968 993
969 spin_lock(&vm->status_lock);
970 if (!vm->use_cpu_for_update && 994 if (!vm->use_cpu_for_update &&
971 (ndw - params.ib->length_dw) < 32) 995 (ndw - params.ib->length_dw) < 32)
972 break; 996 break;
973 } 997 }
974 spin_unlock(&vm->status_lock);
975 998
976 if (vm->use_cpu_for_update) { 999 if (vm->use_cpu_for_update) {
977 /* Flush HDP */ 1000 /* Flush HDP */
@@ -1074,9 +1097,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1074 if (entry->huge) { 1097 if (entry->huge) {
1075 /* Add the entry to the relocated list to update it. */ 1098 /* Add the entry to the relocated list to update it. */
1076 entry->huge = false; 1099 entry->huge = false;
1077 spin_lock(&p->vm->status_lock);
1078 list_move(&entry->base.vm_status, &p->vm->relocated); 1100 list_move(&entry->base.vm_status, &p->vm->relocated);
1079 spin_unlock(&p->vm->status_lock);
1080 } 1101 }
1081 return; 1102 return;
1082 } 1103 }
@@ -1555,9 +1576,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1555 amdgpu_asic_flush_hdp(adev, NULL); 1576 amdgpu_asic_flush_hdp(adev, NULL);
1556 } 1577 }
1557 1578
1558 spin_lock(&vm->status_lock); 1579 spin_lock(&vm->moved_lock);
1559 list_del_init(&bo_va->base.vm_status); 1580 list_del_init(&bo_va->base.vm_status);
1560 spin_unlock(&vm->status_lock); 1581 spin_unlock(&vm->moved_lock);
1582
1583 /* If the BO is not in its preferred location add it back to
1584 * the evicted list so that it gets validated again on the
1585 * next command submission.
1586 */
1587 if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
1588 uint32_t mem_type = bo->tbo.mem.mem_type;
1589
1590 if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
1591 list_add_tail(&bo_va->base.vm_status, &vm->evicted);
1592 else
1593 list_add(&bo_va->base.vm_status, &vm->idle);
1594 }
1561 1595
1562 list_splice_init(&bo_va->invalids, &bo_va->valids); 1596 list_splice_init(&bo_va->invalids, &bo_va->valids);
1563 bo_va->cleared = clear; 1597 bo_va->cleared = clear;
@@ -1766,19 +1800,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1766int amdgpu_vm_handle_moved(struct amdgpu_device *adev, 1800int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1767 struct amdgpu_vm *vm) 1801 struct amdgpu_vm *vm)
1768{ 1802{
1803 struct amdgpu_bo_va *bo_va, *tmp;
1804 struct list_head moved;
1769 bool clear; 1805 bool clear;
1770 int r = 0; 1806 int r;
1771
1772 spin_lock(&vm->status_lock);
1773 while (!list_empty(&vm->moved)) {
1774 struct amdgpu_bo_va *bo_va;
1775 struct reservation_object *resv;
1776 1807
1777 bo_va = list_first_entry(&vm->moved, 1808 INIT_LIST_HEAD(&moved);
1778 struct amdgpu_bo_va, base.vm_status); 1809 spin_lock(&vm->moved_lock);
1779 spin_unlock(&vm->status_lock); 1810 list_splice_init(&vm->moved, &moved);
1811 spin_unlock(&vm->moved_lock);
1780 1812
1781 resv = bo_va->base.bo->tbo.resv; 1813 list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
1814 struct reservation_object *resv = bo_va->base.bo->tbo.resv;
1782 1815
1783 	/* Per VM BOs never need to be cleared in the page tables */ 1816 	/* Per VM BOs never need to be cleared in the page tables */
1784 if (resv == vm->root.base.bo->tbo.resv) 1817 if (resv == vm->root.base.bo->tbo.resv)
@@ -1791,17 +1824,19 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1791 clear = true; 1824 clear = true;
1792 1825
1793 r = amdgpu_vm_bo_update(adev, bo_va, clear); 1826 r = amdgpu_vm_bo_update(adev, bo_va, clear);
1794 if (r) 1827 if (r) {
1828 spin_lock(&vm->moved_lock);
1829 list_splice(&moved, &vm->moved);
1830 spin_unlock(&vm->moved_lock);
1795 return r; 1831 return r;
1832 }
1796 1833
1797 if (!clear && resv != vm->root.base.bo->tbo.resv) 1834 if (!clear && resv != vm->root.base.bo->tbo.resv)
1798 reservation_object_unlock(resv); 1835 reservation_object_unlock(resv);
1799 1836
1800 spin_lock(&vm->status_lock);
1801 } 1837 }
1802 spin_unlock(&vm->status_lock);
1803 1838
1804 return r; 1839 return 0;
1805} 1840}
1806 1841
1807/** 1842/**
@@ -1827,36 +1862,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1827 if (bo_va == NULL) { 1862 if (bo_va == NULL) {
1828 return NULL; 1863 return NULL;
1829 } 1864 }
1830 bo_va->base.vm = vm; 1865 amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1831 bo_va->base.bo = bo;
1832 INIT_LIST_HEAD(&bo_va->base.bo_list);
1833 INIT_LIST_HEAD(&bo_va->base.vm_status);
1834 1866
1835 bo_va->ref_count = 1; 1867 bo_va->ref_count = 1;
1836 INIT_LIST_HEAD(&bo_va->valids); 1868 INIT_LIST_HEAD(&bo_va->valids);
1837 INIT_LIST_HEAD(&bo_va->invalids); 1869 INIT_LIST_HEAD(&bo_va->invalids);
1838 1870
1839 if (!bo)
1840 return bo_va;
1841
1842 list_add_tail(&bo_va->base.bo_list, &bo->va);
1843
1844 if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
1845 return bo_va;
1846
1847 if (bo->preferred_domains &
1848 amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
1849 return bo_va;
1850
1851 /*
1852 * We checked all the prerequisites, but it looks like this per VM BO
1853 * is currently evicted. add the BO to the evicted list to make sure it
1854 * is validated on next VM use to avoid fault.
1855 * */
1856 spin_lock(&vm->status_lock);
1857 list_move_tail(&bo_va->base.vm_status, &vm->evicted);
1858 spin_unlock(&vm->status_lock);
1859
1860 return bo_va; 1871 return bo_va;
1861} 1872}
1862 1873
@@ -1884,11 +1895,11 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1884 if (mapping->flags & AMDGPU_PTE_PRT) 1895 if (mapping->flags & AMDGPU_PTE_PRT)
1885 amdgpu_vm_prt_get(adev); 1896 amdgpu_vm_prt_get(adev);
1886 1897
1887 if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { 1898 if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
1888 spin_lock(&vm->status_lock); 1899 !bo_va->base.moved) {
1889 if (list_empty(&bo_va->base.vm_status)) 1900 spin_lock(&vm->moved_lock);
1890 list_add(&bo_va->base.vm_status, &vm->moved); 1901 list_move(&bo_va->base.vm_status, &vm->moved);
1891 spin_unlock(&vm->status_lock); 1902 spin_unlock(&vm->moved_lock);
1892 } 1903 }
1893 trace_amdgpu_vm_bo_map(bo_va, mapping); 1904 trace_amdgpu_vm_bo_map(bo_va, mapping);
1894} 1905}
@@ -2198,9 +2209,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2198 2209
2199 list_del(&bo_va->base.bo_list); 2210 list_del(&bo_va->base.bo_list);
2200 2211
2201 spin_lock(&vm->status_lock); 2212 spin_lock(&vm->moved_lock);
2202 list_del(&bo_va->base.vm_status); 2213 list_del(&bo_va->base.vm_status);
2203 spin_unlock(&vm->status_lock); 2214 spin_unlock(&vm->moved_lock);
2204 2215
2205 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 2216 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2206 list_del(&mapping->list); 2217 list_del(&mapping->list);
@@ -2234,33 +2245,34 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2234{ 2245{
2235 struct amdgpu_vm_bo_base *bo_base; 2246 struct amdgpu_vm_bo_base *bo_base;
2236 2247
2248 /* shadow bo doesn't have bo base, its validation needs its parent */
2249 if (bo->parent && bo->parent->shadow == bo)
2250 bo = bo->parent;
2251
2237 list_for_each_entry(bo_base, &bo->va, bo_list) { 2252 list_for_each_entry(bo_base, &bo->va, bo_list) {
2238 struct amdgpu_vm *vm = bo_base->vm; 2253 struct amdgpu_vm *vm = bo_base->vm;
2254 bool was_moved = bo_base->moved;
2239 2255
2240 bo_base->moved = true; 2256 bo_base->moved = true;
2241 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { 2257 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2242 spin_lock(&bo_base->vm->status_lock);
2243 if (bo->tbo.type == ttm_bo_type_kernel) 2258 if (bo->tbo.type == ttm_bo_type_kernel)
2244 list_move(&bo_base->vm_status, &vm->evicted); 2259 list_move(&bo_base->vm_status, &vm->evicted);
2245 else 2260 else
2246 list_move_tail(&bo_base->vm_status, 2261 list_move_tail(&bo_base->vm_status,
2247 &vm->evicted); 2262 &vm->evicted);
2248 spin_unlock(&bo_base->vm->status_lock);
2249 continue; 2263 continue;
2250 } 2264 }
2251 2265
2252 if (bo->tbo.type == ttm_bo_type_kernel) { 2266 if (was_moved)
2253 spin_lock(&bo_base->vm->status_lock);
2254 if (list_empty(&bo_base->vm_status))
2255 list_add(&bo_base->vm_status, &vm->relocated);
2256 spin_unlock(&bo_base->vm->status_lock);
2257 continue; 2267 continue;
2258 }
2259 2268
2260 spin_lock(&bo_base->vm->status_lock); 2269 if (bo->tbo.type == ttm_bo_type_kernel) {
2261 if (list_empty(&bo_base->vm_status)) 2270 list_move(&bo_base->vm_status, &vm->relocated);
2262 list_add(&bo_base->vm_status, &vm->moved); 2271 } else {
2263 spin_unlock(&bo_base->vm->status_lock); 2272 spin_lock(&bo_base->vm->moved_lock);
2273 list_move(&bo_base->vm_status, &vm->moved);
2274 spin_unlock(&bo_base->vm->moved_lock);
2275 }
2264 } 2276 }
2265} 2277}
2266 2278
@@ -2355,6 +2367,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
2355int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 2367int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2356 int vm_context, unsigned int pasid) 2368 int vm_context, unsigned int pasid)
2357{ 2369{
2370 struct amdgpu_bo_param bp;
2371 struct amdgpu_bo *root;
2358 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, 2372 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2359 AMDGPU_VM_PTE_COUNT(adev) * 8); 2373 AMDGPU_VM_PTE_COUNT(adev) * 8);
2360 unsigned ring_instance; 2374 unsigned ring_instance;
@@ -2367,10 +2381,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2367 vm->va = RB_ROOT_CACHED; 2381 vm->va = RB_ROOT_CACHED;
2368 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 2382 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2369 vm->reserved_vmid[i] = NULL; 2383 vm->reserved_vmid[i] = NULL;
2370 spin_lock_init(&vm->status_lock);
2371 INIT_LIST_HEAD(&vm->evicted); 2384 INIT_LIST_HEAD(&vm->evicted);
2372 INIT_LIST_HEAD(&vm->relocated); 2385 INIT_LIST_HEAD(&vm->relocated);
2386 spin_lock_init(&vm->moved_lock);
2373 INIT_LIST_HEAD(&vm->moved); 2387 INIT_LIST_HEAD(&vm->moved);
2388 INIT_LIST_HEAD(&vm->idle);
2374 INIT_LIST_HEAD(&vm->freed); 2389 INIT_LIST_HEAD(&vm->freed);
2375 2390
2376 /* create scheduler entity for page table updates */ 2391 /* create scheduler entity for page table updates */
@@ -2380,7 +2395,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2380 ring = adev->vm_manager.vm_pte_rings[ring_instance]; 2395 ring = adev->vm_manager.vm_pte_rings[ring_instance];
2381 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; 2396 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
2382 r = drm_sched_entity_init(&ring->sched, &vm->entity, 2397 r = drm_sched_entity_init(&ring->sched, &vm->entity,
2383 rq, amdgpu_sched_jobs, NULL); 2398 rq, NULL);
2384 if (r) 2399 if (r)
2385 return r; 2400 return r;
2386 2401
@@ -2409,24 +2424,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2409 flags |= AMDGPU_GEM_CREATE_SHADOW; 2424 flags |= AMDGPU_GEM_CREATE_SHADOW;
2410 2425
2411 size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); 2426 size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2412 r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, 2427 memset(&bp, 0, sizeof(bp));
2413 ttm_bo_type_kernel, NULL, &vm->root.base.bo); 2428 bp.size = size;
2429 bp.byte_align = align;
2430 bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
2431 bp.flags = flags;
2432 bp.type = ttm_bo_type_kernel;
2433 bp.resv = NULL;
2434 r = amdgpu_bo_create(adev, &bp, &root);
2414 if (r) 2435 if (r)
2415 goto error_free_sched_entity; 2436 goto error_free_sched_entity;
2416 2437
2417 r = amdgpu_bo_reserve(vm->root.base.bo, true); 2438 r = amdgpu_bo_reserve(root, true);
2418 if (r) 2439 if (r)
2419 goto error_free_root; 2440 goto error_free_root;
2420 2441
2421 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, 2442 r = amdgpu_vm_clear_bo(adev, vm, root,
2422 adev->vm_manager.root_level, 2443 adev->vm_manager.root_level,
2423 vm->pte_support_ats); 2444 vm->pte_support_ats);
2424 if (r) 2445 if (r)
2425 goto error_unreserve; 2446 goto error_unreserve;
2426 2447
2427 vm->root.base.vm = vm; 2448 amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
2428 list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
2429 list_add_tail(&vm->root.base.vm_status, &vm->evicted);
2430 amdgpu_bo_unreserve(vm->root.base.bo); 2449 amdgpu_bo_unreserve(vm->root.base.bo);
2431 2450
2432 if (pasid) { 2451 if (pasid) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 30f080364c97..061b99a18cb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry;
75/* PDE Block Fragment Size for VEGA10 */ 75/* PDE Block Fragment Size for VEGA10 */
76#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) 76#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
77 77
78/* VEGA10 only */ 78
79/* For GFX9 */
79#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) 80#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
80#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) 81#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
81 82
82/* For Raven */ 83#define AMDGPU_MTYPE_NC 0
83#define AMDGPU_MTYPE_CC 2 84#define AMDGPU_MTYPE_CC 2
84 85
85#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ 86#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \
@@ -167,9 +168,6 @@ struct amdgpu_vm {
167 /* tree of virtual addresses mapped */ 168 /* tree of virtual addresses mapped */
168 struct rb_root_cached va; 169 struct rb_root_cached va;
169 170
170 /* protecting invalidated */
171 spinlock_t status_lock;
172
173 /* BOs who needs a validation */ 171 /* BOs who needs a validation */
174 struct list_head evicted; 172 struct list_head evicted;
175 173
@@ -178,6 +176,10 @@ struct amdgpu_vm {
178 176
179 /* BOs moved, but not yet updated in the PT */ 177 /* BOs moved, but not yet updated in the PT */
180 struct list_head moved; 178 struct list_head moved;
179 spinlock_t moved_lock;
180
181 /* All BOs of this VM not currently in the state machine */
182 struct list_head idle;
181 183
182 /* BO mappings freed, but not yet updated in the PT */ 184 /* BO mappings freed, but not yet updated in the PT */
183 struct list_head freed; 185 struct list_head freed;
@@ -186,9 +188,6 @@ struct amdgpu_vm {
186 struct amdgpu_vm_pt root; 188 struct amdgpu_vm_pt root;
187 struct dma_fence *last_update; 189 struct dma_fence *last_update;
188 190
189 /* protecting freed */
190 spinlock_t freed_lock;
191
192 /* Scheduler entity for page table updates */ 191 /* Scheduler entity for page table updates */
193 struct drm_sched_entity entity; 192 struct drm_sched_entity entity;
194 193
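The amdgpu_vm.c and amdgpu_vm.h hunks above retire the per-VM status_lock: evicted, relocated and idle BOs are now tracked under the reservation lock alone, and only the moved list keeps a dedicated spinlock, which amdgpu_vm_handle_moved drains by splicing the whole list onto a local head before processing it. A small stand-alone sketch of that splice-then-process pattern (plain C with a pthread mutex standing in for the spinlock; illustrative only):

#include <pthread.h>
#include <stdio.h>

/* minimal stand-in for the moved list: splice under the lock, process outside it */
struct node { struct node *next; int id; };

static pthread_mutex_t moved_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *moved;

static void handle_moved(void)
{
	struct node *local, *n;

	pthread_mutex_lock(&moved_lock);
	local = moved;                  /* splice the whole list out... */
	moved = NULL;
	pthread_mutex_unlock(&moved_lock);

	for (n = local; n; n = n->next) /* ...and update entries without holding the lock */
		printf("updating BO mapping %d\n", n->id);
}

int main(void)
{
	struct node b = { NULL, 2 }, a = { &b, 1 };

	moved = &a;
	handle_moved();
	return 0;
}
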
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 47ef3e6e7178..a266dcf5daed 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
5903 pi->pcie_dpm_key_disabled = 0; 5903 pi->pcie_dpm_key_disabled = 0;
5904 pi->thermal_sclk_dpm_enabled = 0; 5904 pi->thermal_sclk_dpm_enabled = 0;
5905 5905
5906 if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) 5906 if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
5907 pi->caps_sclk_ds = true; 5907 pi->caps_sclk_ds = true;
5908 else 5908 else
5909 pi->caps_sclk_ds = false; 5909 pi->caps_sclk_ds = false;
@@ -6255,7 +6255,7 @@ static int ci_dpm_late_init(void *handle)
6255 int ret; 6255 int ret;
6256 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6256 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6257 6257
6258 if (!amdgpu_dpm) 6258 if (!adev->pm.dpm_enabled)
6259 return 0; 6259 return 0;
6260 6260
6261 /* init the sysfs and debugfs files late */ 6261 /* init the sysfs and debugfs files late */
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 0df22030e713..8ff4c60d1b59 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev,
1735 } 1735 }
1736} 1736}
1737 1737
1738static bool cik_need_full_reset(struct amdgpu_device *adev)
1739{
1740 /* change this when we support soft reset */
1741 return true;
1742}
1743
1738static const struct amdgpu_asic_funcs cik_asic_funcs = 1744static const struct amdgpu_asic_funcs cik_asic_funcs =
1739{ 1745{
1740 .read_disabled_bios = &cik_read_disabled_bios, 1746 .read_disabled_bios = &cik_read_disabled_bios,
@@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
1748 .get_config_memsize = &cik_get_config_memsize, 1754 .get_config_memsize = &cik_get_config_memsize,
1749 .flush_hdp = &cik_flush_hdp, 1755 .flush_hdp = &cik_flush_hdp,
1750 .invalidate_hdp = &cik_invalidate_hdp, 1756 .invalidate_hdp = &cik_invalidate_hdp,
1757 .need_full_reset = &cik_need_full_reset,
1751}; 1758};
1752 1759
1753static int cik_common_early_init(void *handle) 1760static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 452f88ea46a2..ada241bfeee9 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1823,7 +1823,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
1823 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 1823 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
1824 struct drm_device *dev = crtc->dev; 1824 struct drm_device *dev = crtc->dev;
1825 struct amdgpu_device *adev = dev->dev_private; 1825 struct amdgpu_device *adev = dev->dev_private;
1826 struct amdgpu_framebuffer *amdgpu_fb;
1827 struct drm_framebuffer *target_fb; 1826 struct drm_framebuffer *target_fb;
1828 struct drm_gem_object *obj; 1827 struct drm_gem_object *obj;
1829 struct amdgpu_bo *abo; 1828 struct amdgpu_bo *abo;
@@ -1842,18 +1841,15 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
1842 return 0; 1841 return 0;
1843 } 1842 }
1844 1843
1845 if (atomic) { 1844 if (atomic)
1846 amdgpu_fb = to_amdgpu_framebuffer(fb);
1847 target_fb = fb; 1845 target_fb = fb;
1848 } else { 1846 else
1849 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
1850 target_fb = crtc->primary->fb; 1847 target_fb = crtc->primary->fb;
1851 }
1852 1848
1853 /* If atomic, assume fb object is pinned & idle & fenced and 1849 /* If atomic, assume fb object is pinned & idle & fenced and
1854 * just update base pointers 1850 * just update base pointers
1855 */ 1851 */
1856 obj = amdgpu_fb->obj; 1852 obj = target_fb->obj[0];
1857 abo = gem_to_amdgpu_bo(obj); 1853 abo = gem_to_amdgpu_bo(obj);
1858 r = amdgpu_bo_reserve(abo, false); 1854 r = amdgpu_bo_reserve(abo, false);
1859 if (unlikely(r != 0)) 1855 if (unlikely(r != 0))
@@ -2043,8 +2039,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
2043 WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); 2039 WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
2044 2040
2045 if (!atomic && fb && fb != crtc->primary->fb) { 2041 if (!atomic && fb && fb != crtc->primary->fb) {
2046 amdgpu_fb = to_amdgpu_framebuffer(fb); 2042 abo = gem_to_amdgpu_bo(fb->obj[0]);
2047 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
2048 r = amdgpu_bo_reserve(abo, true); 2043 r = amdgpu_bo_reserve(abo, true);
2049 if (unlikely(r != 0)) 2044 if (unlikely(r != 0))
2050 return r; 2045 return r;
@@ -2526,11 +2521,9 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
2526 dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); 2521 dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
2527 if (crtc->primary->fb) { 2522 if (crtc->primary->fb) {
2528 int r; 2523 int r;
2529 struct amdgpu_framebuffer *amdgpu_fb;
2530 struct amdgpu_bo *abo; 2524 struct amdgpu_bo *abo;
2531 2525
2532 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 2526 abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
2533 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
2534 r = amdgpu_bo_reserve(abo, true); 2527 r = amdgpu_bo_reserve(abo, true);
2535 if (unlikely(r)) 2528 if (unlikely(r))
2536 DRM_ERROR("failed to reserve abo before unpin\n"); 2529 DRM_ERROR("failed to reserve abo before unpin\n");
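
The dce_v10_0.c hunks above (and the matching hunks in the other DCE files below) all make the same substitution: instead of casting to struct amdgpu_framebuffer to reach a driver-private obj pointer, the code reads the per-plane GEM object array that the core framebuffer already carries (fb->obj[0]). A tiny illustration of the accessor change (simplified hypothetical structs, not the DRM types):

#include <stdio.h>

struct gem_object { unsigned long size; };

/* the core framebuffer tracks one backing object per plane */
struct framebuffer { struct gem_object *obj[4]; };

int main(void)
{
	struct gem_object bo = { 4096 };
	struct framebuffer fb = { { &bo } };

	/* plane 0 backing object, no driver-specific cast needed */
	printf("fb plane 0 backed by %lu bytes\n", fb.obj[0]->size);
	return 0;
}
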
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index a7c1c584a191..a5b96eac3033 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -173,6 +173,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
173 ARRAY_SIZE(polaris11_golden_settings_a11)); 173 ARRAY_SIZE(polaris11_golden_settings_a11));
174 break; 174 break;
175 case CHIP_POLARIS10: 175 case CHIP_POLARIS10:
176 case CHIP_VEGAM:
176 amdgpu_device_program_register_sequence(adev, 177 amdgpu_device_program_register_sequence(adev,
177 polaris10_golden_settings_a11, 178 polaris10_golden_settings_a11,
178 ARRAY_SIZE(polaris10_golden_settings_a11)); 179 ARRAY_SIZE(polaris10_golden_settings_a11));
@@ -473,6 +474,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev)
473 num_crtc = 2; 474 num_crtc = 2;
474 break; 475 break;
475 case CHIP_POLARIS10: 476 case CHIP_POLARIS10:
477 case CHIP_VEGAM:
476 num_crtc = 6; 478 num_crtc = 6;
477 break; 479 break;
478 case CHIP_POLARIS11: 480 case CHIP_POLARIS11:
@@ -1445,6 +1447,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev)
1445 adev->mode_info.audio.num_pins = 7; 1447 adev->mode_info.audio.num_pins = 7;
1446 break; 1448 break;
1447 case CHIP_POLARIS10: 1449 case CHIP_POLARIS10:
1450 case CHIP_VEGAM:
1448 adev->mode_info.audio.num_pins = 8; 1451 adev->mode_info.audio.num_pins = 8;
1449 break; 1452 break;
1450 case CHIP_POLARIS11: 1453 case CHIP_POLARIS11:
@@ -1862,7 +1865,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
1862 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 1865 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
1863 struct drm_device *dev = crtc->dev; 1866 struct drm_device *dev = crtc->dev;
1864 struct amdgpu_device *adev = dev->dev_private; 1867 struct amdgpu_device *adev = dev->dev_private;
1865 struct amdgpu_framebuffer *amdgpu_fb;
1866 struct drm_framebuffer *target_fb; 1868 struct drm_framebuffer *target_fb;
1867 struct drm_gem_object *obj; 1869 struct drm_gem_object *obj;
1868 struct amdgpu_bo *abo; 1870 struct amdgpu_bo *abo;
@@ -1881,18 +1883,15 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
1881 return 0; 1883 return 0;
1882 } 1884 }
1883 1885
1884 if (atomic) { 1886 if (atomic)
1885 amdgpu_fb = to_amdgpu_framebuffer(fb);
1886 target_fb = fb; 1887 target_fb = fb;
1887 } else { 1888 else
1888 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
1889 target_fb = crtc->primary->fb; 1889 target_fb = crtc->primary->fb;
1890 }
1891 1890
1892 /* If atomic, assume fb object is pinned & idle & fenced and 1891 /* If atomic, assume fb object is pinned & idle & fenced and
1893 * just update base pointers 1892 * just update base pointers
1894 */ 1893 */
1895 obj = amdgpu_fb->obj; 1894 obj = target_fb->obj[0];
1896 abo = gem_to_amdgpu_bo(obj); 1895 abo = gem_to_amdgpu_bo(obj);
1897 r = amdgpu_bo_reserve(abo, false); 1896 r = amdgpu_bo_reserve(abo, false);
1898 if (unlikely(r != 0)) 1897 if (unlikely(r != 0))
@@ -2082,8 +2081,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
2082 WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); 2081 WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
2083 2082
2084 if (!atomic && fb && fb != crtc->primary->fb) { 2083 if (!atomic && fb && fb != crtc->primary->fb) {
2085 amdgpu_fb = to_amdgpu_framebuffer(fb); 2084 abo = gem_to_amdgpu_bo(fb->obj[0]);
2086 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
2087 r = amdgpu_bo_reserve(abo, true); 2085 r = amdgpu_bo_reserve(abo, true);
2088 if (unlikely(r != 0)) 2086 if (unlikely(r != 0))
2089 return r; 2087 return r;
@@ -2253,7 +2251,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
2253 2251
2254 if ((adev->asic_type == CHIP_POLARIS10) || 2252 if ((adev->asic_type == CHIP_POLARIS10) ||
2255 (adev->asic_type == CHIP_POLARIS11) || 2253 (adev->asic_type == CHIP_POLARIS11) ||
2256 (adev->asic_type == CHIP_POLARIS12)) { 2254 (adev->asic_type == CHIP_POLARIS12) ||
2255 (adev->asic_type == CHIP_VEGAM)) {
2257 struct amdgpu_encoder *amdgpu_encoder = 2256 struct amdgpu_encoder *amdgpu_encoder =
2258 to_amdgpu_encoder(amdgpu_crtc->encoder); 2257 to_amdgpu_encoder(amdgpu_crtc->encoder);
2259 struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; 2258 struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
@@ -2601,11 +2600,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc)
2601 dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); 2600 dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
2602 if (crtc->primary->fb) { 2601 if (crtc->primary->fb) {
2603 int r; 2602 int r;
2604 struct amdgpu_framebuffer *amdgpu_fb;
2605 struct amdgpu_bo *abo; 2603 struct amdgpu_bo *abo;
2606 2604
2607 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 2605 abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
2608 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
2609 r = amdgpu_bo_reserve(abo, true); 2606 r = amdgpu_bo_reserve(abo, true);
2610 if (unlikely(r)) 2607 if (unlikely(r))
2611 DRM_ERROR("failed to reserve abo before unpin\n"); 2608 DRM_ERROR("failed to reserve abo before unpin\n");
@@ -2673,7 +2670,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
2673 2670
2674 if ((adev->asic_type == CHIP_POLARIS10) || 2671 if ((adev->asic_type == CHIP_POLARIS10) ||
2675 (adev->asic_type == CHIP_POLARIS11) || 2672 (adev->asic_type == CHIP_POLARIS11) ||
2676 (adev->asic_type == CHIP_POLARIS12)) { 2673 (adev->asic_type == CHIP_POLARIS12) ||
2674 (adev->asic_type == CHIP_VEGAM)) {
2677 struct amdgpu_encoder *amdgpu_encoder = 2675 struct amdgpu_encoder *amdgpu_encoder =
2678 to_amdgpu_encoder(amdgpu_crtc->encoder); 2676 to_amdgpu_encoder(amdgpu_crtc->encoder);
2679 int encoder_mode = 2677 int encoder_mode =
@@ -2830,6 +2828,7 @@ static int dce_v11_0_early_init(void *handle)
2830 adev->mode_info.num_dig = 9; 2828 adev->mode_info.num_dig = 9;
2831 break; 2829 break;
2832 case CHIP_POLARIS10: 2830 case CHIP_POLARIS10:
2831 case CHIP_VEGAM:
2833 adev->mode_info.num_hpd = 6; 2832 adev->mode_info.num_hpd = 6;
2834 adev->mode_info.num_dig = 6; 2833 adev->mode_info.num_dig = 6;
2835 break; 2834 break;
@@ -2949,7 +2948,8 @@ static int dce_v11_0_hw_init(void *handle)
2949 amdgpu_atombios_encoder_init_dig(adev); 2948 amdgpu_atombios_encoder_init_dig(adev);
2950 if ((adev->asic_type == CHIP_POLARIS10) || 2949 if ((adev->asic_type == CHIP_POLARIS10) ||
2951 (adev->asic_type == CHIP_POLARIS11) || 2950 (adev->asic_type == CHIP_POLARIS11) ||
2952 (adev->asic_type == CHIP_POLARIS12)) { 2951 (adev->asic_type == CHIP_POLARIS12) ||
2952 (adev->asic_type == CHIP_VEGAM)) {
2953 amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, 2953 amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
2954 DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); 2954 DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
2955 amdgpu_atombios_crtc_set_dce_clock(adev, 0, 2955 amdgpu_atombios_crtc_set_dce_clock(adev, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 9f67b7fd3487..394cc1e8fe20 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
1780 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 1780 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
1781 struct drm_device *dev = crtc->dev; 1781 struct drm_device *dev = crtc->dev;
1782 struct amdgpu_device *adev = dev->dev_private; 1782 struct amdgpu_device *adev = dev->dev_private;
1783 struct amdgpu_framebuffer *amdgpu_fb;
1784 struct drm_framebuffer *target_fb; 1783 struct drm_framebuffer *target_fb;
1785 struct drm_gem_object *obj; 1784 struct drm_gem_object *obj;
1786 struct amdgpu_bo *abo; 1785 struct amdgpu_bo *abo;
@@ -1798,18 +1797,15 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
1798 return 0; 1797 return 0;
1799 } 1798 }
1800 1799
1801 if (atomic) { 1800 if (atomic)
1802 amdgpu_fb = to_amdgpu_framebuffer(fb);
1803 target_fb = fb; 1801 target_fb = fb;
1804 } else { 1802 else
1805 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
1806 target_fb = crtc->primary->fb; 1803 target_fb = crtc->primary->fb;
1807 }
1808 1804
1809 /* If atomic, assume fb object is pinned & idle & fenced and 1805 /* If atomic, assume fb object is pinned & idle & fenced and
1810 * just update base pointers 1806 * just update base pointers
1811 */ 1807 */
1812 obj = amdgpu_fb->obj; 1808 obj = target_fb->obj[0];
1813 abo = gem_to_amdgpu_bo(obj); 1809 abo = gem_to_amdgpu_bo(obj);
1814 r = amdgpu_bo_reserve(abo, false); 1810 r = amdgpu_bo_reserve(abo, false);
1815 if (unlikely(r != 0)) 1811 if (unlikely(r != 0))
@@ -1978,8 +1974,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
1978 WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); 1974 WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
1979 1975
1980 if (!atomic && fb && fb != crtc->primary->fb) { 1976 if (!atomic && fb && fb != crtc->primary->fb) {
1981 amdgpu_fb = to_amdgpu_framebuffer(fb); 1977 abo = gem_to_amdgpu_bo(fb->obj[0]);
1982 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
1983 r = amdgpu_bo_reserve(abo, true); 1978 r = amdgpu_bo_reserve(abo, true);
1984 if (unlikely(r != 0)) 1979 if (unlikely(r != 0))
1985 return r; 1980 return r;
@@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
2414 dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); 2409 dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
2415 if (crtc->primary->fb) { 2410 if (crtc->primary->fb) {
2416 int r; 2411 int r;
2417 struct amdgpu_framebuffer *amdgpu_fb;
2418 struct amdgpu_bo *abo; 2412 struct amdgpu_bo *abo;
2419 2413
2420 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 2414 abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
2421 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
2422 r = amdgpu_bo_reserve(abo, true); 2415 r = amdgpu_bo_reserve(abo, true);
2423 if (unlikely(r)) 2416 if (unlikely(r))
2424 DRM_ERROR("failed to reserve abo before unpin\n"); 2417 DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index f55422cbd77a..c9b9ab8f1b05 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
1754 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 1754 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
1755 struct drm_device *dev = crtc->dev; 1755 struct drm_device *dev = crtc->dev;
1756 struct amdgpu_device *adev = dev->dev_private; 1756 struct amdgpu_device *adev = dev->dev_private;
1757 struct amdgpu_framebuffer *amdgpu_fb;
1758 struct drm_framebuffer *target_fb; 1757 struct drm_framebuffer *target_fb;
1759 struct drm_gem_object *obj; 1758 struct drm_gem_object *obj;
1760 struct amdgpu_bo *abo; 1759 struct amdgpu_bo *abo;
@@ -1773,18 +1772,15 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
1773 return 0; 1772 return 0;
1774 } 1773 }
1775 1774
1776 if (atomic) { 1775 if (atomic)
1777 amdgpu_fb = to_amdgpu_framebuffer(fb);
1778 target_fb = fb; 1776 target_fb = fb;
1779 } else { 1777 else
1780 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
1781 target_fb = crtc->primary->fb; 1778 target_fb = crtc->primary->fb;
1782 }
1783 1779
1784 /* If atomic, assume fb object is pinned & idle & fenced and 1780 /* If atomic, assume fb object is pinned & idle & fenced and
1785 * just update base pointers 1781 * just update base pointers
1786 */ 1782 */
1787 obj = amdgpu_fb->obj; 1783 obj = target_fb->obj[0];
1788 abo = gem_to_amdgpu_bo(obj); 1784 abo = gem_to_amdgpu_bo(obj);
1789 r = amdgpu_bo_reserve(abo, false); 1785 r = amdgpu_bo_reserve(abo, false);
1790 if (unlikely(r != 0)) 1786 if (unlikely(r != 0))
@@ -1955,8 +1951,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
1955 WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); 1951 WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
1956 1952
1957 if (!atomic && fb && fb != crtc->primary->fb) { 1953 if (!atomic && fb && fb != crtc->primary->fb) {
1958 amdgpu_fb = to_amdgpu_framebuffer(fb); 1954 abo = gem_to_amdgpu_bo(fb->obj[0]);
1959 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
1960 r = amdgpu_bo_reserve(abo, true); 1955 r = amdgpu_bo_reserve(abo, true);
1961 if (unlikely(r != 0)) 1956 if (unlikely(r != 0))
1962 return r; 1957 return r;
@@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
2430 dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); 2425 dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
2431 if (crtc->primary->fb) { 2426 if (crtc->primary->fb) {
2432 int r; 2427 int r;
2433 struct amdgpu_framebuffer *amdgpu_fb;
2434 struct amdgpu_bo *abo; 2428 struct amdgpu_bo *abo;
2435 2429
2436 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 2430 abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
2437 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
2438 r = amdgpu_bo_reserve(abo, true); 2431 r = amdgpu_bo_reserve(abo, true);
2439 if (unlikely(r)) 2432 if (unlikely(r))
2440 DRM_ERROR("failed to reserve abo before unpin\n"); 2433 DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index b51f05dc9582..dbf2ccd0c744 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -168,11 +168,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
168 dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); 168 dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
169 if (crtc->primary->fb) { 169 if (crtc->primary->fb) {
170 int r; 170 int r;
171 struct amdgpu_framebuffer *amdgpu_fb;
172 struct amdgpu_bo *abo; 171 struct amdgpu_bo *abo;
173 172
174 amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); 173 abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
175 abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
176 r = amdgpu_bo_reserve(abo, true); 174 r = amdgpu_bo_reserve(abo, true);
177 if (unlikely(r)) 175 if (unlikely(r))
178 DRM_ERROR("failed to reserve abo before unpin\n"); 176 DRM_ERROR("failed to reserve abo before unpin\n");
@@ -329,7 +327,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector)
329 return 0; 327 return 0;
330} 328}
331 329
332static int dce_virtual_mode_valid(struct drm_connector *connector, 330static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector,
333 struct drm_display_mode *mode) 331 struct drm_display_mode *mode)
334{ 332{
335 return MODE_OK; 333 return MODE_OK;
@@ -462,8 +460,9 @@ static int dce_virtual_hw_init(void *handle)
462 break; 460 break;
463 case CHIP_CARRIZO: 461 case CHIP_CARRIZO:
464 case CHIP_STONEY: 462 case CHIP_STONEY:
465 case CHIP_POLARIS11:
466 case CHIP_POLARIS10: 463 case CHIP_POLARIS10:
464 case CHIP_POLARIS11:
465 case CHIP_VEGAM:
467 dce_v11_0_disable_dce(adev); 466 dce_v11_0_disable_dce(adev);
468 break; 467 break;
469 case CHIP_TOPAZ: 468 case CHIP_TOPAZ:
@@ -474,6 +473,7 @@ static int dce_virtual_hw_init(void *handle)
474 break; 473 break;
475 case CHIP_VEGA10: 474 case CHIP_VEGA10:
476 case CHIP_VEGA12: 475 case CHIP_VEGA12:
476 case CHIP_VEGA20:
477 break; 477 break;
478 default: 478 default:
479 DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); 479 DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
new file mode 100644
index 000000000000..9935371db7ce
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -0,0 +1,120 @@
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include "amdgpu.h"
24#include "df_v1_7.h"
25
26#include "df/df_1_7_default.h"
27#include "df/df_1_7_offset.h"
28#include "df/df_1_7_sh_mask.h"
29
30static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
31
32static void df_v1_7_init (struct amdgpu_device *adev)
33{
34}
35
36static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
37 bool enable)
38{
39 u32 tmp;
40
41 if (enable) {
42 tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
43 tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
44 WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
45 } else
46 WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
47 mmFabricConfigAccessControl_DEFAULT);
48}
49
50static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev)
51{
52 u32 tmp;
53
54 tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
55 tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
56 tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
57
58 return tmp;
59}
60
61static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
62{
63 int fb_channel_number;
64
65 fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
66
67 return df_v1_7_channel_number[fb_channel_number];
68}
69
70static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
71 bool enable)
72{
73 u32 tmp;
74
75 /* Put DF on broadcast mode */
76 adev->df_funcs->enable_broadcast_mode(adev, true);
77
78 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
79 tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
80 tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
81 tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY;
82 WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
83 } else {
84 tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
85 tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
86 tmp |= DF_V1_7_MGCG_DISABLE;
87 WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
88 }
89
90 	/* Exit broadcast mode */
91 adev->df_funcs->enable_broadcast_mode(adev, false);
92}
93
94static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
95 u32 *flags)
96{
97 u32 tmp;
98
99 /* AMD_CG_SUPPORT_DF_MGCG */
100 tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
101 if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY)
102 *flags |= AMD_CG_SUPPORT_DF_MGCG;
103}
104
105static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
106 bool enable)
107{
108 WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0,
109 ForceParWrRMW, enable);
110}
111
112const struct amdgpu_df_funcs df_v1_7_funcs = {
113 .init = df_v1_7_init,
114 .enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
115 .get_fb_channel_number = df_v1_7_get_fb_channel_number,
116 .get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
117 .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating,
118 .get_clockgating_state = df_v1_7_get_clockgating_state,
119 .enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw,
120};
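
Both new DF implementations bracket their clock-gating register writes with an enable/disable of broadcast mode so that a single write reaches every data-fabric instance. A stand-alone sketch of that bracket pattern (hypothetical register helpers, not the SOC15 macros):

#include <stdbool.h>
#include <stdio.h>

static void set_broadcast(bool on)     { printf("broadcast %s\n", on ? "on" : "off"); }
static void write_clkgater(unsigned v) { printf("DfGlobalClkGater <- 0x%x\n", v); }

/* program medium-grain clock gating on all DF instances at once */
static void update_mgcg(bool enable)
{
	set_broadcast(true);
	write_clkgater(enable ? 13 /* 15-cycle delay */ : 0 /* disabled */);
	set_broadcast(false);
}

int main(void)
{
	update_mgcg(true);
	return 0;
}
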
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
new file mode 100644
index 000000000000..74621104c487
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
@@ -0,0 +1,40 @@
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef __DF_V1_7_H__
25#define __DF_V1_7_H__
26
27#include "soc15_common.h"
28enum DF_V1_7_MGCG
29{
30 DF_V1_7_MGCG_DISABLE = 0,
31 DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1,
32 DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2,
33 DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13,
34 DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14,
35 DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15
36};
37
38extern const struct amdgpu_df_funcs df_v1_7_funcs;
39
40#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
new file mode 100644
index 000000000000..60608b3df881
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -0,0 +1,116 @@
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include "amdgpu.h"
24#include "df_v3_6.h"
25
26#include "df/df_3_6_default.h"
27#include "df/df_3_6_offset.h"
28#include "df/df_3_6_sh_mask.h"
29
30static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
31 16, 32, 0, 0, 0, 2, 4, 8};
32
33static void df_v3_6_init(struct amdgpu_device *adev)
34{
35}
36
37static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
38 bool enable)
39{
40 u32 tmp;
41
42 if (enable) {
43 tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
44 tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
45 WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
46 } else
47 WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
48 mmFabricConfigAccessControl_DEFAULT);
49}
50
51static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
52{
53 u32 tmp;
54
55 tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
56 tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
57 tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
58
59 return tmp;
60}
61
62static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
63{
64 int fb_channel_number;
65
66 fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
67 if (fb_channel_number > ARRAY_SIZE(df_v3_6_channel_number))
68 fb_channel_number = 0;
69
70 return df_v3_6_channel_number[fb_channel_number];
71}
72
73static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
74 bool enable)
75{
76 u32 tmp;
77
78 /* Put DF on broadcast mode */
79 adev->df_funcs->enable_broadcast_mode(adev, true);
80
81 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
82 tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
83 tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
84 tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
85 WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
86 } else {
87 tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
88 tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
89 tmp |= DF_V3_6_MGCG_DISABLE;
90 WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
91 }
92
93 /* Exit broadcast mode */
94 adev->df_funcs->enable_broadcast_mode(adev, false);
95}
96
97static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
98 u32 *flags)
99{
100 u32 tmp;
101
102 /* AMD_CG_SUPPORT_DF_MGCG */
103 tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
104 if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
105 *flags |= AMD_CG_SUPPORT_DF_MGCG;
106}
107
108const struct amdgpu_df_funcs df_v3_6_funcs = {
109 .init = df_v3_6_init,
110 .enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
111 .get_fb_channel_number = df_v3_6_get_fb_channel_number,
112 .get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
113 .update_medium_grain_clock_gating =
114 df_v3_6_update_medium_grain_clock_gating,
115 .get_clockgating_state = df_v3_6_get_clockgating_state,
116};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
new file mode 100644
index 000000000000..e79c58e5efcb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -0,0 +1,40 @@
/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __DF_V3_6_H__
#define __DF_V3_6_H__

#include "soc15_common.h"

enum DF_V3_6_MGCG {
	DF_V3_6_MGCG_DISABLE = 0,
	DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1,
	DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2,
	DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13,
	DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14,
	DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
};

extern const struct amdgpu_df_funcs df_v3_6_funcs;

#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e14263fca1c9..818874b13c99 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -125,18 +125,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 125MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 126MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127 127
128MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
130MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
131MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
132MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
133MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
134MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
135MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
136MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
137MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
138MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
139
140MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 128MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
141MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); 129MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
142MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 130MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
@@ -149,6 +137,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
149MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); 137MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
150MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 138MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
151 139
140MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
141MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
142MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
143MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
144MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
145MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
146MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
147MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
148MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
149MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
150MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
151
152MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); 152MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
153MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); 153MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
154MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); 154MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
@@ -161,6 +161,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
161MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); 161MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
162MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); 162MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
163 163
164MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
165MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
166MODULE_FIRMWARE("amdgpu/vegam_me.bin");
167MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
168MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
169MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
170
164static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 171static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
165{ 172{
166 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 173 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
@@ -292,6 +299,37 @@ static const u32 tonga_mgcg_cgcg_init[] =
292 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 299 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
293}; 300};
294 301
302static const u32 golden_settings_vegam_a11[] =
303{
304 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
305 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
306 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
311 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
312 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
313 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
314 mmSQ_CONFIG, 0x07f80000, 0x01180000,
315 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
316 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
317 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
318 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
319 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
320 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
321};
322
323static const u32 vegam_golden_common_all[] =
324{
325 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
327 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
328 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
329 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
330 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
331};
332
295static const u32 golden_settings_polaris11_a11[] = 333static const u32 golden_settings_polaris11_a11[] =
296{ 334{
297 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, 335 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
@@ -712,6 +750,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
712 tonga_golden_common_all, 750 tonga_golden_common_all,
713 ARRAY_SIZE(tonga_golden_common_all)); 751 ARRAY_SIZE(tonga_golden_common_all));
714 break; 752 break;
753 case CHIP_VEGAM:
754 amdgpu_device_program_register_sequence(adev,
755 golden_settings_vegam_a11,
756 ARRAY_SIZE(golden_settings_vegam_a11));
757 amdgpu_device_program_register_sequence(adev,
758 vegam_golden_common_all,
759 ARRAY_SIZE(vegam_golden_common_all));
760 break;
715 case CHIP_POLARIS11: 761 case CHIP_POLARIS11:
716 case CHIP_POLARIS12: 762 case CHIP_POLARIS12:
717 amdgpu_device_program_register_sequence(adev, 763 amdgpu_device_program_register_sequence(adev,
@@ -918,17 +964,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
918 case CHIP_FIJI: 964 case CHIP_FIJI:
919 chip_name = "fiji"; 965 chip_name = "fiji";
920 break; 966 break;
921 case CHIP_POLARIS11: 967 case CHIP_STONEY:
922 chip_name = "polaris11"; 968 chip_name = "stoney";
923 break; 969 break;
924 case CHIP_POLARIS10: 970 case CHIP_POLARIS10:
925 chip_name = "polaris10"; 971 chip_name = "polaris10";
926 break; 972 break;
973 case CHIP_POLARIS11:
974 chip_name = "polaris11";
975 break;
927 case CHIP_POLARIS12: 976 case CHIP_POLARIS12:
928 chip_name = "polaris12"; 977 chip_name = "polaris12";
929 break; 978 break;
930 case CHIP_STONEY: 979 case CHIP_VEGAM:
931 chip_name = "stoney"; 980 chip_name = "vegam";
932 break; 981 break;
933 default: 982 default:
934 BUG(); 983 BUG();
@@ -1770,6 +1819,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1770 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1819 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1771 break; 1820 break;
1772 case CHIP_POLARIS10: 1821 case CHIP_POLARIS10:
1822 case CHIP_VEGAM:
1773 ret = amdgpu_atombios_get_gfx_info(adev); 1823 ret = amdgpu_atombios_get_gfx_info(adev);
1774 if (ret) 1824 if (ret)
1775 return ret; 1825 return ret;
@@ -1957,12 +2007,13 @@ static int gfx_v8_0_sw_init(void *handle)
1957 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2007 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1958 2008
1959 switch (adev->asic_type) { 2009 switch (adev->asic_type) {
1960 case CHIP_FIJI:
1961 case CHIP_TONGA: 2010 case CHIP_TONGA:
2011 case CHIP_CARRIZO:
2012 case CHIP_FIJI:
2013 case CHIP_POLARIS10:
1962 case CHIP_POLARIS11: 2014 case CHIP_POLARIS11:
1963 case CHIP_POLARIS12: 2015 case CHIP_POLARIS12:
1964 case CHIP_POLARIS10: 2016 case CHIP_VEGAM:
1965 case CHIP_CARRIZO:
1966 adev->gfx.mec.num_mec = 2; 2017 adev->gfx.mec.num_mec = 2;
1967 break; 2018 break;
1968 case CHIP_TOPAZ: 2019 case CHIP_TOPAZ:
@@ -2323,6 +2374,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2323 2374
2324 break; 2375 break;
2325 case CHIP_FIJI: 2376 case CHIP_FIJI:
2377 case CHIP_VEGAM:
2326 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2378 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2379 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2380 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
@@ -3504,6 +3556,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3504{ 3556{
3505 switch (adev->asic_type) { 3557 switch (adev->asic_type) {
3506 case CHIP_FIJI: 3558 case CHIP_FIJI:
3559 case CHIP_VEGAM:
3507 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3560 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3508 RB_XSEL2(1) | PKR_MAP(2) | 3561 RB_XSEL2(1) | PKR_MAP(2) |
3509 PKR_XSEL(1) | PKR_YSEL(1) | 3562 PKR_XSEL(1) | PKR_YSEL(1) |
@@ -4071,7 +4124,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4071 gfx_v8_0_init_power_gating(adev); 4124 gfx_v8_0_init_power_gating(adev);
4072 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); 4125 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4073 } else if ((adev->asic_type == CHIP_POLARIS11) || 4126 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4074 (adev->asic_type == CHIP_POLARIS12)) { 4127 (adev->asic_type == CHIP_POLARIS12) ||
4128 (adev->asic_type == CHIP_VEGAM)) {
4075 gfx_v8_0_init_csb(adev); 4129 gfx_v8_0_init_csb(adev);
4076 gfx_v8_0_init_save_restore_list(adev); 4130 gfx_v8_0_init_save_restore_list(adev);
4077 gfx_v8_0_enable_save_restore_machine(adev); 4131 gfx_v8_0_enable_save_restore_machine(adev);
@@ -4146,7 +4200,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4146 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); 4200 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4147 if (adev->asic_type == CHIP_POLARIS11 || 4201 if (adev->asic_type == CHIP_POLARIS11 ||
4148 adev->asic_type == CHIP_POLARIS10 || 4202 adev->asic_type == CHIP_POLARIS10 ||
4149 adev->asic_type == CHIP_POLARIS12) { 4203 adev->asic_type == CHIP_POLARIS12 ||
4204 adev->asic_type == CHIP_VEGAM) {
4150 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); 4205 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4151 tmp &= ~0x3; 4206 tmp &= ~0x3;
4152 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); 4207 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
@@ -5498,7 +5553,8 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
5498 bool enable) 5553 bool enable)
5499{ 5554{
5500 if ((adev->asic_type == CHIP_POLARIS11) || 5555 if ((adev->asic_type == CHIP_POLARIS11) ||
5501 (adev->asic_type == CHIP_POLARIS12)) 5556 (adev->asic_type == CHIP_POLARIS12) ||
5557 (adev->asic_type == CHIP_VEGAM))
5502 /* Send msg to SMU via Powerplay */ 5558 /* Send msg to SMU via Powerplay */
5503 amdgpu_device_ip_set_powergating_state(adev, 5559 amdgpu_device_ip_set_powergating_state(adev,
5504 AMD_IP_BLOCK_TYPE_SMC, 5560 AMD_IP_BLOCK_TYPE_SMC,
@@ -5588,6 +5644,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
5588 break; 5644 break;
5589 case CHIP_POLARIS11: 5645 case CHIP_POLARIS11:
5590 case CHIP_POLARIS12: 5646 case CHIP_POLARIS12:
5647 case CHIP_VEGAM:
5591 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5648 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5592 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5649 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5593 else 5650 else
@@ -6154,6 +6211,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle,
6154 case CHIP_POLARIS10: 6211 case CHIP_POLARIS10:
6155 case CHIP_POLARIS11: 6212 case CHIP_POLARIS11:
6156 case CHIP_POLARIS12: 6213 case CHIP_POLARIS12:
6214 case CHIP_VEGAM:
6157 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6215 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6158 break; 6216 break;
6159 default: 6217 default:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9d39fd5b1822..d7530fdfaad5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -27,6 +27,7 @@
27#include "amdgpu_gfx.h" 27#include "amdgpu_gfx.h"
28#include "soc15.h" 28#include "soc15.h"
29#include "soc15d.h" 29#include "soc15d.h"
30#include "amdgpu_atomfirmware.h"
30 31
31#include "gc/gc_9_0_offset.h" 32#include "gc/gc_9_0_offset.h"
32#include "gc/gc_9_0_sh_mask.h" 33#include "gc/gc_9_0_sh_mask.h"
@@ -41,7 +42,6 @@
41#define GFX9_MEC_HPD_SIZE 2048 42#define GFX9_MEC_HPD_SIZE 2048
42#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 43#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
43#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 44#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
44#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
45 45
46#define mmPWR_MISC_CNTL_STATUS 0x0183 46#define mmPWR_MISC_CNTL_STATUS 0x0183
47#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 47#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0
@@ -64,6 +64,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
64MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); 64MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
65MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); 65MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
66 66
67MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
68MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
69MODULE_FIRMWARE("amdgpu/vega20_me.bin");
70MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
71MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
72MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
73
67MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 74MODULE_FIRMWARE("amdgpu/raven_ce.bin");
68MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 75MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
69MODULE_FIRMWARE("amdgpu/raven_me.bin"); 76MODULE_FIRMWARE("amdgpu/raven_me.bin");
@@ -73,29 +80,22 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
73 80
74static const struct soc15_reg_golden golden_settings_gc_9_0[] = 81static const struct soc15_reg_golden golden_settings_gc_9_0[] =
75{ 82{
76 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
77 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
78 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
79 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 83 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
80 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 84 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
81 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
82 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 85 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
83 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 86 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
84 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 87 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
85 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
86 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
87 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
88 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
89 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
90 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 88 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
91 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 89 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
90 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
91 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
92 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
92 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 93 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
93 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 94 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
94 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 95 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
95 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 96 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
96 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 97 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
97 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 98 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
98 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
99}; 99};
100 100
101static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 101static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@@ -109,6 +109,20 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) 109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
110}; 110};
111 111
112static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
113{
114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
115 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
116 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
117 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
118 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
119 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
120 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
121 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
122 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
123 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
124};
125
112static const struct soc15_reg_golden golden_settings_gc_9_1[] = 126static const struct soc15_reg_golden golden_settings_gc_9_1[] =
113{ 127{
114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
@@ -185,6 +199,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
185 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) 199 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
186}; 200};
187 201
202static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
203{
204 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
205 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
206 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
207 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
208 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
209 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
210 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
211 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
212};
213
214static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
215{
216 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
217 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
218 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
219 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
220 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
221 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
222 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
223 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
224};
225
188#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 226#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
189#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 227#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
190#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 228#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@ -218,6 +256,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
218 golden_settings_gc_9_2_1_vg12, 256 golden_settings_gc_9_2_1_vg12,
219 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); 257 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
220 break; 258 break;
259 case CHIP_VEGA20:
260 soc15_program_register_sequence(adev,
261 golden_settings_gc_9_0,
262 ARRAY_SIZE(golden_settings_gc_9_0));
263 soc15_program_register_sequence(adev,
264 golden_settings_gc_9_0_vg20,
265 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
266 break;
221 case CHIP_RAVEN: 267 case CHIP_RAVEN:
222 soc15_program_register_sequence(adev, 268 soc15_program_register_sequence(adev,
223 golden_settings_gc_9_1, 269 golden_settings_gc_9_1,
@@ -401,6 +447,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
401 kfree(adev->gfx.rlc.register_list_format); 447 kfree(adev->gfx.rlc.register_list_format);
402} 448}
403 449
450static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
451{
452 const struct rlc_firmware_header_v2_1 *rlc_hdr;
453
454 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
455 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
456 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
457 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
458 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
459 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
460 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
461 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
462 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
463 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
464 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
465 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
466 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
467 adev->gfx.rlc.reg_list_format_direct_reg_list_length =
468 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
469}
470
404static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 471static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
405{ 472{
406 const char *chip_name; 473 const char *chip_name;
@@ -412,6 +479,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
412 const struct rlc_firmware_header_v2_0 *rlc_hdr; 479 const struct rlc_firmware_header_v2_0 *rlc_hdr;
413 unsigned int *tmp = NULL; 480 unsigned int *tmp = NULL;
414 unsigned int i = 0; 481 unsigned int i = 0;
482 uint16_t version_major;
483 uint16_t version_minor;
415 484
416 DRM_DEBUG("\n"); 485 DRM_DEBUG("\n");
417 486
@@ -422,6 +491,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
422 case CHIP_VEGA12: 491 case CHIP_VEGA12:
423 chip_name = "vega12"; 492 chip_name = "vega12";
424 break; 493 break;
494 case CHIP_VEGA20:
495 chip_name = "vega20";
496 break;
425 case CHIP_RAVEN: 497 case CHIP_RAVEN:
426 chip_name = "raven"; 498 chip_name = "raven";
427 break; 499 break;
@@ -468,6 +540,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
468 goto out; 540 goto out;
469 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 541 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
470 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 542 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
543
544 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
545 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
546 if (version_major == 2 && version_minor == 1)
547 adev->gfx.rlc.is_rlc_v2_1 = true;
548
471 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 549 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
472 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 550 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
473 adev->gfx.rlc.save_and_restore_offset = 551 adev->gfx.rlc.save_and_restore_offset =
@@ -508,6 +586,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
508 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 586 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
509 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 587 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
510 588
589 if (adev->gfx.rlc.is_rlc_v2_1)
590 gfx_v9_0_init_rlc_ext_microcode(adev);
591
511 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 592 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
512 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 593 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
513 if (err) 594 if (err)
@@ -566,6 +647,26 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
566 adev->firmware.fw_size += 647 adev->firmware.fw_size +=
567 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 648 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
568 649
650 if (adev->gfx.rlc.is_rlc_v2_1) {
651 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
652 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
653 info->fw = adev->gfx.rlc_fw;
654 adev->firmware.fw_size +=
655 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
656
657 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
658 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
659 info->fw = adev->gfx.rlc_fw;
660 adev->firmware.fw_size +=
661 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
662
663 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
664 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
665 info->fw = adev->gfx.rlc_fw;
666 adev->firmware.fw_size +=
667 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
668 }
669
569 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 670 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
570 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 671 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
571 info->fw = adev->gfx.mec_fw; 672 info->fw = adev->gfx.mec_fw;
@@ -1013,9 +1114,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1013 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q 1114 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1014}; 1115};
1015 1116
1016static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1117static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1017{ 1118{
1018 u32 gb_addr_config; 1119 u32 gb_addr_config;
1120 int err;
1019 1121
1020 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1122 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1021 1123
@@ -1037,6 +1139,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1037 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1139 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1038 DRM_INFO("fix gfx.config for vega12\n"); 1140 DRM_INFO("fix gfx.config for vega12\n");
1039 break; 1141 break;
1142 case CHIP_VEGA20:
1143 adev->gfx.config.max_hw_contexts = 8;
1144 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1145 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1146 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1147 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1148 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1149 gb_addr_config &= ~0xf3e777ff;
1150 gb_addr_config |= 0x22014042;
1151 /* check vbios table if gpu info is not available */
1152 err = amdgpu_atomfirmware_get_gfx_info(adev);
1153 if (err)
1154 return err;
1155 break;
1040 case CHIP_RAVEN: 1156 case CHIP_RAVEN:
1041 adev->gfx.config.max_hw_contexts = 8; 1157 adev->gfx.config.max_hw_contexts = 8;
1042 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1158 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -1086,6 +1202,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1086 adev->gfx.config.gb_addr_config, 1202 adev->gfx.config.gb_addr_config,
1087 GB_ADDR_CONFIG, 1203 GB_ADDR_CONFIG,
1088 PIPE_INTERLEAVE_SIZE)); 1204 PIPE_INTERLEAVE_SIZE));
1205
1206 return 0;
1089} 1207}
1090 1208
1091static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1209static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
@@ -1319,6 +1437,7 @@ static int gfx_v9_0_sw_init(void *handle)
1319 switch (adev->asic_type) { 1437 switch (adev->asic_type) {
1320 case CHIP_VEGA10: 1438 case CHIP_VEGA10:
1321 case CHIP_VEGA12: 1439 case CHIP_VEGA12:
1440 case CHIP_VEGA20:
1322 case CHIP_RAVEN: 1441 case CHIP_RAVEN:
1323 adev->gfx.mec.num_mec = 2; 1442 adev->gfx.mec.num_mec = 2;
1324 break; 1443 break;
@@ -1446,7 +1565,9 @@ static int gfx_v9_0_sw_init(void *handle)
1446 1565
1447 adev->gfx.ce_ram_size = 0x8000; 1566 adev->gfx.ce_ram_size = 0x8000;
1448 1567
1449 gfx_v9_0_gpu_early_init(adev); 1568 r = gfx_v9_0_gpu_early_init(adev);
1569 if (r)
1570 return r;
1450 1571
1451 r = gfx_v9_0_ngg_init(adev); 1572 r = gfx_v9_0_ngg_init(adev);
1452 if (r) 1573 if (r)
@@ -1600,6 +1721,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
1600 1721
1601 gfx_v9_0_setup_rb(adev); 1722 gfx_v9_0_setup_rb(adev);
1602 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 1723 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1724 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1603 1725
1604 /* XXX SH_MEM regs */ 1726 /* XXX SH_MEM regs */
1605 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1727 /* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -1616,7 +1738,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
1616 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1738 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1617 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1739 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1618 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); 1740 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1619 tmp = adev->gmc.shared_aperture_start >> 48; 1741 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1742 (adev->gmc.private_aperture_start >> 48));
1743 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1744 (adev->gmc.shared_aperture_start >> 48));
1620 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); 1745 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
1621 } 1746 }
1622 } 1747 }
@@ -1708,55 +1833,42 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
1708 adev->gfx.rlc.clear_state_size); 1833 adev->gfx.rlc.clear_state_size);
1709} 1834}
1710 1835
1711static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, 1836static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
1712 int indirect_offset, 1837 int indirect_offset,
1713 int list_size, 1838 int list_size,
1714 int *unique_indirect_regs, 1839 int *unique_indirect_regs,
1715 int *unique_indirect_reg_count, 1840 int *unique_indirect_reg_count,
1716 int max_indirect_reg_count,
1717 int *indirect_start_offsets, 1841 int *indirect_start_offsets,
1718 int *indirect_start_offsets_count, 1842 int *indirect_start_offsets_count)
1719 int max_indirect_start_offsets_count)
1720{ 1843{
1721 int idx; 1844 int idx;
1722 bool new_entry = true;
1723 1845
1724 for (; indirect_offset < list_size; indirect_offset++) { 1846 for (; indirect_offset < list_size; indirect_offset++) {
1847 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
1848 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
1725 1849
1726 if (new_entry) { 1850 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
1727 new_entry = false; 1851 indirect_offset += 2;
1728 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
1729 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
1730 BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
1731 }
1732 1852
1733 if (register_list_format[indirect_offset] == 0xFFFFFFFF) { 1853 /* look for the matching index */
1734 new_entry = true; 1854 for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
1735 continue; 1855 if (unique_indirect_regs[idx] ==
1736 } 1856 register_list_format[indirect_offset] ||
1857 !unique_indirect_regs[idx])
1858 break;
1859 }
1737 1860
1738 indirect_offset += 2; 1861 BUG_ON(idx >= *unique_indirect_reg_count);
1739 1862
1740 /* look for the matching indice */ 1863 if (!unique_indirect_regs[idx])
1741 for (idx = 0; idx < *unique_indirect_reg_count; idx++) { 1864 unique_indirect_regs[idx] = register_list_format[indirect_offset];
1742 if (unique_indirect_regs[idx] ==
1743 register_list_format[indirect_offset])
1744 break;
1745 }
1746 1865
1747 if (idx >= *unique_indirect_reg_count) { 1866 indirect_offset++;
1748 unique_indirect_regs[*unique_indirect_reg_count] =
1749 register_list_format[indirect_offset];
1750 idx = *unique_indirect_reg_count;
1751 *unique_indirect_reg_count = *unique_indirect_reg_count + 1;
1752 BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
1753 } 1867 }
1754
1755 register_list_format[indirect_offset] = idx;
1756 } 1868 }
1757} 1869}
1758 1870
1759static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) 1871static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
1760{ 1872{
1761 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 1873 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1762 int unique_indirect_reg_count = 0; 1874 int unique_indirect_reg_count = 0;
@@ -1765,7 +1877,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
1765 int indirect_start_offsets_count = 0; 1877 int indirect_start_offsets_count = 0;
1766 1878
1767 int list_size = 0; 1879 int list_size = 0;
1768 int i = 0; 1880 int i = 0, j = 0;
1769 u32 tmp = 0; 1881 u32 tmp = 0;
1770 1882
1771 u32 *register_list_format = 1883 u32 *register_list_format =
@@ -1776,15 +1888,14 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
1776 adev->gfx.rlc.reg_list_format_size_bytes); 1888 adev->gfx.rlc.reg_list_format_size_bytes);
1777 1889
1778 /* setup unique_indirect_regs array and indirect_start_offsets array */ 1890 /* setup unique_indirect_regs array and indirect_start_offsets array */
1779 gfx_v9_0_parse_ind_reg_list(register_list_format, 1891 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
1780 GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, 1892 gfx_v9_1_parse_ind_reg_list(register_list_format,
1781 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 1893 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
1782 unique_indirect_regs, 1894 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
1783 &unique_indirect_reg_count, 1895 unique_indirect_regs,
1784 ARRAY_SIZE(unique_indirect_regs), 1896 &unique_indirect_reg_count,
1785 indirect_start_offsets, 1897 indirect_start_offsets,
1786 &indirect_start_offsets_count, 1898 &indirect_start_offsets_count);
1787 ARRAY_SIZE(indirect_start_offsets));
1788 1899
1789 /* enable auto inc in case it is disabled */ 1900 /* enable auto inc in case it is disabled */
1790 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 1901 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
@@ -1798,19 +1909,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
1798 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 1909 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
1799 adev->gfx.rlc.register_restore[i]); 1910 adev->gfx.rlc.register_restore[i]);
1800 1911
1801 /* load direct register */
1802 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
1803 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
1804 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
1805 adev->gfx.rlc.register_restore[i]);
1806
1807 /* load indirect register */ 1912 /* load indirect register */
1808 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 1913 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1809 adev->gfx.rlc.reg_list_format_start); 1914 adev->gfx.rlc.reg_list_format_start);
1810 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 1915
1916 /* direct register portion */
1917 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
1811 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 1918 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
1812 register_list_format[i]); 1919 register_list_format[i]);
1813 1920
1921 /* indirect register portion */
1922 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
1923 if (register_list_format[i] == 0xFFFFFFFF) {
1924 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
1925 continue;
1926 }
1927
1928 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
1929 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
1930
1931 for (j = 0; j < unique_indirect_reg_count; j++) {
1932 if (register_list_format[i] == unique_indirect_regs[j]) {
1933 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
1934 break;
1935 }
1936 }
1937
1938 BUG_ON(j >= unique_indirect_reg_count);
1939
1940 i++;
1941 }
1942
1814 /* set save/restore list size */ 1943 /* set save/restore list size */
1815 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 1944 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
1816 list_size = list_size >> 1; 1945 list_size = list_size >> 1;
@@ -1823,14 +1952,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
1823 adev->gfx.rlc.starting_offsets_start); 1952 adev->gfx.rlc.starting_offsets_start);
1824 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 1953 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
1825 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 1954 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
1826 indirect_start_offsets[i]); 1955 indirect_start_offsets[i]);
1827 1956
1828 /* load unique indirect regs*/ 1957 /* load unique indirect regs*/
1829 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 1958 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
1830 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, 1959 if (unique_indirect_regs[i] != 0) {
1831 unique_indirect_regs[i] & 0x3FFFF); 1960 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
1832 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, 1961 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
1833 unique_indirect_regs[i] >> 20); 1962 unique_indirect_regs[i] & 0x3FFFF);
1963
1964 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
1965 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
1966 unique_indirect_regs[i] >> 20);
1967 }
1834 } 1968 }
1835 1969
1836 kfree(register_list_format); 1970 kfree(register_list_format);
@@ -2010,6 +2144,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad
2010 2144
2011static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2145static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2012{ 2146{
2147 if (!adev->gfx.rlc.is_rlc_v2_1)
2148 return;
2149
2013 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2150 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2014 AMD_PG_SUPPORT_GFX_SMG | 2151 AMD_PG_SUPPORT_GFX_SMG |
2015 AMD_PG_SUPPORT_GFX_DMG | 2152 AMD_PG_SUPPORT_GFX_DMG |
@@ -2017,27 +2154,12 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2017 AMD_PG_SUPPORT_GDS | 2154 AMD_PG_SUPPORT_GDS |
2018 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2155 AMD_PG_SUPPORT_RLC_SMU_HS)) {
2019 gfx_v9_0_init_csb(adev); 2156 gfx_v9_0_init_csb(adev);
2020 gfx_v9_0_init_rlc_save_restore_list(adev); 2157 gfx_v9_1_init_rlc_save_restore_list(adev);
2021 gfx_v9_0_enable_save_restore_machine(adev); 2158 gfx_v9_0_enable_save_restore_machine(adev);
2022 2159
2023 if (adev->asic_type == CHIP_RAVEN) { 2160 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2024 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2161 adev->gfx.rlc.cp_table_gpu_addr >> 8);
2025 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2162 gfx_v9_0_init_gfx_power_gating(adev);
2026 gfx_v9_0_init_gfx_power_gating(adev);
2027
2028 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
2029 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
2030 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
2031 } else {
2032 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
2033 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
2034 }
2035
2036 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
2037 gfx_v9_0_enable_cp_power_gating(adev, true);
2038 else
2039 gfx_v9_0_enable_cp_power_gating(adev, false);
2040 }
2041 } 2163 }
2042} 2164}
2043 2165
@@ -3061,6 +3183,9 @@ static int gfx_v9_0_hw_fini(void *handle)
3061 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3183 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3062 int i; 3184 int i;
3063 3185
3186 amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
3187 AMD_PG_STATE_UNGATE);
3188
3064 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3189 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3065 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3190 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3066 3191
@@ -3279,6 +3404,11 @@ static int gfx_v9_0_late_init(void *handle)
3279 if (r) 3404 if (r)
3280 return r; 3405 return r;
3281 3406
3407 r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
3408 AMD_PG_STATE_GATE);
3409 if (r)
3410 return r;
3411
3282 return 0; 3412 return 0;
3283} 3413}
3284 3414
@@ -3339,8 +3469,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
3339static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 3469static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3340 bool enable) 3470 bool enable)
3341{ 3471{
3342 /* TODO: double check if we need to perform under safe mdoe */ 3472 gfx_v9_0_enter_rlc_safe_mode(adev);
3343 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
3344 3473
3345 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 3474 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3346 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 3475 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -3351,7 +3480,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3351 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 3480 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3352 } 3481 }
3353 3482
3354 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 3483 gfx_v9_0_exit_rlc_safe_mode(adev);
3355} 3484}
3356 3485
3357static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 3486static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -3605,6 +3734,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
3605 switch (adev->asic_type) { 3734 switch (adev->asic_type) {
3606 case CHIP_VEGA10: 3735 case CHIP_VEGA10:
3607 case CHIP_VEGA12: 3736 case CHIP_VEGA12:
3737 case CHIP_VEGA20:
3608 case CHIP_RAVEN: 3738 case CHIP_RAVEN:
3609 gfx_v9_0_update_gfx_clock_gating(adev, 3739 gfx_v9_0_update_gfx_clock_gating(adev,
3610 state == AMD_CG_STATE_GATE ? true : false); 3740 state == AMD_CG_STATE_GATE ? true : false);
@@ -3742,7 +3872,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
3742 } 3872 }
3743 3873
3744 amdgpu_ring_write(ring, header); 3874 amdgpu_ring_write(ring, header);
3745BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 3875 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3746 amdgpu_ring_write(ring, 3876 amdgpu_ring_write(ring,
3747#ifdef __BIG_ENDIAN 3877#ifdef __BIG_ENDIAN
3748 (2 << 0) | 3878 (2 << 0) |
@@ -3774,13 +3904,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
3774{ 3904{
3775 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 3905 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
3776 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 3906 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
3907 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
3777 3908
3778 /* RELEASE_MEM - flush caches, send int */ 3909 /* RELEASE_MEM - flush caches, send int */
3779 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 3910 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
3780 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 3911 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
3781 EOP_TC_ACTION_EN | 3912 EOP_TC_NC_ACTION_EN) :
3782 EOP_TC_WB_ACTION_EN | 3913 (EOP_TCL1_ACTION_EN |
3783 EOP_TC_MD_ACTION_EN | 3914 EOP_TC_ACTION_EN |
3915 EOP_TC_WB_ACTION_EN |
3916 EOP_TC_MD_ACTION_EN)) |
3784 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3917 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3785 EVENT_INDEX(5))); 3918 EVENT_INDEX(5)));
3786 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 3919 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -4137,6 +4270,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4137 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4270 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4138} 4271}
4139 4272
4273static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4274 uint32_t reg0, uint32_t reg1,
4275 uint32_t ref, uint32_t mask)
4276{
4277 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4278
4279 if (amdgpu_sriov_vf(ring->adev))
4280 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4281 ref, mask, 0x20);
4282 else
4283 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4284 ref, mask);
4285}
4286
4140static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4287static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4141 enum amdgpu_interrupt_state state) 4288 enum amdgpu_interrupt_state state)
4142{ 4289{
@@ -4458,6 +4605,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
4458 .emit_tmz = gfx_v9_0_ring_emit_tmz, 4605 .emit_tmz = gfx_v9_0_ring_emit_tmz,
4459 .emit_wreg = gfx_v9_0_ring_emit_wreg, 4606 .emit_wreg = gfx_v9_0_ring_emit_wreg,
4460 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 4607 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4608 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
4461}; 4609};
4462 4610
4463static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 4611static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4492,6 +4640,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
4492 .set_priority = gfx_v9_0_ring_set_priority_compute, 4640 .set_priority = gfx_v9_0_ring_set_priority_compute,
4493 .emit_wreg = gfx_v9_0_ring_emit_wreg, 4641 .emit_wreg = gfx_v9_0_ring_emit_wreg,
4494 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 4642 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4643 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
4495}; 4644};
4496 4645
4497static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 4646static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4522,6 +4671,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
4522 .emit_rreg = gfx_v9_0_ring_emit_rreg, 4671 .emit_rreg = gfx_v9_0_ring_emit_rreg,
4523 .emit_wreg = gfx_v9_0_ring_emit_wreg, 4672 .emit_wreg = gfx_v9_0_ring_emit_wreg,
4524 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 4673 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4674 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
4525}; 4675};
4526 4676
4527static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 4677static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -4577,6 +4727,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
4577 switch (adev->asic_type) { 4727 switch (adev->asic_type) {
4578 case CHIP_VEGA10: 4728 case CHIP_VEGA10:
4579 case CHIP_VEGA12: 4729 case CHIP_VEGA12:
4730 case CHIP_VEGA20:
4580 case CHIP_RAVEN: 4731 case CHIP_RAVEN:
4581 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 4732 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
4582 break; 4733 break;
@@ -4686,6 +4837,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
4686 4837
4687 cu_info->number = active_cu_number; 4838 cu_info->number = active_cu_number;
4688 cu_info->ao_cu_mask = ao_cu_mask; 4839 cu_info->ao_cu_mask = ao_cu_mask;
4840 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
4689 4841
4690 return 0; 4842 return 0;
4691} 4843}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 5617cf62c566..79f9ac29019b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle)
819{ 819{
820 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 820 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
821 821
822 amdgpu_bo_late_init(adev);
823
822 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) 824 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
823 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); 825 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
824 else 826 else
825 return 0; 827 return 0;
826} 828}
827 829
830static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
831{
832 u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
833 unsigned size;
834
835 if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
836 size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
837 } else {
838 u32 viewport = RREG32(mmVIEWPORT_SIZE);
839 size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
840 REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
841 4);
842 }
843 /* return 0 if the pre-OS buffer uses up most of vram */
844 if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
845 return 0;
846 return size;
847}
848
828static int gmc_v6_0_sw_init(void *handle) 849static int gmc_v6_0_sw_init(void *handle)
829{ 850{
830 int r; 851 int r;
@@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle)
851 872
852 adev->gmc.mc_mask = 0xffffffffffULL; 873 adev->gmc.mc_mask = 0xffffffffffULL;
853 874
854 adev->gmc.stolen_size = 256 * 1024;
855
856 adev->need_dma32 = false; 875 adev->need_dma32 = false;
857 dma_bits = adev->need_dma32 ? 32 : 40; 876 dma_bits = adev->need_dma32 ? 32 : 40;
858 r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); 877 r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle)
878 if (r) 897 if (r)
879 return r; 898 return r;
880 899
900 adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev);
901
881 r = amdgpu_bo_init(adev); 902 r = amdgpu_bo_init(adev);
882 if (r) 903 if (r)
883 return r; 904 return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 80054f36e487..7147bfe25a23 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -958,12 +958,33 @@ static int gmc_v7_0_late_init(void *handle)
958{ 958{
959 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 959 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
960 960
961 amdgpu_bo_late_init(adev);
962
961 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) 963 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
962 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); 964 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
963 else 965 else
964 return 0; 966 return 0;
965} 967}
966 968
969static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
970{
971 u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
972 unsigned size;
973
974 if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
975 size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
976 } else {
977 u32 viewport = RREG32(mmVIEWPORT_SIZE);
978 size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
979 REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
980 4);
981 }
982 /* return 0 if the pre-OS buffer uses up most of vram */
983 if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
984 return 0;
985 return size;
986}
987
967static int gmc_v7_0_sw_init(void *handle) 988static int gmc_v7_0_sw_init(void *handle)
968{ 989{
969 int r; 990 int r;
@@ -998,8 +1019,6 @@ static int gmc_v7_0_sw_init(void *handle)
998 */ 1019 */
999 adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ 1020 adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
1000 1021
1001 adev->gmc.stolen_size = 256 * 1024;
1002
1003 /* set DMA mask + need_dma32 flags. 1022 /* set DMA mask + need_dma32 flags.
1004 * PCIE - can handle 40-bits. 1023 * PCIE - can handle 40-bits.
1005 * IGP - can handle 40-bits 1024 * IGP - can handle 40-bits
@@ -1030,6 +1049,8 @@ static int gmc_v7_0_sw_init(void *handle)
1030 if (r) 1049 if (r)
1031 return r; 1050 return r;
1032 1051
1052 adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev);
1053
1033 /* Memory manager */ 1054 /* Memory manager */
1034 r = amdgpu_bo_init(adev); 1055 r = amdgpu_bo_init(adev);
1035 if (r) 1056 if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index d71d4cb68f9c..1edbe6b477b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -138,6 +138,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
138 break; 138 break;
139 case CHIP_POLARIS11: 139 case CHIP_POLARIS11:
140 case CHIP_POLARIS12: 140 case CHIP_POLARIS12:
141 case CHIP_VEGAM:
141 amdgpu_device_program_register_sequence(adev, 142 amdgpu_device_program_register_sequence(adev,
142 golden_settings_polaris11_a11, 143 golden_settings_polaris11_a11,
143 ARRAY_SIZE(golden_settings_polaris11_a11)); 144 ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -231,6 +232,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
231 case CHIP_FIJI: 232 case CHIP_FIJI:
232 case CHIP_CARRIZO: 233 case CHIP_CARRIZO:
233 case CHIP_STONEY: 234 case CHIP_STONEY:
235 case CHIP_VEGAM:
234 return 0; 236 return 0;
235 default: BUG(); 237 default: BUG();
236 } 238 }
@@ -567,9 +569,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
567 /* set the gart size */ 569 /* set the gart size */
568 if (amdgpu_gart_size == -1) { 570 if (amdgpu_gart_size == -1) {
569 switch (adev->asic_type) { 571 switch (adev->asic_type) {
570 case CHIP_POLARIS11: /* all engines support GPUVM */
571 case CHIP_POLARIS10: /* all engines support GPUVM */ 572 case CHIP_POLARIS10: /* all engines support GPUVM */
573 case CHIP_POLARIS11: /* all engines support GPUVM */
572 case CHIP_POLARIS12: /* all engines support GPUVM */ 574 case CHIP_POLARIS12: /* all engines support GPUVM */
575 case CHIP_VEGAM: /* all engines support GPUVM */
573 default: 576 default:
574 adev->gmc.gart_size = 256ULL << 20; 577 adev->gmc.gart_size = 256ULL << 20;
575 break; 578 break;
@@ -1049,12 +1052,33 @@ static int gmc_v8_0_late_init(void *handle)
1049{ 1052{
1050 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1053 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1051 1054
1055 amdgpu_bo_late_init(adev);
1056
1052 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) 1057 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
1053 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); 1058 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
1054 else 1059 else
1055 return 0; 1060 return 0;
1056} 1061}
1057 1062
1063static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
1064{
1065 u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
1066 unsigned size;
1067
1068 if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
1069 size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
1070 } else {
1071 u32 viewport = RREG32(mmVIEWPORT_SIZE);
1072 size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
1073 REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
1074 4);
1075 }
1076 /* return 0 if the pre-OS buffer uses up most of vram */
1077 if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
1078 return 0;
1079 return size;
1080}
1081
1058#define mmMC_SEQ_MISC0_FIJI 0xA71 1082#define mmMC_SEQ_MISC0_FIJI 0xA71
1059 1083
1060static int gmc_v8_0_sw_init(void *handle) 1084static int gmc_v8_0_sw_init(void *handle)
@@ -1068,7 +1092,8 @@ static int gmc_v8_0_sw_init(void *handle)
1068 } else { 1092 } else {
1069 u32 tmp; 1093 u32 tmp;
1070 1094
1071 if (adev->asic_type == CHIP_FIJI) 1095 if ((adev->asic_type == CHIP_FIJI) ||
1096 (adev->asic_type == CHIP_VEGAM))
1072 tmp = RREG32(mmMC_SEQ_MISC0_FIJI); 1097 tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
1073 else 1098 else
1074 tmp = RREG32(mmMC_SEQ_MISC0); 1099 tmp = RREG32(mmMC_SEQ_MISC0);
@@ -1096,8 +1121,6 @@ static int gmc_v8_0_sw_init(void *handle)
1096 */ 1121 */
1097 adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ 1122 adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
1098 1123
1099 adev->gmc.stolen_size = 256 * 1024;
1100
1101 /* set DMA mask + need_dma32 flags. 1124 /* set DMA mask + need_dma32 flags.
1102 * PCIE - can handle 40-bits. 1125 * PCIE - can handle 40-bits.
1103 * IGP - can handle 40-bits 1126 * IGP - can handle 40-bits
@@ -1128,6 +1151,8 @@ static int gmc_v8_0_sw_init(void *handle)
1128 if (r) 1151 if (r)
1129 return r; 1152 return r;
1130 1153
1154 adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev);
1155
1131 /* Memory manager */ 1156 /* Memory manager */
1132 r = amdgpu_bo_init(adev); 1157 r = amdgpu_bo_init(adev);
1133 if (r) 1158 if (r)
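Note: the ordering in sw_init matters here. gmc_v8_0_get_vbios_fb_size() needs adev->gmc.real_vram_size, which is only known once mc_init has run, so the stolen_size assignment moves from the top of sw_init to just before the memory manager is brought up (the reservation itself is assumed to be carved out during that bring-up). Condensed sketch of the resulting flow, error handling trimmed:

	r = gmc_v8_0_mc_init(adev);	/* fills in adev->gmc.real_vram_size */
	if (r)
		return r;

	/* only now can the pre-OS framebuffer size be computed */
	adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev);

	/* Memory manager */
	r = amdgpu_bo_init(adev);	/* stolen reservation assumed to be taken here */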
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e687363900bb..3c0a85d4e4ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -43,19 +43,13 @@
43#include "gfxhub_v1_0.h" 43#include "gfxhub_v1_0.h"
44#include "mmhub_v1_0.h" 44#include "mmhub_v1_0.h"
45 45
46#define mmDF_CS_AON0_DramBaseAddress0 0x0044 46/* add these here since we already include dce12 headers and these are for DCN */
47#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0 47#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
48//DF_CS_AON0_DramBaseAddress0 48#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
49#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 49#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
50#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 50#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
51#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4 51#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
52#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8 52#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
53#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc
54#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L
55#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L
56#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L
57#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L
58#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L
59 53
60/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ 54/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
61#define AMDGPU_NUM_OF_VMIDS 8 55#define AMDGPU_NUM_OF_VMIDS 8
@@ -385,11 +379,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
385 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), 379 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
386 upper_32_bits(pd_addr)); 380 upper_32_bits(pd_addr));
387 381
388 amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); 382 amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
389 383 hub->vm_inv_eng0_ack + eng,
390 /* wait for the invalidate to complete */ 384 req, 1 << vmid);
391 amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
392 1 << vmid, 1 << vmid);
393 385
394 return pd_addr; 386 return pd_addr;
395} 387}
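Note: gmc_v9_0_emit_flush_gpu_tlb() now asks the ring for a fused "write one register, then poll another" operation instead of emitting the invalidate request and the ack wait separately, which lets engines with a dedicated packet do both in one step. Rings without such a packet fall back to a generic helper; its body is not part of this excerpt, so the sketch below is an assumption about what that fallback amounts to:

static void emit_reg_write_reg_wait_sketch(struct amdgpu_ring *ring,
					   uint32_t reg0, uint32_t reg1,
					   uint32_t ref, uint32_t mask)
{
	/* write 'ref' to reg0 ... */
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	/* ... then poll reg1 until (value & mask) == ref */
	amdgpu_ring_emit_reg_wait(ring, reg1, ref, mask);
}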
@@ -556,8 +548,7 @@ static int gmc_v9_0_early_init(void *handle)
556 adev->gmc.shared_aperture_start = 0x2000000000000000ULL; 548 adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
557 adev->gmc.shared_aperture_end = 549 adev->gmc.shared_aperture_end =
558 adev->gmc.shared_aperture_start + (4ULL << 30) - 1; 550 adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
559 adev->gmc.private_aperture_start = 551 adev->gmc.private_aperture_start = 0x1000000000000000ULL;
560 adev->gmc.shared_aperture_end + 1;
561 adev->gmc.private_aperture_end = 552 adev->gmc.private_aperture_end =
562 adev->gmc.private_aperture_start + (4ULL << 30) - 1; 553 adev->gmc.private_aperture_start + (4ULL << 30) - 1;
563 554
@@ -659,6 +650,11 @@ static int gmc_v9_0_late_init(void *handle)
659 unsigned i; 650 unsigned i;
660 int r; 651 int r;
661 652
653 /*
654 * TODO - Uncomment once GART corruption issue is fixed.
655 */
656 /* amdgpu_bo_late_init(adev); */
657
662 for(i = 0; i < adev->num_rings; ++i) { 658 for(i = 0; i < adev->num_rings; ++i) {
663 struct amdgpu_ring *ring = adev->rings[i]; 659 struct amdgpu_ring *ring = adev->rings[i];
664 unsigned vmhub = ring->funcs->vmhub; 660 unsigned vmhub = ring->funcs->vmhub;
@@ -679,6 +675,7 @@ static int gmc_v9_0_late_init(void *handle)
679 DRM_INFO("ECC is active.\n"); 675 DRM_INFO("ECC is active.\n");
680 } else if (r == 0) { 676 } else if (r == 0) {
681 DRM_INFO("ECC is not present.\n"); 677 DRM_INFO("ECC is not present.\n");
678 adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
682 } else { 679 } else {
683 DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); 680 DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r);
684 return r; 681 return r;
@@ -697,10 +694,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
697 amdgpu_device_vram_location(adev, &adev->gmc, base); 694 amdgpu_device_vram_location(adev, &adev->gmc, base);
698 amdgpu_device_gart_location(adev, mc); 695 amdgpu_device_gart_location(adev, mc);
699 /* base offset of vram pages */ 696 /* base offset of vram pages */
700 if (adev->flags & AMD_IS_APU) 697 adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
701 adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
702 else
703 adev->vm_manager.vram_base_offset = 0;
704} 698}
705 699
706/** 700/**
@@ -714,7 +708,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
714 */ 708 */
715static int gmc_v9_0_mc_init(struct amdgpu_device *adev) 709static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
716{ 710{
717 u32 tmp;
718 int chansize, numchan; 711 int chansize, numchan;
719 int r; 712 int r;
720 713
@@ -727,39 +720,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
727 else 720 else
728 chansize = 128; 721 chansize = 128;
729 722
730 tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); 723 numchan = adev->df_funcs->get_hbm_channel_number(adev);
731 tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
732 tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
733 switch (tmp) {
734 case 0:
735 default:
736 numchan = 1;
737 break;
738 case 1:
739 numchan = 2;
740 break;
741 case 2:
742 numchan = 0;
743 break;
744 case 3:
745 numchan = 4;
746 break;
747 case 4:
748 numchan = 0;
749 break;
750 case 5:
751 numchan = 8;
752 break;
753 case 6:
754 numchan = 0;
755 break;
756 case 7:
757 numchan = 16;
758 break;
759 case 8:
760 numchan = 2;
761 break;
762 }
763 adev->gmc.vram_width = numchan * chansize; 724 adev->gmc.vram_width = numchan * chansize;
764 } 725 }
765 726
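Note: the open-coded decode of DF_CS_AON0_DramBaseAddress0.IntLvNumChan is gone; the same lookup now lives behind adev->df_funcs->get_hbm_channel_number(), so the DF 3.6 block on Vega20 can supply its own mapping. A table-driven sketch equivalent to the removed switch (the actual df_v1_7 implementation is outside this excerpt):

static int hbm_channel_number_sketch(u32 intlv_num_chan)
{
	/* 0->1, 1->2, 2->0, 3->4, 4->0, 5->8, 6->0, 7->16, 8->2;
	 * anything else falls back to 1 like the old default case */
	static const int channel_map[] = { 1, 2, 0, 4, 0, 8, 0, 16, 2 };

	if (intlv_num_chan >= ARRAY_SIZE(channel_map))
		return 1;
	return channel_map[intlv_num_chan];
}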
@@ -792,6 +753,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
792 switch (adev->asic_type) { 753 switch (adev->asic_type) {
793 case CHIP_VEGA10: /* all engines support GPUVM */ 754 case CHIP_VEGA10: /* all engines support GPUVM */
794 case CHIP_VEGA12: /* all engines support GPUVM */ 755 case CHIP_VEGA12: /* all engines support GPUVM */
756 case CHIP_VEGA20:
795 default: 757 default:
796 adev->gmc.gart_size = 512ULL << 20; 758 adev->gmc.gart_size = 512ULL << 20;
797 break; 759 break;
@@ -826,6 +788,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
826 return amdgpu_gart_table_vram_alloc(adev); 788 return amdgpu_gart_table_vram_alloc(adev);
827} 789}
828 790
791static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
792{
793#if 0
794 u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
795#endif
796 unsigned size;
797
798 /*
799 * TODO Remove once GART corruption is resolved
800 * Check related code in gmc_v9_0_sw_fini
801 * */
802 size = 9 * 1024 * 1024;
803
804#if 0
805 if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
806 size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
807 } else {
808 u32 viewport;
809
810 switch (adev->asic_type) {
811 case CHIP_RAVEN:
812 viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
813 size = (REG_GET_FIELD(viewport,
814 HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
815 REG_GET_FIELD(viewport,
816 HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
817 4);
818 break;
819 case CHIP_VEGA10:
820 case CHIP_VEGA12:
821 default:
822 viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
823 size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
824 REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
825 4);
826 break;
827 }
828 }
829 /* return 0 if the pre-OS buffer uses up most of vram */
830 if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
831 return 0;
832
833#endif
834 return size;
835}
836
829static int gmc_v9_0_sw_init(void *handle) 837static int gmc_v9_0_sw_init(void *handle)
830{ 838{
831 int r; 839 int r;
@@ -851,6 +859,7 @@ static int gmc_v9_0_sw_init(void *handle)
851 break; 859 break;
852 case CHIP_VEGA10: 860 case CHIP_VEGA10:
853 case CHIP_VEGA12: 861 case CHIP_VEGA12:
862 case CHIP_VEGA20:
854 /* 863 /*
855 * To fulfill 4-level page support, 864 * To fulfill 4-level page support,
856 * vm size is 256TB (48bit), maximum size of Vega10, 865 * vm size is 256TB (48bit), maximum size of Vega10,
@@ -877,12 +886,6 @@ static int gmc_v9_0_sw_init(void *handle)
877 */ 886 */
878 adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ 887 adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
879 888
880 /*
881 * It needs to reserve 8M stolen memory for vega10
882 * TODO: Figure out how to avoid that...
883 */
884 adev->gmc.stolen_size = 8 * 1024 * 1024;
885
886 /* set DMA mask + need_dma32 flags. 889 /* set DMA mask + need_dma32 flags.
887 * PCIE - can handle 44-bits. 890 * PCIE - can handle 44-bits.
888 * IGP - can handle 44-bits 891 * IGP - can handle 44-bits
@@ -907,6 +910,8 @@ static int gmc_v9_0_sw_init(void *handle)
907 if (r) 910 if (r)
908 return r; 911 return r;
909 912
913 adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);
914
910 /* Memory manager */ 915 /* Memory manager */
911 r = amdgpu_bo_init(adev); 916 r = amdgpu_bo_init(adev);
912 if (r) 917 if (r)
@@ -950,6 +955,18 @@ static int gmc_v9_0_sw_fini(void *handle)
950 amdgpu_gem_force_release(adev); 955 amdgpu_gem_force_release(adev);
951 amdgpu_vm_manager_fini(adev); 956 amdgpu_vm_manager_fini(adev);
952 gmc_v9_0_gart_fini(adev); 957 gmc_v9_0_gart_fini(adev);
958
959 /*
960 * TODO:
961 * Currently there is a bug where some memory client outside
962 * of the driver writes to first 8M of VRAM on S3 resume,
963 * this overrides GART which by default gets placed in first 8M and
964 * causes VM_FAULTS once GTT is accessed.
 965 Keep the stolen memory reservation while this is not solved.
966 * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
967 */
968 amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
969
953 amdgpu_bo_fini(adev); 970 amdgpu_bo_fini(adev);
954 971
955 return 0; 972 return 0;
@@ -960,6 +977,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
960 977
961 switch (adev->asic_type) { 978 switch (adev->asic_type) {
962 case CHIP_VEGA10: 979 case CHIP_VEGA10:
980 case CHIP_VEGA20:
963 soc15_program_register_sequence(adev, 981 soc15_program_register_sequence(adev,
964 golden_settings_mmhub_1_0_0, 982 golden_settings_mmhub_1_0_0,
965 ARRAY_SIZE(golden_settings_mmhub_1_0_0)); 983 ARRAY_SIZE(golden_settings_mmhub_1_0_0));
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 26ba984ab2b7..17f7f074cedc 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -2817,7 +2817,7 @@ static int kv_dpm_init(struct amdgpu_device *adev)
2817 pi->caps_tcp_ramping = true; 2817 pi->caps_tcp_ramping = true;
2818 } 2818 }
2819 2819
2820 if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) 2820 if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
2821 pi->caps_sclk_ds = true; 2821 pi->caps_sclk_ds = true;
2822 else 2822 else
2823 pi->caps_sclk_ds = false; 2823 pi->caps_sclk_ds = false;
@@ -2974,7 +2974,7 @@ static int kv_dpm_late_init(void *handle)
2974 /* powerdown unused blocks for now */ 2974 /* powerdown unused blocks for now */
2975 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2975 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2976 2976
2977 if (!amdgpu_dpm) 2977 if (!adev->pm.dpm_enabled)
2978 return 0; 2978 return 0;
2979 2979
2980 kv_dpm_powergate_acp(adev, true); 2980 kv_dpm_powergate_acp(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 43f925773b57..3d53c4413f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -734,6 +734,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
734 switch (adev->asic_type) { 734 switch (adev->asic_type) {
735 case CHIP_VEGA10: 735 case CHIP_VEGA10:
736 case CHIP_VEGA12: 736 case CHIP_VEGA12:
737 case CHIP_VEGA20:
737 case CHIP_RAVEN: 738 case CHIP_RAVEN:
738 mmhub_v1_0_update_medium_grain_clock_gating(adev, 739 mmhub_v1_0_update_medium_grain_clock_gating(adev,
739 state == AMD_CG_STATE_GATE ? true : false); 740 state == AMD_CG_STATE_GATE ? true : false);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 493348672475..078f70faedcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
260 } while (timeout > 1); 260 } while (timeout > 1);
261 261
262flr_done: 262flr_done:
263 if (locked) 263 if (locked) {
264 adev->in_gpu_reset = 0;
264 mutex_unlock(&adev->lock_reset); 265 mutex_unlock(&adev->lock_reset);
266 }
265 267
266 /* Trigger recovery for world switch failure if no TDR */ 268 /* Trigger recovery for world switch failure if no TDR */
267 if (amdgpu_lockup_timeout == 0) 269 if (amdgpu_lockup_timeout == 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index df34dc79d444..365517c0121e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -34,10 +34,19 @@
34#define smnCPM_CONTROL 0x11180460 34#define smnCPM_CONTROL 0x11180460
35#define smnPCIE_CNTL2 0x11180070 35#define smnPCIE_CNTL2 0x11180070
36 36
37/* vega20 */
38#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011
39#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2
40
37static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) 41static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
38{ 42{
39 u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); 43 u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
40 44
45 if (adev->asic_type == CHIP_VEGA20)
46 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20);
47 else
48 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
49
41 tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; 50 tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
42 tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; 51 tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
43 52
@@ -75,10 +84,14 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
75 SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); 84 SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
76 85
77 u32 doorbell_range = RREG32(reg); 86 u32 doorbell_range = RREG32(reg);
87 u32 range = 2;
88
89 if (adev->asic_type == CHIP_VEGA20)
90 range = 8;
78 91
79 if (use_doorbell) { 92 if (use_doorbell) {
80 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); 93 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
81 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); 94 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range);
82 } else 95 } else
83 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); 96 doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
84 97
@@ -133,6 +146,9 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
133{ 146{
134 uint32_t def, data; 147 uint32_t def, data;
135 148
149 if (adev->asic_type == CHIP_VEGA20)
150 return;
151
136 /* NBIF_MGCG_CTRL_LCLK */ 152 /* NBIF_MGCG_CTRL_LCLK */
137 def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); 153 def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
138 154
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 8da6da90b1c9..0cf48d26c676 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id
40 GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */ 40 GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */
41 GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */ 41 GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */
42 GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialize the rings */ 42 GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialize the rings */
43 GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */
44 GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */
45 GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */
43 46
44 GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ 47 GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */
45}; 48};
46 49
47 50
51/*-----------------------------------------------------------------------------
52 NOTE: All physical addresses used in this interface are actually
53 GPU Virtual Addresses.
54*/
55
56
48/* Control registers of the TEE Gfx interface. These are located in 57/* Control registers of the TEE Gfx interface. These are located in
49* SRBM-to-PSP mailbox registers (total 8 registers). 58* SRBM-to-PSP mailbox registers (total 8 registers).
50*/ 59*/
@@ -55,8 +64,8 @@ struct psp_gfx_ctrl
55 volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */ 64 volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */
56 volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */ 65 volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */
57 volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */ 66 volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */
58 volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */ 67 volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual address of ring buffer (VMID=0) */
59 volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */ 68 volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual address of ring buffer (VMID=0) */
60 volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */ 69 volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */
61 70
62}; 71};
@@ -78,6 +87,8 @@ enum psp_gfx_cmd_id
78 GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ 87 GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */
79 GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ 88 GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */
80 GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ 89 GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */
90 GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */
91 GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
81 92
82}; 93};
83 94
@@ -85,11 +96,11 @@ enum psp_gfx_cmd_id
85/* Command to load Trusted Application binary into PSP OS. */ 96/* Command to load Trusted Application binary into PSP OS. */
86struct psp_gfx_cmd_load_ta 97struct psp_gfx_cmd_load_ta
87{ 98{
88 uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */ 99 uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */
89 uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */ 100 uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */
90 uint32_t app_len; /* length of the TA binary in bytes */ 101 uint32_t app_len; /* length of the TA binary in bytes */
91 uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */ 102 uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */
92 uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */ 103 uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */
93 uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */ 104 uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */
94 105
95 /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided 106 /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided
@@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta
111*/ 122*/
112struct psp_gfx_buf_desc 123struct psp_gfx_buf_desc
113{ 124{
114 uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */ 125 uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */
115 uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */ 126 uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */
116 uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */ 127 uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */
117 128
118}; 129};
@@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd
145/* Command to setup TMR region. */ 156/* Command to setup TMR region. */
146struct psp_gfx_cmd_setup_tmr 157struct psp_gfx_cmd_setup_tmr
147{ 158{
148 uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */ 159 uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
149 uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */ 160 uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */
150 uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ 161 uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */
151 162
152}; 163};
@@ -174,18 +185,32 @@ enum psp_gfx_fw_type
174 GFX_FW_TYPE_ISP = 16, 185 GFX_FW_TYPE_ISP = 16,
175 GFX_FW_TYPE_ACP = 17, 186 GFX_FW_TYPE_ACP = 17,
176 GFX_FW_TYPE_SMU = 18, 187 GFX_FW_TYPE_SMU = 18,
188 GFX_FW_TYPE_MMSCH = 19,
189 GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20,
190 GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21,
191 GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22,
192 GFX_FW_TYPE_MAX = 23
177}; 193};
178 194
179/* Command to load HW IP FW. */ 195/* Command to load HW IP FW. */
180struct psp_gfx_cmd_load_ip_fw 196struct psp_gfx_cmd_load_ip_fw
181{ 197{
182 uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */ 198 uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
183 uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */ 199 uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */
184 uint32_t fw_size; /* FW buffer size in bytes */ 200 uint32_t fw_size; /* FW buffer size in bytes */
185 enum psp_gfx_fw_type fw_type; /* FW type */ 201 enum psp_gfx_fw_type fw_type; /* FW type */
186 202
187}; 203};
188 204
205/* Command to save/restore HW IP FW. */
206struct psp_gfx_cmd_save_restore_ip_fw
207{
 208 uint32_t save_fw; /* if set, command is used for saving fw, otherwise for restoring */
209 uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */
210 uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */
211 uint32_t buf_size; /* Size of the save/restore buffer in bytes */
212 enum psp_gfx_fw_type fw_type; /* FW type */
213};
189 214
190/* All GFX ring buffer commands. */ 215/* All GFX ring buffer commands. */
191union psp_gfx_commands 216union psp_gfx_commands
@@ -195,7 +220,7 @@ union psp_gfx_commands
195 struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd; 220 struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd;
196 struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; 221 struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
197 struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; 222 struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
198 223 struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
199}; 224};
200 225
201 226
@@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp
226 251
227 /* These fields are used for RBI only. They are all 0 in GPCOM commands 252 /* These fields are used for RBI only. They are all 0 in GPCOM commands
228 */ 253 */
229 uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */ 254 uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */
230 uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */ 255 uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */
231 uint32_t resp_offset; /* +20 offset within response buffer */ 256 uint32_t resp_offset; /* +20 offset within response buffer */
232 uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */ 257 uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */
233 258
@@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp
251/* Structure of the Ring Buffer Frame */ 276/* Structure of the Ring Buffer Frame */
252struct psp_gfx_rb_frame 277struct psp_gfx_rb_frame
253{ 278{
254 uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */ 279 uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */
255 uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */ 280 uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */
256 uint32_t cmd_buf_size; /* +8 command buffer size in bytes */ 281 uint32_t cmd_buf_size; /* +8 command buffer size in bytes */
257 uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */ 282 uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */
258 uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */ 283 uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */
259 uint32_t fence_value; /* +20 Fence value */ 284 uint32_t fence_value; /* +20 Fence value */
260 uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */ 285 uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */
261 uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */ 286 uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */
262 uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ 287 uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */
263 uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ 287 uint8_t frame_type; /* +33 1: destroy context frame, 0: all other frames; used only for RBI frames */ 263 uint8_t frame_type; /* +33 1: destroy context frame, 0: all other frames; used only for RBI frames */ 288 uint8_t frame_type; /* +33 1: destroy context frame, 0: all other frames; used only for RBI frames */
264 uint8_t reserved1[2]; /* +34 reserved, must be 0 */ 289 uint8_t reserved1[2]; /* +34 reserved, must be 0 */
265 uint32_t reserved2[7]; /* +40 reserved, must be 0 */ 290 uint32_t reserved2[7]; /* +36 reserved, must be 0 */
266 /* total 64 bytes */ 291 /* total 64 bytes */
267}; 292};
268 293
269#endif /* _PSP_TEE_GFX_IF_H_ */ 294#endif /* _PSP_TEE_GFX_IF_H_ */
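Note: with GFX_CMD_ID_SAVE_RESTORE and struct psp_gfx_cmd_save_restore_ip_fw added, a save/restore request is built by filling the new union member like any other GPCOM command. A hedged sketch of doing so; the cmd_id field and the cmd union member of struct psp_gfx_cmd_resp are assumed from the surrounding driver code (they are not shown in this hunk), and gpu_addr/size are placeholders:

static void prep_save_restore_cmd_sketch(struct psp_gfx_cmd_resp *cmd,
					 uint64_t gpu_addr, uint32_t size,
					 enum psp_gfx_fw_type fw_type,
					 bool save)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->cmd_id = GFX_CMD_ID_SAVE_RESTORE;
	cmd->cmd.cmd_save_restore_ip_fw.save_fw = save ? 1 : 0;
	cmd->cmd.cmd_save_restore_ip_fw.save_restore_addr_lo = lower_32_bits(gpu_addr);
	cmd->cmd.cmd_save_restore_ip_fw.save_restore_addr_hi = upper_32_bits(gpu_addr);
	cmd->cmd.cmd_save_restore_ip_fw.buf_size = size;
	cmd->cmd.cmd_save_restore_ip_fw.fw_type = fw_type;
}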
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 8873d833a7f7..0ff136d02d9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
70 case AMDGPU_UCODE_ID_RLC_G: 70 case AMDGPU_UCODE_ID_RLC_G:
71 *type = GFX_FW_TYPE_RLC_G; 71 *type = GFX_FW_TYPE_RLC_G;
72 break; 72 break;
73 case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
74 *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL;
75 break;
76 case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
77 *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
78 break;
79 case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
80 *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
81 break;
73 case AMDGPU_UCODE_ID_SMC: 82 case AMDGPU_UCODE_ID_SMC:
74 *type = GFX_FW_TYPE_SMU; 83 *type = GFX_FW_TYPE_SMU;
75 break; 84 break;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 196e75def1f2..0c768e388ace 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -41,6 +41,9 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
41MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); 41MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
42MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); 42MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
43MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); 43MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
44MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
45MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
46
44 47
45#define smnMP1_FIRMWARE_FLAGS 0x3010028 48#define smnMP1_FIRMWARE_FLAGS 0x3010028
46 49
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index be20a387d961..aa9ab299fd32 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -62,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin");
62MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin"); 62MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
63MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin"); 63MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin");
64MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin"); 64MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin");
65MODULE_FIRMWARE("amdgpu/vegam_sdma.bin");
66MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin");
65 67
66 68
67static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = 69static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -209,6 +211,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
209 break; 211 break;
210 case CHIP_POLARIS11: 212 case CHIP_POLARIS11:
211 case CHIP_POLARIS12: 213 case CHIP_POLARIS12:
214 case CHIP_VEGAM:
212 amdgpu_device_program_register_sequence(adev, 215 amdgpu_device_program_register_sequence(adev,
213 golden_settings_polaris11_a11, 216 golden_settings_polaris11_a11,
214 ARRAY_SIZE(golden_settings_polaris11_a11)); 217 ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -275,15 +278,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
275 case CHIP_FIJI: 278 case CHIP_FIJI:
276 chip_name = "fiji"; 279 chip_name = "fiji";
277 break; 280 break;
278 case CHIP_POLARIS11:
279 chip_name = "polaris11";
280 break;
281 case CHIP_POLARIS10: 281 case CHIP_POLARIS10:
282 chip_name = "polaris10"; 282 chip_name = "polaris10";
283 break; 283 break;
284 case CHIP_POLARIS11:
285 chip_name = "polaris11";
286 break;
284 case CHIP_POLARIS12: 287 case CHIP_POLARIS12:
285 chip_name = "polaris12"; 288 chip_name = "polaris12";
286 break; 289 break;
290 case CHIP_VEGAM:
291 chip_name = "vegam";
292 break;
287 case CHIP_CARRIZO: 293 case CHIP_CARRIZO:
288 chip_name = "carrizo"; 294 chip_name = "carrizo";
289 break; 295 break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 399f876f9cad..ca53b3fba422 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -42,6 +42,8 @@ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
42MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); 42MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
43MODULE_FIRMWARE("amdgpu/vega12_sdma.bin"); 43MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
44MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin"); 44MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
45MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
46MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
45MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); 47MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
46 48
47#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L 49#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
@@ -107,6 +109,28 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
107 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) 109 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
108}; 110};
109 111
112static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
113{
114 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
115 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
116 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
117 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
118 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
119 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
120 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
121 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
122 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
123 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
124 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
125 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
126 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
127 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
128 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
129 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
130 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
131 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
132};
133
110static const struct soc15_reg_golden golden_settings_sdma_rv1[] = 134static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
111{ 135{
112 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), 136 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
@@ -139,6 +163,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
139 golden_settings_sdma_vg12, 163 golden_settings_sdma_vg12,
140 ARRAY_SIZE(golden_settings_sdma_vg12)); 164 ARRAY_SIZE(golden_settings_sdma_vg12));
141 break; 165 break;
166 case CHIP_VEGA20:
167 soc15_program_register_sequence(adev,
168 golden_settings_sdma_4_2,
169 ARRAY_SIZE(golden_settings_sdma_4_2));
170 break;
142 case CHIP_RAVEN: 171 case CHIP_RAVEN:
143 soc15_program_register_sequence(adev, 172 soc15_program_register_sequence(adev,
144 golden_settings_sdma_4_1, 173 golden_settings_sdma_4_1,
@@ -182,6 +211,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
182 case CHIP_VEGA12: 211 case CHIP_VEGA12:
183 chip_name = "vega12"; 212 chip_name = "vega12";
184 break; 213 break;
214 case CHIP_VEGA20:
215 chip_name = "vega20";
216 break;
185 case CHIP_RAVEN: 217 case CHIP_RAVEN:
186 chip_name = "raven"; 218 chip_name = "raven";
187 break; 219 break;
@@ -360,6 +392,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
360 392
361} 393}
362 394
395static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
396 int mem_space, int hdp,
397 uint32_t addr0, uint32_t addr1,
398 uint32_t ref, uint32_t mask,
399 uint32_t inv)
400{
401 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
402 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
403 SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
404 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
405 if (mem_space) {
406 /* memory */
407 amdgpu_ring_write(ring, addr0);
408 amdgpu_ring_write(ring, addr1);
409 } else {
410 /* registers */
411 amdgpu_ring_write(ring, addr0 << 2);
412 amdgpu_ring_write(ring, addr1 << 2);
413 }
414 amdgpu_ring_write(ring, ref); /* reference */
415 amdgpu_ring_write(ring, mask); /* mask */
416 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
417 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
418}
419
363/** 420/**
364 * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring 421 * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
365 * 422 *
@@ -378,15 +435,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
378 else 435 else
379 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; 436 ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
380 437
381 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 438 sdma_v4_0_wait_reg_mem(ring, 0, 1,
382 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | 439 adev->nbio_funcs->get_hdp_flush_done_offset(adev),
383 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ 440 adev->nbio_funcs->get_hdp_flush_req_offset(adev),
384 amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); 441 ref_and_mask, ref_and_mask, 10);
385 amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
386 amdgpu_ring_write(ring, ref_and_mask); /* reference */
387 amdgpu_ring_write(ring, ref_and_mask); /* mask */
388 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
389 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
390} 442}
391 443
392/** 444/**
@@ -1114,16 +1166,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1114 uint64_t addr = ring->fence_drv.gpu_addr; 1166 uint64_t addr = ring->fence_drv.gpu_addr;
1115 1167
1116 /* wait for idle */ 1168 /* wait for idle */
1117 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1169 sdma_v4_0_wait_reg_mem(ring, 1, 0,
1118 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1170 addr & 0xfffffffc,
1119 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ 1171 upper_32_bits(addr) & 0xffffffff,
1120 SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); 1172 seq, 0xffffffff, 4);
1121 amdgpu_ring_write(ring, addr & 0xfffffffc);
1122 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1123 amdgpu_ring_write(ring, seq); /* reference */
1124 amdgpu_ring_write(ring, 0xffffffff); /* mask */
1125 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1126 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1127} 1173}
1128 1174
1129 1175
@@ -1154,15 +1200,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
1154static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 1200static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1155 uint32_t val, uint32_t mask) 1201 uint32_t val, uint32_t mask)
1156{ 1202{
1157 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1203 sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
1158 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1159 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
1160 amdgpu_ring_write(ring, reg << 2);
1161 amdgpu_ring_write(ring, 0);
1162 amdgpu_ring_write(ring, val); /* reference */
1163 amdgpu_ring_write(ring, mask); /* mask */
1164 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1165 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
1166} 1204}
1167 1205
1168static int sdma_v4_0_early_init(void *handle) 1206static int sdma_v4_0_early_init(void *handle)
@@ -1510,6 +1548,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
1510 switch (adev->asic_type) { 1548 switch (adev->asic_type) {
1511 case CHIP_VEGA10: 1549 case CHIP_VEGA10:
1512 case CHIP_VEGA12: 1550 case CHIP_VEGA12:
1551 case CHIP_VEGA20:
1513 case CHIP_RAVEN: 1552 case CHIP_RAVEN:
1514 sdma_v4_0_update_medium_grain_clock_gating(adev, 1553 sdma_v4_0_update_medium_grain_clock_gating(adev,
1515 state == AMD_CG_STATE_GATE ? true : false); 1554 state == AMD_CG_STATE_GATE ? true : false);
@@ -1605,6 +1644,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
1605 .pad_ib = sdma_v4_0_ring_pad_ib, 1644 .pad_ib = sdma_v4_0_ring_pad_ib,
1606 .emit_wreg = sdma_v4_0_ring_emit_wreg, 1645 .emit_wreg = sdma_v4_0_ring_emit_wreg,
1607 .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, 1646 .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
1647 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1608}; 1648};
1609 1649
1610static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1650static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
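Note: sdma_v4_0_wait_reg_mem() folds the three previously duplicated POLL_REGMEM emitters into one helper. For reference, the six dwords it writes for the plain register-wait case (mem_space = 0, hdp = 0), which is exactly what the removed sdma_v4_0_ring_emit_reg_wait() body used to emit:

	/* sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10) expands to:
	 *   DW0: SDMA_OP_POLL_REGMEM header, FUNC(3) == "equal",
	 *        MEM_POLL(0), HDP_FLUSH(0)
	 *   DW1: reg << 2       (register byte offset)
	 *   DW2: 0               (second address dword, unused for registers)
	 *   DW3: val             (reference value)
	 *   DW4: mask
	 *   DW5: RETRY_COUNT(0xfff) | INTERVAL(10)
	 */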
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index a675ec6d2811..c364ef94cc36 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev,
1252 } 1252 }
1253} 1253}
1254 1254
1255static bool si_need_full_reset(struct amdgpu_device *adev)
1256{
1257 /* change this when we support soft reset */
1258 return true;
1259}
1260
1255static int si_get_pcie_lanes(struct amdgpu_device *adev) 1261static int si_get_pcie_lanes(struct amdgpu_device *adev)
1256{ 1262{
1257 u32 link_width_cntl; 1263 u32 link_width_cntl;
@@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
1332 .get_config_memsize = &si_get_config_memsize, 1338 .get_config_memsize = &si_get_config_memsize,
1333 .flush_hdp = &si_flush_hdp, 1339 .flush_hdp = &si_flush_hdp,
1334 .invalidate_hdp = &si_invalidate_hdp, 1340 .invalidate_hdp = &si_invalidate_hdp,
1341 .need_full_reset = &si_need_full_reset,
1335}; 1342};
1336 1343
1337static uint32_t si_get_rev_id(struct amdgpu_device *adev) 1344static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 797d505bf9ee..b12d7c9d42a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -7580,7 +7580,7 @@ static int si_dpm_late_init(void *handle)
7580 int ret; 7580 int ret;
7581 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 7581 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7582 7582
7583 if (!amdgpu_dpm) 7583 if (!adev->pm.dpm_enabled)
7584 return 0; 7584 return 0;
7585 7585
7586 ret = si_set_temperature_range(adev); 7586 ret = si_set_temperature_range(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 51cf8a30f6c2..68b4a22a8892 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -41,8 +41,6 @@
41#include "sdma1/sdma1_4_0_offset.h" 41#include "sdma1/sdma1_4_0_offset.h"
42#include "hdp/hdp_4_0_offset.h" 42#include "hdp/hdp_4_0_offset.h"
43#include "hdp/hdp_4_0_sh_mask.h" 43#include "hdp/hdp_4_0_sh_mask.h"
44#include "mp/mp_9_0_offset.h"
45#include "mp/mp_9_0_sh_mask.h"
46#include "smuio/smuio_9_0_offset.h" 44#include "smuio/smuio_9_0_offset.h"
47#include "smuio/smuio_9_0_sh_mask.h" 45#include "smuio/smuio_9_0_sh_mask.h"
48 46
@@ -52,6 +50,8 @@
52#include "gmc_v9_0.h" 50#include "gmc_v9_0.h"
53#include "gfxhub_v1_0.h" 51#include "gfxhub_v1_0.h"
54#include "mmhub_v1_0.h" 52#include "mmhub_v1_0.h"
53#include "df_v1_7.h"
54#include "df_v3_6.h"
55#include "vega10_ih.h" 55#include "vega10_ih.h"
56#include "sdma_v4_0.h" 56#include "sdma_v4_0.h"
57#include "uvd_v7_0.h" 57#include "uvd_v7_0.h"
@@ -60,33 +60,6 @@
60#include "dce_virtual.h" 60#include "dce_virtual.h"
61#include "mxgpu_ai.h" 61#include "mxgpu_ai.h"
62 62
63#define mmFabricConfigAccessControl 0x0410
64#define mmFabricConfigAccessControl_BASE_IDX 0
65#define mmFabricConfigAccessControl_DEFAULT 0x00000000
66//FabricConfigAccessControl
67#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0
68#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1
69#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10
70#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L
71#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L
72#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L
73
74
75#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc
76#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0
77//DF_PIE_AON0_DfGlobalClkGater
78#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0
79#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL
80
81enum {
82 DF_MGCG_DISABLE = 0,
83 DF_MGCG_ENABLE_00_CYCLE_DELAY =1,
84 DF_MGCG_ENABLE_01_CYCLE_DELAY =2,
85 DF_MGCG_ENABLE_15_CYCLE_DELAY =13,
86 DF_MGCG_ENABLE_31_CYCLE_DELAY =14,
87 DF_MGCG_ENABLE_63_CYCLE_DELAY =15
88};
89
90#define mmMP0_MISC_CGTT_CTRL0 0x01b9 63#define mmMP0_MISC_CGTT_CTRL0 0x01b9
91#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 64#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
92#define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba 65#define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba
@@ -313,6 +286,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
313 { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, 286 { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
314 { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, 287 { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
315 { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, 288 { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
289 { SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)},
316}; 290};
317 291
318static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, 292static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
@@ -341,6 +315,8 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev,
341 } else { 315 } else {
342 if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) 316 if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))
343 return adev->gfx.config.gb_addr_config; 317 return adev->gfx.config.gb_addr_config;
318 else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2))
319 return adev->gfx.config.db_debug2;
344 return RREG32(reg_offset); 320 return RREG32(reg_offset);
345 } 321 }
346} 322}
@@ -512,15 +488,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
512 case CHIP_RAVEN: 488 case CHIP_RAVEN:
513 vega10_reg_base_init(adev); 489 vega10_reg_base_init(adev);
514 break; 490 break;
491 case CHIP_VEGA20:
492 vega20_reg_base_init(adev);
493 break;
515 default: 494 default:
516 return -EINVAL; 495 return -EINVAL;
517 } 496 }
518 497
519 if (adev->flags & AMD_IS_APU) 498 if (adev->flags & AMD_IS_APU)
520 adev->nbio_funcs = &nbio_v7_0_funcs; 499 adev->nbio_funcs = &nbio_v7_0_funcs;
500 else if (adev->asic_type == CHIP_VEGA20)
501 adev->nbio_funcs = &nbio_v7_0_funcs;
521 else 502 else
522 adev->nbio_funcs = &nbio_v6_1_funcs; 503 adev->nbio_funcs = &nbio_v6_1_funcs;
523 504
505 if (adev->asic_type == CHIP_VEGA20)
506 adev->df_funcs = &df_v3_6_funcs;
507 else
508 adev->df_funcs = &df_v1_7_funcs;
524 adev->nbio_funcs->detect_hw_virt(adev); 509 adev->nbio_funcs->detect_hw_virt(adev);
525 510
526 if (amdgpu_sriov_vf(adev)) 511 if (amdgpu_sriov_vf(adev))
@@ -529,12 +514,15 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
529 switch (adev->asic_type) { 514 switch (adev->asic_type) {
530 case CHIP_VEGA10: 515 case CHIP_VEGA10:
531 case CHIP_VEGA12: 516 case CHIP_VEGA12:
517 case CHIP_VEGA20:
532 amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); 518 amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
533 amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); 519 amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
534 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); 520 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
535 amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); 521 if (adev->asic_type != CHIP_VEGA20) {
536 if (!amdgpu_sriov_vf(adev)) 522 amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
537 amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); 523 if (!amdgpu_sriov_vf(adev))
524 amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
525 }
538 if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) 526 if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
539 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 527 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
540#if defined(CONFIG_DRM_AMD_DC) 528#if defined(CONFIG_DRM_AMD_DC)
@@ -593,6 +581,12 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev,
593 HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); 581 HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
594} 582}
595 583
584static bool soc15_need_full_reset(struct amdgpu_device *adev)
585{
586 /* change this when we implement soft reset */
587 return true;
588}
589
596static const struct amdgpu_asic_funcs soc15_asic_funcs = 590static const struct amdgpu_asic_funcs soc15_asic_funcs =
597{ 591{
598 .read_disabled_bios = &soc15_read_disabled_bios, 592 .read_disabled_bios = &soc15_read_disabled_bios,
@@ -606,6 +600,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
606 .get_config_memsize = &soc15_get_config_memsize, 600 .get_config_memsize = &soc15_get_config_memsize,
607 .flush_hdp = &soc15_flush_hdp, 601 .flush_hdp = &soc15_flush_hdp,
608 .invalidate_hdp = &soc15_invalidate_hdp, 602 .invalidate_hdp = &soc15_invalidate_hdp,
603 .need_full_reset = &soc15_need_full_reset,
609}; 604};
610 605
611static int soc15_common_early_init(void *handle) 606static int soc15_common_early_init(void *handle)
@@ -675,6 +670,27 @@ static int soc15_common_early_init(void *handle)
675 adev->pg_flags = 0; 670 adev->pg_flags = 0;
676 adev->external_rev_id = adev->rev_id + 0x14; 671 adev->external_rev_id = adev->rev_id + 0x14;
677 break; 672 break;
673 case CHIP_VEGA20:
674 adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
675 AMD_CG_SUPPORT_GFX_MGLS |
676 AMD_CG_SUPPORT_GFX_CGCG |
677 AMD_CG_SUPPORT_GFX_CGLS |
678 AMD_CG_SUPPORT_GFX_3D_CGCG |
679 AMD_CG_SUPPORT_GFX_3D_CGLS |
680 AMD_CG_SUPPORT_GFX_CP_LS |
681 AMD_CG_SUPPORT_MC_LS |
682 AMD_CG_SUPPORT_MC_MGCG |
683 AMD_CG_SUPPORT_SDMA_MGCG |
684 AMD_CG_SUPPORT_SDMA_LS |
685 AMD_CG_SUPPORT_BIF_MGCG |
686 AMD_CG_SUPPORT_BIF_LS |
687 AMD_CG_SUPPORT_HDP_MGCG |
688 AMD_CG_SUPPORT_ROM_MGCG |
689 AMD_CG_SUPPORT_VCE_MGCG |
690 AMD_CG_SUPPORT_UVD_MGCG;
691 adev->pg_flags = 0;
692 adev->external_rev_id = adev->rev_id + 0x28;
693 break;
678 case CHIP_RAVEN: 694 case CHIP_RAVEN:
679 adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | 695 adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
680 AMD_CG_SUPPORT_GFX_MGLS | 696 AMD_CG_SUPPORT_GFX_MGLS |
@@ -694,8 +710,15 @@ static int soc15_common_early_init(void *handle)
694 AMD_CG_SUPPORT_MC_MGCG | 710 AMD_CG_SUPPORT_MC_MGCG |
695 AMD_CG_SUPPORT_MC_LS | 711 AMD_CG_SUPPORT_MC_LS |
696 AMD_CG_SUPPORT_SDMA_MGCG | 712 AMD_CG_SUPPORT_SDMA_MGCG |
697 AMD_CG_SUPPORT_SDMA_LS; 713 AMD_CG_SUPPORT_SDMA_LS |
698 adev->pg_flags = AMD_PG_SUPPORT_SDMA; 714 AMD_CG_SUPPORT_VCN_MGCG;
715
716 adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
717
718 if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
719 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
720 AMD_PG_SUPPORT_CP |
721 AMD_PG_SUPPORT_RLC_SMU_HS;
699 722
700 adev->external_rev_id = 0x1; 723 adev->external_rev_id = 0x1;
701 break; 724 break;
@@ -871,32 +894,6 @@ static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *ade
871 WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); 894 WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data);
872} 895}
873 896
874static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev,
875 bool enable)
876{
877 uint32_t data;
878
879 /* Put DF on broadcast mode */
880 data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl));
881 data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
882 WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data);
883
884 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
885 data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
886 data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
887 data |= DF_MGCG_ENABLE_15_CYCLE_DELAY;
888 WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data);
889 } else {
890 data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
891 data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
892 data |= DF_MGCG_DISABLE;
893 WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data);
894 }
895
896 WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl),
897 mmFabricConfigAccessControl_DEFAULT);
898}
899
900static int soc15_common_set_clockgating_state(void *handle, 897static int soc15_common_set_clockgating_state(void *handle,
901 enum amd_clockgating_state state) 898 enum amd_clockgating_state state)
902{ 899{
@@ -908,6 +905,7 @@ static int soc15_common_set_clockgating_state(void *handle,
908 switch (adev->asic_type) { 905 switch (adev->asic_type) {
909 case CHIP_VEGA10: 906 case CHIP_VEGA10:
910 case CHIP_VEGA12: 907 case CHIP_VEGA12:
908 case CHIP_VEGA20:
911 adev->nbio_funcs->update_medium_grain_clock_gating(adev, 909 adev->nbio_funcs->update_medium_grain_clock_gating(adev,
912 state == AMD_CG_STATE_GATE ? true : false); 910 state == AMD_CG_STATE_GATE ? true : false);
913 adev->nbio_funcs->update_medium_grain_light_sleep(adev, 911 adev->nbio_funcs->update_medium_grain_light_sleep(adev,
@@ -920,7 +918,7 @@ static int soc15_common_set_clockgating_state(void *handle,
920 state == AMD_CG_STATE_GATE ? true : false); 918 state == AMD_CG_STATE_GATE ? true : false);
921 soc15_update_rom_medium_grain_clock_gating(adev, 919 soc15_update_rom_medium_grain_clock_gating(adev,
922 state == AMD_CG_STATE_GATE ? true : false); 920 state == AMD_CG_STATE_GATE ? true : false);
923 soc15_update_df_medium_grain_clock_gating(adev, 921 adev->df_funcs->update_medium_grain_clock_gating(adev,
924 state == AMD_CG_STATE_GATE ? true : false); 922 state == AMD_CG_STATE_GATE ? true : false);
925 break; 923 break;
926 case CHIP_RAVEN: 924 case CHIP_RAVEN:
@@ -973,10 +971,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
973 if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) 971 if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
974 *flags |= AMD_CG_SUPPORT_ROM_MGCG; 972 *flags |= AMD_CG_SUPPORT_ROM_MGCG;
975 973
976 /* AMD_CG_SUPPORT_DF_MGCG */ 974 adev->df_funcs->get_clockgating_state(adev, flags);
977 data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
978 if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY)
979 *flags |= AMD_CG_SUPPORT_DF_MGCG;
980} 975}
981 976
982static int soc15_common_set_powergating_state(void *handle, 977static int soc15_common_set_powergating_state(void *handle,
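Note: all Data Fabric handling in soc15.c is now routed through adev->df_funcs, selected per ASIC above (df_v1_7 on Vega10/12/Raven, df_v3_6 on Vega20). Collected from the call sites visible in this patch, the callbacks that interface has to provide look roughly like the sketch below; the struct name and exact signatures are assumptions, since the header itself is not part of this excerpt:

struct amdgpu_df_funcs_sketch {
	int  (*get_hbm_channel_number)(struct amdgpu_device *adev);
	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
						 bool enable);
	void (*get_clockgating_state)(struct amdgpu_device *adev, u32 *flags);
	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
					    bool enable);
};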
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index f70da8a29f86..1f714b7af520 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -55,5 +55,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
55 const u32 array_size); 55 const u32 array_size);
56 56
57int vega10_reg_base_init(struct amdgpu_device *adev); 57int vega10_reg_base_init(struct amdgpu_device *adev);
58int vega20_reg_base_init(struct amdgpu_device *adev);
58 59
59#endif 60#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index def865067edd..0942f492d2e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -47,6 +47,21 @@
47#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ 47#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
48 WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) 48 WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value)
49 49
50#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \
51 do { \
52 uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
53 uint32_t loop = adev->usec_timeout; \
54 while ((tmp_ & (mask)) != (expected_value)) { \
55 udelay(2); \
56 tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
57 loop--; \
58 if (!loop) { \
59 ret = -ETIMEDOUT; \
60 break; \
61 } \
62 } \
63 } while (0)
64
50#endif 65#endif
51 66
52 67
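SOC15_WAIT_ON_RREG added above is a busy-poll helper: read the register, compare the masked value, retry with a small delay until it matches or the loop budget runs out, and report -ETIMEDOUT on failure. The stand-alone sketch below models the same pattern under assumptions: read_reg() stands in for RREG32 and the loop count for adev->usec_timeout; it is an illustration, not the kernel macro.

/* User-space model of the poll-until-masked-value-or-timeout pattern. */
#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg;                      /* pretend MMIO register */
static uint32_t read_reg(void) { fake_reg++; return fake_reg; }

static int wait_on_reg(uint32_t expected, uint32_t mask, unsigned int loops)
{
	uint32_t tmp = read_reg();

	while ((tmp & mask) != expected) {
		/* the kernel macro udelay(2)s here */
		tmp = read_reg();
		if (--loops == 0)
			return -1;             /* -ETIMEDOUT in the kernel */
	}
	return 0;
}

int main(void)
{
	printf("wait: %d\n", wait_on_reg(0x10, 0xF0, 100));
	return 0;
}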
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 7f408f85fdb6..8dc29107228f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -159,6 +159,7 @@
159#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ 159#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
160#define EOP_TCL1_ACTION_EN (1 << 16) 160#define EOP_TCL1_ACTION_EN (1 << 16)
161#define EOP_TC_ACTION_EN (1 << 17) /* L2 */ 161#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
162#define EOP_TC_NC_ACTION_EN (1 << 19)
162#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ 163#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */
163 164
164#define DATA_SEL(x) ((x) << 29) 165#define DATA_SEL(x) ((x) << 29)
@@ -268,6 +269,11 @@
268 * x=1: tmz_end 269 * x=1: tmz_end
269 */ 270 */
270 271
272#define PACKET3_INVALIDATE_TLBS 0x98
273# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
274# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
275# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
276# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
271#define PACKET3_SET_RESOURCES 0xA0 277#define PACKET3_SET_RESOURCES 0xA0
272/* 1. header 278/* 1. header
273 * 2. CONTROL 279 * 2. CONTROL
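The new PACKET3_INVALIDATE_TLBS opcode carries its destination select, all-hub flag, PASID and flush type in a single operand dword. A small sketch of how those shifts compose, using only the shift amounts from the header above; the example field values are made up.

/* Sketch of composing the INVALIDATE_TLBS operand dword. */
#include <stdint.h>
#include <stdio.h>

#define INVALIDATE_TLBS_DST_SEL(x)    ((uint32_t)(x) << 0)
#define INVALIDATE_TLBS_ALL_HUB(x)    ((uint32_t)(x) << 4)
#define INVALIDATE_TLBS_PASID(x)      ((uint32_t)(x) << 5)
#define INVALIDATE_TLBS_FLUSH_TYPE(x) ((uint32_t)(x) << 29)

int main(void)
{
	uint32_t dw = INVALIDATE_TLBS_DST_SEL(1) |
		      INVALIDATE_TLBS_ALL_HUB(1) |
		      INVALIDATE_TLBS_PASID(0x42) |      /* made-up PASID */
		      INVALIDATE_TLBS_FLUSH_TYPE(0);

	printf("INVALIDATE_TLBS operand = 0x%08x\n", (unsigned int)dw);
	return 0;
}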
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 948bb9437757..6fed3d7797a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -93,6 +93,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
93static int uvd_v4_2_early_init(void *handle) 93static int uvd_v4_2_early_init(void *handle)
94{ 94{
95 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 95 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
96 adev->uvd.num_uvd_inst = 1;
96 97
97 uvd_v4_2_set_ring_funcs(adev); 98 uvd_v4_2_set_ring_funcs(adev);
98 uvd_v4_2_set_irq_funcs(adev); 99 uvd_v4_2_set_irq_funcs(adev);
@@ -107,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle)
107 int r; 108 int r;
108 109
109 /* UVD TRAP */ 110 /* UVD TRAP */
110 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); 111 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
111 if (r) 112 if (r)
112 return r; 113 return r;
113 114
@@ -119,9 +120,9 @@ static int uvd_v4_2_sw_init(void *handle)
119 if (r) 120 if (r)
120 return r; 121 return r;
121 122
122 ring = &adev->uvd.ring; 123 ring = &adev->uvd.inst->ring;
123 sprintf(ring->name, "uvd"); 124 sprintf(ring->name, "uvd");
124 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); 125 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
125 126
126 return r; 127 return r;
127} 128}
@@ -150,7 +151,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
150static int uvd_v4_2_hw_init(void *handle) 151static int uvd_v4_2_hw_init(void *handle)
151{ 152{
152 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 153 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
153 struct amdgpu_ring *ring = &adev->uvd.ring; 154 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
154 uint32_t tmp; 155 uint32_t tmp;
155 int r; 156 int r;
156 157
@@ -208,7 +209,7 @@ done:
208static int uvd_v4_2_hw_fini(void *handle) 209static int uvd_v4_2_hw_fini(void *handle)
209{ 210{
210 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 211 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
211 struct amdgpu_ring *ring = &adev->uvd.ring; 212 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
212 213
213 if (RREG32(mmUVD_STATUS) != 0) 214 if (RREG32(mmUVD_STATUS) != 0)
214 uvd_v4_2_stop(adev); 215 uvd_v4_2_stop(adev);
@@ -251,7 +252,7 @@ static int uvd_v4_2_resume(void *handle)
251 */ 252 */
252static int uvd_v4_2_start(struct amdgpu_device *adev) 253static int uvd_v4_2_start(struct amdgpu_device *adev)
253{ 254{
254 struct amdgpu_ring *ring = &adev->uvd.ring; 255 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
255 uint32_t rb_bufsz; 256 uint32_t rb_bufsz;
256 int i, j, r; 257 int i, j, r;
257 u32 tmp; 258 u32 tmp;
@@ -523,6 +524,18 @@ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
523 amdgpu_ring_write(ring, ib->length_dw); 524 amdgpu_ring_write(ring, ib->length_dw);
524} 525}
525 526
527static void uvd_v4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
528{
529 int i;
530
531 WARN_ON(ring->wptr % 2 || count % 2);
532
533 for (i = 0; i < count / 2; i++) {
534 amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
535 amdgpu_ring_write(ring, 0);
536 }
537}
538
526/** 539/**
527 * uvd_v4_2_mc_resume - memory controller programming 540 * uvd_v4_2_mc_resume - memory controller programming
528 * 541 *
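uvd_v4_2_ring_insert_nop() (and the matching v5/v6 helpers later in this diff) pads the ring with two-dword NOPs: a PACKET0 write header to UVD_NO_OP followed by a zero payload, which is why both the write pointer and the count are checked to be even. A stand-alone model of that pairwise padding follows; PACKET0() and the register offset are stand-ins, not the driver's definitions.

/* Model of pairwise NOP padding into a ring buffer. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FAKE_UVD_NO_OP 0x03ff                     /* stand-in register offset */
#define PACKET0(reg, n) ((uint32_t)(((reg) & 0xffff) | ((uint32_t)(n) << 16)))

static uint32_t ring[64];
static unsigned int wptr;

static void ring_write(uint32_t v) { ring[wptr++ % 64] = v; }

static void insert_nop(unsigned int count)
{
	unsigned int i;

	assert(!(wptr % 2) && !(count % 2));     /* WARN_ON() in the driver */

	for (i = 0; i < count / 2; i++) {
		ring_write(PACKET0(FAKE_UVD_NO_OP, 0));  /* header */
		ring_write(0);                           /* payload */
	}
}

int main(void)
{
	insert_nop(8);
	printf("emitted %u dwords of padding\n", wptr);
	return 0;
}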
@@ -536,7 +549,7 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
536 uint32_t size; 549 uint32_t size;
537 550
538 /* program the VCPU memory controller bits 0-27 */ 551 /* program the VCPU memory controller bits 0-27 */
539 addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; 552 addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
540 size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; 553 size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
541 WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); 554 WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
542 WREG32(mmUVD_VCPU_CACHE_SIZE0, size); 555 WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
@@ -553,11 +566,11 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
553 WREG32(mmUVD_VCPU_CACHE_SIZE2, size); 566 WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
554 567
555 /* bits 28-31 */ 568 /* bits 28-31 */
556 addr = (adev->uvd.gpu_addr >> 28) & 0xF; 569 addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF;
557 WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); 570 WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
558 571
559 /* bits 32-39 */ 572 /* bits 32-39 */
560 addr = (adev->uvd.gpu_addr >> 32) & 0xFF; 573 addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF;
561 WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); 574 WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
562 575
563 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 576 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
@@ -664,7 +677,7 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
664 struct amdgpu_iv_entry *entry) 677 struct amdgpu_iv_entry *entry)
665{ 678{
666 DRM_DEBUG("IH: UVD TRAP\n"); 679 DRM_DEBUG("IH: UVD TRAP\n");
667 amdgpu_fence_process(&adev->uvd.ring); 680 amdgpu_fence_process(&adev->uvd.inst->ring);
668 return 0; 681 return 0;
669} 682}
670 683
@@ -688,7 +701,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
688 701
689 if (state == AMD_PG_STATE_GATE) { 702 if (state == AMD_PG_STATE_GATE) {
690 uvd_v4_2_stop(adev); 703 uvd_v4_2_stop(adev);
691 if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { 704 if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
692 if (!(RREG32_SMC(ixCURRENT_PG_STATUS) & 705 if (!(RREG32_SMC(ixCURRENT_PG_STATUS) &
693 CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) { 706 CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) {
694 WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | 707 WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK |
@@ -699,7 +712,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
699 } 712 }
700 return 0; 713 return 0;
701 } else { 714 } else {
702 if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { 715 if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
703 if (RREG32_SMC(ixCURRENT_PG_STATUS) & 716 if (RREG32_SMC(ixCURRENT_PG_STATUS) &
704 CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { 717 CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
705 WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | 718 WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK |
@@ -732,7 +745,6 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
732static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { 745static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
733 .type = AMDGPU_RING_TYPE_UVD, 746 .type = AMDGPU_RING_TYPE_UVD,
734 .align_mask = 0xf, 747 .align_mask = 0xf,
735 .nop = PACKET0(mmUVD_NO_OP, 0),
736 .support_64bit_ptrs = false, 748 .support_64bit_ptrs = false,
737 .get_rptr = uvd_v4_2_ring_get_rptr, 749 .get_rptr = uvd_v4_2_ring_get_rptr,
738 .get_wptr = uvd_v4_2_ring_get_wptr, 750 .get_wptr = uvd_v4_2_ring_get_wptr,
@@ -745,7 +757,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
745 .emit_fence = uvd_v4_2_ring_emit_fence, 757 .emit_fence = uvd_v4_2_ring_emit_fence,
746 .test_ring = uvd_v4_2_ring_test_ring, 758 .test_ring = uvd_v4_2_ring_test_ring,
747 .test_ib = amdgpu_uvd_ring_test_ib, 759 .test_ib = amdgpu_uvd_ring_test_ib,
748 .insert_nop = amdgpu_ring_insert_nop, 760 .insert_nop = uvd_v4_2_ring_insert_nop,
749 .pad_ib = amdgpu_ring_generic_pad_ib, 761 .pad_ib = amdgpu_ring_generic_pad_ib,
750 .begin_use = amdgpu_uvd_ring_begin_use, 762 .begin_use = amdgpu_uvd_ring_begin_use,
751 .end_use = amdgpu_uvd_ring_end_use, 763 .end_use = amdgpu_uvd_ring_end_use,
@@ -753,7 +765,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
753 765
754static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) 766static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
755{ 767{
756 adev->uvd.ring.funcs = &uvd_v4_2_ring_funcs; 768 adev->uvd.inst->ring.funcs = &uvd_v4_2_ring_funcs;
757} 769}
758 770
759static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = { 771static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = {
@@ -763,8 +775,8 @@ static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = {
763 775
764static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev) 776static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev)
765{ 777{
766 adev->uvd.irq.num_types = 1; 778 adev->uvd.inst->irq.num_types = 1;
767 adev->uvd.irq.funcs = &uvd_v4_2_irq_funcs; 779 adev->uvd.inst->irq.funcs = &uvd_v4_2_irq_funcs;
768} 780}
769 781
770const struct amdgpu_ip_block_version uvd_v4_2_ip_block = 782const struct amdgpu_ip_block_version uvd_v4_2_ip_block =
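From here on the series replaces direct adev->uvd.<field> accesses with adev->uvd.inst[...].<field>, so that Vega20's second UVD block can be addressed later; single-instance parts set num_uvd_inst = 1 and "uvd.inst->ring" is simply instance 0. A reduced model of that layout, with invented types and sizes, is sketched below.

/* Reduced model of per-instance UVD bookkeeping (field names follow the
 * diff, everything else is made up). */
#include <stdio.h>

#define MAX_UVD_INST 2

struct fake_ring { unsigned long wptr; };
struct uvd_inst  { struct fake_ring ring; unsigned long long gpu_addr; };

struct uvd_state {
	unsigned int    num_uvd_inst;
	struct uvd_inst inst[MAX_UVD_INST];
};

int main(void)
{
	struct uvd_state uvd = { .num_uvd_inst = 1 };
	unsigned int i;

	uvd.inst->gpu_addr = 0x100000;          /* same object as inst[0] */

	for (i = 0; i < uvd.num_uvd_inst; ++i)
		printf("instance %u gpu_addr 0x%llx\n", i, uvd.inst[i].gpu_addr);
	return 0;
}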
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 6445d55e7d5a..341ee6d55ce8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -89,6 +89,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
89static int uvd_v5_0_early_init(void *handle) 89static int uvd_v5_0_early_init(void *handle)
90{ 90{
91 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 91 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
92 adev->uvd.num_uvd_inst = 1;
92 93
93 uvd_v5_0_set_ring_funcs(adev); 94 uvd_v5_0_set_ring_funcs(adev);
94 uvd_v5_0_set_irq_funcs(adev); 95 uvd_v5_0_set_irq_funcs(adev);
@@ -103,7 +104,7 @@ static int uvd_v5_0_sw_init(void *handle)
103 int r; 104 int r;
104 105
105 /* UVD TRAP */ 106 /* UVD TRAP */
106 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); 107 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
107 if (r) 108 if (r)
108 return r; 109 return r;
109 110
@@ -115,9 +116,9 @@ static int uvd_v5_0_sw_init(void *handle)
115 if (r) 116 if (r)
116 return r; 117 return r;
117 118
118 ring = &adev->uvd.ring; 119 ring = &adev->uvd.inst->ring;
119 sprintf(ring->name, "uvd"); 120 sprintf(ring->name, "uvd");
120 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); 121 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
121 122
122 return r; 123 return r;
123} 124}
@@ -144,7 +145,7 @@ static int uvd_v5_0_sw_fini(void *handle)
144static int uvd_v5_0_hw_init(void *handle) 145static int uvd_v5_0_hw_init(void *handle)
145{ 146{
146 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 147 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
147 struct amdgpu_ring *ring = &adev->uvd.ring; 148 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
148 uint32_t tmp; 149 uint32_t tmp;
149 int r; 150 int r;
150 151
@@ -204,7 +205,7 @@ done:
204static int uvd_v5_0_hw_fini(void *handle) 205static int uvd_v5_0_hw_fini(void *handle)
205{ 206{
206 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 207 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
207 struct amdgpu_ring *ring = &adev->uvd.ring; 208 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
208 209
209 if (RREG32(mmUVD_STATUS) != 0) 210 if (RREG32(mmUVD_STATUS) != 0)
210 uvd_v5_0_stop(adev); 211 uvd_v5_0_stop(adev);
@@ -253,9 +254,9 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
253 254
254 /* program memory controller bits 0-27 */ 255 /* program memory controller bits 0-27 */
255 WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, 256 WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
256 lower_32_bits(adev->uvd.gpu_addr)); 257 lower_32_bits(adev->uvd.inst->gpu_addr));
257 WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, 258 WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
258 upper_32_bits(adev->uvd.gpu_addr)); 259 upper_32_bits(adev->uvd.inst->gpu_addr));
259 260
260 offset = AMDGPU_UVD_FIRMWARE_OFFSET; 261 offset = AMDGPU_UVD_FIRMWARE_OFFSET;
261 size = AMDGPU_UVD_FIRMWARE_SIZE(adev); 262 size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
@@ -287,7 +288,7 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
287 */ 288 */
288static int uvd_v5_0_start(struct amdgpu_device *adev) 289static int uvd_v5_0_start(struct amdgpu_device *adev)
289{ 290{
290 struct amdgpu_ring *ring = &adev->uvd.ring; 291 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
291 uint32_t rb_bufsz, tmp; 292 uint32_t rb_bufsz, tmp;
292 uint32_t lmi_swap_cntl; 293 uint32_t lmi_swap_cntl;
293 uint32_t mp_swap_cntl; 294 uint32_t mp_swap_cntl;
@@ -540,6 +541,18 @@ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
540 amdgpu_ring_write(ring, ib->length_dw); 541 amdgpu_ring_write(ring, ib->length_dw);
541} 542}
542 543
544static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
545{
546 int i;
547
548 WARN_ON(ring->wptr % 2 || count % 2);
549
550 for (i = 0; i < count / 2; i++) {
551 amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
552 amdgpu_ring_write(ring, 0);
553 }
554}
555
543static bool uvd_v5_0_is_idle(void *handle) 556static bool uvd_v5_0_is_idle(void *handle)
544{ 557{
545 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 558 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -586,7 +599,7 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev,
586 struct amdgpu_iv_entry *entry) 599 struct amdgpu_iv_entry *entry)
587{ 600{
588 DRM_DEBUG("IH: UVD TRAP\n"); 601 DRM_DEBUG("IH: UVD TRAP\n");
589 amdgpu_fence_process(&adev->uvd.ring); 602 amdgpu_fence_process(&adev->uvd.inst->ring);
590 return 0; 603 return 0;
591} 604}
592 605
@@ -840,7 +853,6 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
840static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { 853static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
841 .type = AMDGPU_RING_TYPE_UVD, 854 .type = AMDGPU_RING_TYPE_UVD,
842 .align_mask = 0xf, 855 .align_mask = 0xf,
843 .nop = PACKET0(mmUVD_NO_OP, 0),
844 .support_64bit_ptrs = false, 856 .support_64bit_ptrs = false,
845 .get_rptr = uvd_v5_0_ring_get_rptr, 857 .get_rptr = uvd_v5_0_ring_get_rptr,
846 .get_wptr = uvd_v5_0_ring_get_wptr, 858 .get_wptr = uvd_v5_0_ring_get_wptr,
@@ -853,7 +865,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
853 .emit_fence = uvd_v5_0_ring_emit_fence, 865 .emit_fence = uvd_v5_0_ring_emit_fence,
854 .test_ring = uvd_v5_0_ring_test_ring, 866 .test_ring = uvd_v5_0_ring_test_ring,
855 .test_ib = amdgpu_uvd_ring_test_ib, 867 .test_ib = amdgpu_uvd_ring_test_ib,
856 .insert_nop = amdgpu_ring_insert_nop, 868 .insert_nop = uvd_v5_0_ring_insert_nop,
857 .pad_ib = amdgpu_ring_generic_pad_ib, 869 .pad_ib = amdgpu_ring_generic_pad_ib,
858 .begin_use = amdgpu_uvd_ring_begin_use, 870 .begin_use = amdgpu_uvd_ring_begin_use,
859 .end_use = amdgpu_uvd_ring_end_use, 871 .end_use = amdgpu_uvd_ring_end_use,
@@ -861,7 +873,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
861 873
862static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) 874static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
863{ 875{
864 adev->uvd.ring.funcs = &uvd_v5_0_ring_funcs; 876 adev->uvd.inst->ring.funcs = &uvd_v5_0_ring_funcs;
865} 877}
866 878
867static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = { 879static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = {
@@ -871,8 +883,8 @@ static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = {
871 883
872static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev) 884static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev)
873{ 885{
874 adev->uvd.irq.num_types = 1; 886 adev->uvd.inst->irq.num_types = 1;
875 adev->uvd.irq.funcs = &uvd_v5_0_irq_funcs; 887 adev->uvd.inst->irq.funcs = &uvd_v5_0_irq_funcs;
876} 888}
877 889
878const struct amdgpu_ip_block_version uvd_v5_0_ip_block = 890const struct amdgpu_ip_block_version uvd_v5_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index f26f515db2fb..bfddf97dd13e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -62,7 +62,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
62static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev) 62static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev)
63{ 63{
64 return ((adev->asic_type >= CHIP_POLARIS10) && 64 return ((adev->asic_type >= CHIP_POLARIS10) &&
65 (adev->asic_type <= CHIP_POLARIS12) && 65 (adev->asic_type <= CHIP_VEGAM) &&
66 (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16)); 66 (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16));
67} 67}
68 68
@@ -91,7 +91,7 @@ static uint64_t uvd_v6_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
91{ 91{
92 struct amdgpu_device *adev = ring->adev; 92 struct amdgpu_device *adev = ring->adev;
93 93
94 if (ring == &adev->uvd.ring_enc[0]) 94 if (ring == &adev->uvd.inst->ring_enc[0])
95 return RREG32(mmUVD_RB_RPTR); 95 return RREG32(mmUVD_RB_RPTR);
96 else 96 else
97 return RREG32(mmUVD_RB_RPTR2); 97 return RREG32(mmUVD_RB_RPTR2);
@@ -121,7 +121,7 @@ static uint64_t uvd_v6_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
121{ 121{
122 struct amdgpu_device *adev = ring->adev; 122 struct amdgpu_device *adev = ring->adev;
123 123
124 if (ring == &adev->uvd.ring_enc[0]) 124 if (ring == &adev->uvd.inst->ring_enc[0])
125 return RREG32(mmUVD_RB_WPTR); 125 return RREG32(mmUVD_RB_WPTR);
126 else 126 else
127 return RREG32(mmUVD_RB_WPTR2); 127 return RREG32(mmUVD_RB_WPTR2);
@@ -152,7 +152,7 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
152{ 152{
153 struct amdgpu_device *adev = ring->adev; 153 struct amdgpu_device *adev = ring->adev;
154 154
155 if (ring == &adev->uvd.ring_enc[0]) 155 if (ring == &adev->uvd.inst->ring_enc[0])
156 WREG32(mmUVD_RB_WPTR, 156 WREG32(mmUVD_RB_WPTR,
157 lower_32_bits(ring->wptr)); 157 lower_32_bits(ring->wptr));
158 else 158 else
@@ -375,6 +375,7 @@ error:
375static int uvd_v6_0_early_init(void *handle) 375static int uvd_v6_0_early_init(void *handle)
376{ 376{
377 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 377 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
378 adev->uvd.num_uvd_inst = 1;
378 379
379 if (!(adev->flags & AMD_IS_APU) && 380 if (!(adev->flags & AMD_IS_APU) &&
380 (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK)) 381 (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK))
@@ -399,14 +400,14 @@ static int uvd_v6_0_sw_init(void *handle)
399 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 400 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
400 401
401 /* UVD TRAP */ 402 /* UVD TRAP */
402 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); 403 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
403 if (r) 404 if (r)
404 return r; 405 return r;
405 406
406 /* UVD ENC TRAP */ 407 /* UVD ENC TRAP */
407 if (uvd_v6_0_enc_support(adev)) { 408 if (uvd_v6_0_enc_support(adev)) {
408 for (i = 0; i < adev->uvd.num_enc_rings; ++i) { 409 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
409 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.irq); 410 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.inst->irq);
410 if (r) 411 if (r)
411 return r; 412 return r;
412 } 413 }
@@ -418,18 +419,18 @@ static int uvd_v6_0_sw_init(void *handle)
418 419
419 if (!uvd_v6_0_enc_support(adev)) { 420 if (!uvd_v6_0_enc_support(adev)) {
420 for (i = 0; i < adev->uvd.num_enc_rings; ++i) 421 for (i = 0; i < adev->uvd.num_enc_rings; ++i)
421 adev->uvd.ring_enc[i].funcs = NULL; 422 adev->uvd.inst->ring_enc[i].funcs = NULL;
422 423
423 adev->uvd.irq.num_types = 1; 424 adev->uvd.inst->irq.num_types = 1;
424 adev->uvd.num_enc_rings = 0; 425 adev->uvd.num_enc_rings = 0;
425 426
426 DRM_INFO("UVD ENC is disabled\n"); 427 DRM_INFO("UVD ENC is disabled\n");
427 } else { 428 } else {
428 struct drm_sched_rq *rq; 429 struct drm_sched_rq *rq;
429 ring = &adev->uvd.ring_enc[0]; 430 ring = &adev->uvd.inst->ring_enc[0];
430 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; 431 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
431 r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, 432 r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc,
432 rq, amdgpu_sched_jobs, NULL); 433 rq, NULL);
433 if (r) { 434 if (r) {
434 DRM_ERROR("Failed setting up UVD ENC run queue.\n"); 435 DRM_ERROR("Failed setting up UVD ENC run queue.\n");
435 return r; 436 return r;
@@ -440,17 +441,17 @@ static int uvd_v6_0_sw_init(void *handle)
440 if (r) 441 if (r)
441 return r; 442 return r;
442 443
443 ring = &adev->uvd.ring; 444 ring = &adev->uvd.inst->ring;
444 sprintf(ring->name, "uvd"); 445 sprintf(ring->name, "uvd");
445 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); 446 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
446 if (r) 447 if (r)
447 return r; 448 return r;
448 449
449 if (uvd_v6_0_enc_support(adev)) { 450 if (uvd_v6_0_enc_support(adev)) {
450 for (i = 0; i < adev->uvd.num_enc_rings; ++i) { 451 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
451 ring = &adev->uvd.ring_enc[i]; 452 ring = &adev->uvd.inst->ring_enc[i];
452 sprintf(ring->name, "uvd_enc%d", i); 453 sprintf(ring->name, "uvd_enc%d", i);
453 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); 454 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
454 if (r) 455 if (r)
455 return r; 456 return r;
456 } 457 }
@@ -469,10 +470,10 @@ static int uvd_v6_0_sw_fini(void *handle)
469 return r; 470 return r;
470 471
471 if (uvd_v6_0_enc_support(adev)) { 472 if (uvd_v6_0_enc_support(adev)) {
472 drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); 473 drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc);
473 474
474 for (i = 0; i < adev->uvd.num_enc_rings; ++i) 475 for (i = 0; i < adev->uvd.num_enc_rings; ++i)
475 amdgpu_ring_fini(&adev->uvd.ring_enc[i]); 476 amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
476 } 477 }
477 478
478 return amdgpu_uvd_sw_fini(adev); 479 return amdgpu_uvd_sw_fini(adev);
@@ -488,7 +489,7 @@ static int uvd_v6_0_sw_fini(void *handle)
488static int uvd_v6_0_hw_init(void *handle) 489static int uvd_v6_0_hw_init(void *handle)
489{ 490{
490 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 491 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
491 struct amdgpu_ring *ring = &adev->uvd.ring; 492 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
492 uint32_t tmp; 493 uint32_t tmp;
493 int i, r; 494 int i, r;
494 495
@@ -532,7 +533,7 @@ static int uvd_v6_0_hw_init(void *handle)
532 533
533 if (uvd_v6_0_enc_support(adev)) { 534 if (uvd_v6_0_enc_support(adev)) {
534 for (i = 0; i < adev->uvd.num_enc_rings; ++i) { 535 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
535 ring = &adev->uvd.ring_enc[i]; 536 ring = &adev->uvd.inst->ring_enc[i];
536 ring->ready = true; 537 ring->ready = true;
537 r = amdgpu_ring_test_ring(ring); 538 r = amdgpu_ring_test_ring(ring);
538 if (r) { 539 if (r) {
@@ -563,7 +564,7 @@ done:
563static int uvd_v6_0_hw_fini(void *handle) 564static int uvd_v6_0_hw_fini(void *handle)
564{ 565{
565 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 566 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
566 struct amdgpu_ring *ring = &adev->uvd.ring; 567 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
567 568
568 if (RREG32(mmUVD_STATUS) != 0) 569 if (RREG32(mmUVD_STATUS) != 0)
569 uvd_v6_0_stop(adev); 570 uvd_v6_0_stop(adev);
@@ -611,9 +612,9 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev)
611 612
612 /* program memory controller bits 0-27 */ 613 /* program memory controller bits 0-27 */
613 WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, 614 WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
614 lower_32_bits(adev->uvd.gpu_addr)); 615 lower_32_bits(adev->uvd.inst->gpu_addr));
615 WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, 616 WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
616 upper_32_bits(adev->uvd.gpu_addr)); 617 upper_32_bits(adev->uvd.inst->gpu_addr));
617 618
618 offset = AMDGPU_UVD_FIRMWARE_OFFSET; 619 offset = AMDGPU_UVD_FIRMWARE_OFFSET;
619 size = AMDGPU_UVD_FIRMWARE_SIZE(adev); 620 size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
@@ -726,7 +727,7 @@ static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev,
726 */ 727 */
727static int uvd_v6_0_start(struct amdgpu_device *adev) 728static int uvd_v6_0_start(struct amdgpu_device *adev)
728{ 729{
729 struct amdgpu_ring *ring = &adev->uvd.ring; 730 struct amdgpu_ring *ring = &adev->uvd.inst->ring;
730 uint32_t rb_bufsz, tmp; 731 uint32_t rb_bufsz, tmp;
731 uint32_t lmi_swap_cntl; 732 uint32_t lmi_swap_cntl;
732 uint32_t mp_swap_cntl; 733 uint32_t mp_swap_cntl;
@@ -866,14 +867,14 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
866 WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); 867 WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0);
867 868
868 if (uvd_v6_0_enc_support(adev)) { 869 if (uvd_v6_0_enc_support(adev)) {
869 ring = &adev->uvd.ring_enc[0]; 870 ring = &adev->uvd.inst->ring_enc[0];
870 WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); 871 WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
871 WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); 872 WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
872 WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr); 873 WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr);
873 WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); 874 WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
874 WREG32(mmUVD_RB_SIZE, ring->ring_size / 4); 875 WREG32(mmUVD_RB_SIZE, ring->ring_size / 4);
875 876
876 ring = &adev->uvd.ring_enc[1]; 877 ring = &adev->uvd.inst->ring_enc[1];
877 WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); 878 WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
878 WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); 879 WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
879 WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr); 880 WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr);
@@ -964,6 +965,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
964} 965}
965 966
966/** 967/**
968 * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing
969 *
970 * @ring: amdgpu_ring pointer
971 */
972static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
973{
974 /* The firmware doesn't seem to like touching registers at this point. */
975}
976
977/**
967 * uvd_v6_0_ring_test_ring - register write test 978 * uvd_v6_0_ring_test_ring - register write test
968 * 979 *
969 * @ring: amdgpu_ring pointer 980 * @ring: amdgpu_ring pointer
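The empty uvd_v6_0_ring_emit_hdp_flush() above is wired into the ring funcs further down so the core stops emitting its generic HDP flush on UVD rings (the comment notes the firmware dislikes those register writes), which is also why emit_frame_size loses one "6 +" term. A toy model of that callback-override pattern, with invented names, follows.

/* Toy model: an installed no-op hook suppresses the generic flush. */
#include <stdio.h>

struct ring;
struct ring_funcs { void (*emit_hdp_flush)(struct ring *r); };
struct ring       { const struct ring_funcs *funcs; };

static void generic_hdp_flush(struct ring *r) { (void)r; puts("generic HDP flush"); }
static void uvd_hdp_flush_nop(struct ring *r) { (void)r; /* firmware dislikes this, do nothing */ }

static void emit_hdp_flush(struct ring *r)
{
	if (r->funcs->emit_hdp_flush)
		r->funcs->emit_hdp_flush(r);    /* per-IP override */
	else
		generic_hdp_flush(r);           /* fallback */
}

int main(void)
{
	const struct ring_funcs uvd_funcs = { .emit_hdp_flush = uvd_hdp_flush_nop };
	struct ring uvd_ring = { .funcs = &uvd_funcs };

	emit_hdp_flush(&uvd_ring);              /* silently skipped */
	return 0;
}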
@@ -1089,6 +1100,18 @@ static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1089 amdgpu_ring_write(ring, 0xE); 1100 amdgpu_ring_write(ring, 0xE);
1090} 1101}
1091 1102
1103static void uvd_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
1104{
1105 int i;
1106
1107 WARN_ON(ring->wptr % 2 || count % 2);
1108
1109 for (i = 0; i < count / 2; i++) {
1110 amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
1111 amdgpu_ring_write(ring, 0);
1112 }
1113}
1114
1092static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 1115static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1093{ 1116{
1094 uint32_t seq = ring->fence_drv.sync_seq; 1117 uint32_t seq = ring->fence_drv.sync_seq;
@@ -1148,10 +1171,10 @@ static bool uvd_v6_0_check_soft_reset(void *handle)
1148 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); 1171 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
1149 1172
1150 if (srbm_soft_reset) { 1173 if (srbm_soft_reset) {
1151 adev->uvd.srbm_soft_reset = srbm_soft_reset; 1174 adev->uvd.inst->srbm_soft_reset = srbm_soft_reset;
1152 return true; 1175 return true;
1153 } else { 1176 } else {
1154 adev->uvd.srbm_soft_reset = 0; 1177 adev->uvd.inst->srbm_soft_reset = 0;
1155 return false; 1178 return false;
1156 } 1179 }
1157} 1180}
@@ -1160,7 +1183,7 @@ static int uvd_v6_0_pre_soft_reset(void *handle)
1160{ 1183{
1161 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1184 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1162 1185
1163 if (!adev->uvd.srbm_soft_reset) 1186 if (!adev->uvd.inst->srbm_soft_reset)
1164 return 0; 1187 return 0;
1165 1188
1166 uvd_v6_0_stop(adev); 1189 uvd_v6_0_stop(adev);
@@ -1172,9 +1195,9 @@ static int uvd_v6_0_soft_reset(void *handle)
1172 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1195 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1173 u32 srbm_soft_reset; 1196 u32 srbm_soft_reset;
1174 1197
1175 if (!adev->uvd.srbm_soft_reset) 1198 if (!adev->uvd.inst->srbm_soft_reset)
1176 return 0; 1199 return 0;
1177 srbm_soft_reset = adev->uvd.srbm_soft_reset; 1200 srbm_soft_reset = adev->uvd.inst->srbm_soft_reset;
1178 1201
1179 if (srbm_soft_reset) { 1202 if (srbm_soft_reset) {
1180 u32 tmp; 1203 u32 tmp;
@@ -1202,7 +1225,7 @@ static int uvd_v6_0_post_soft_reset(void *handle)
1202{ 1225{
1203 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1226 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1204 1227
1205 if (!adev->uvd.srbm_soft_reset) 1228 if (!adev->uvd.inst->srbm_soft_reset)
1206 return 0; 1229 return 0;
1207 1230
1208 mdelay(5); 1231 mdelay(5);
@@ -1228,17 +1251,17 @@ static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev,
1228 1251
1229 switch (entry->src_id) { 1252 switch (entry->src_id) {
1230 case 124: 1253 case 124:
1231 amdgpu_fence_process(&adev->uvd.ring); 1254 amdgpu_fence_process(&adev->uvd.inst->ring);
1232 break; 1255 break;
1233 case 119: 1256 case 119:
1234 if (likely(uvd_v6_0_enc_support(adev))) 1257 if (likely(uvd_v6_0_enc_support(adev)))
1235 amdgpu_fence_process(&adev->uvd.ring_enc[0]); 1258 amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]);
1236 else 1259 else
1237 int_handled = false; 1260 int_handled = false;
1238 break; 1261 break;
1239 case 120: 1262 case 120:
1240 if (likely(uvd_v6_0_enc_support(adev))) 1263 if (likely(uvd_v6_0_enc_support(adev)))
1241 amdgpu_fence_process(&adev->uvd.ring_enc[1]); 1264 amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]);
1242 else 1265 else
1243 int_handled = false; 1266 int_handled = false;
1244 break; 1267 break;
@@ -1521,22 +1544,22 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
1521static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { 1544static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
1522 .type = AMDGPU_RING_TYPE_UVD, 1545 .type = AMDGPU_RING_TYPE_UVD,
1523 .align_mask = 0xf, 1546 .align_mask = 0xf,
1524 .nop = PACKET0(mmUVD_NO_OP, 0),
1525 .support_64bit_ptrs = false, 1547 .support_64bit_ptrs = false,
1526 .get_rptr = uvd_v6_0_ring_get_rptr, 1548 .get_rptr = uvd_v6_0_ring_get_rptr,
1527 .get_wptr = uvd_v6_0_ring_get_wptr, 1549 .get_wptr = uvd_v6_0_ring_get_wptr,
1528 .set_wptr = uvd_v6_0_ring_set_wptr, 1550 .set_wptr = uvd_v6_0_ring_set_wptr,
1529 .parse_cs = amdgpu_uvd_ring_parse_cs, 1551 .parse_cs = amdgpu_uvd_ring_parse_cs,
1530 .emit_frame_size = 1552 .emit_frame_size =
1531 6 + 6 + /* hdp flush / invalidate */ 1553 6 + /* hdp invalidate */
1532 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ 1554 10 + /* uvd_v6_0_ring_emit_pipeline_sync */
1533 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */ 1555 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
1534 .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ 1556 .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
1535 .emit_ib = uvd_v6_0_ring_emit_ib, 1557 .emit_ib = uvd_v6_0_ring_emit_ib,
1536 .emit_fence = uvd_v6_0_ring_emit_fence, 1558 .emit_fence = uvd_v6_0_ring_emit_fence,
1559 .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
1537 .test_ring = uvd_v6_0_ring_test_ring, 1560 .test_ring = uvd_v6_0_ring_test_ring,
1538 .test_ib = amdgpu_uvd_ring_test_ib, 1561 .test_ib = amdgpu_uvd_ring_test_ib,
1539 .insert_nop = amdgpu_ring_insert_nop, 1562 .insert_nop = uvd_v6_0_ring_insert_nop,
1540 .pad_ib = amdgpu_ring_generic_pad_ib, 1563 .pad_ib = amdgpu_ring_generic_pad_ib,
1541 .begin_use = amdgpu_uvd_ring_begin_use, 1564 .begin_use = amdgpu_uvd_ring_begin_use,
1542 .end_use = amdgpu_uvd_ring_end_use, 1565 .end_use = amdgpu_uvd_ring_end_use,
@@ -1552,7 +1575,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
1552 .get_wptr = uvd_v6_0_ring_get_wptr, 1575 .get_wptr = uvd_v6_0_ring_get_wptr,
1553 .set_wptr = uvd_v6_0_ring_set_wptr, 1576 .set_wptr = uvd_v6_0_ring_set_wptr,
1554 .emit_frame_size = 1577 .emit_frame_size =
1555 6 + 6 + /* hdp flush / invalidate */ 1578 6 + /* hdp invalidate */
1556 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ 1579 10 + /* uvd_v6_0_ring_emit_pipeline_sync */
1557 VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */ 1580 VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */
1558 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */ 1581 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
@@ -1561,6 +1584,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
1561 .emit_fence = uvd_v6_0_ring_emit_fence, 1584 .emit_fence = uvd_v6_0_ring_emit_fence,
1562 .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush, 1585 .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
1563 .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync, 1586 .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
1587 .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
1564 .test_ring = uvd_v6_0_ring_test_ring, 1588 .test_ring = uvd_v6_0_ring_test_ring,
1565 .test_ib = amdgpu_uvd_ring_test_ib, 1589 .test_ib = amdgpu_uvd_ring_test_ib,
1566 .insert_nop = amdgpu_ring_insert_nop, 1590 .insert_nop = amdgpu_ring_insert_nop,
@@ -1600,10 +1624,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
1600static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) 1624static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
1601{ 1625{
1602 if (adev->asic_type >= CHIP_POLARIS10) { 1626 if (adev->asic_type >= CHIP_POLARIS10) {
1603 adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs; 1627 adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs;
1604 DRM_INFO("UVD is enabled in VM mode\n"); 1628 DRM_INFO("UVD is enabled in VM mode\n");
1605 } else { 1629 } else {
1606 adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs; 1630 adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs;
1607 DRM_INFO("UVD is enabled in physical mode\n"); 1631 DRM_INFO("UVD is enabled in physical mode\n");
1608 } 1632 }
1609} 1633}
@@ -1613,7 +1637,7 @@ static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev)
1613 int i; 1637 int i;
1614 1638
1615 for (i = 0; i < adev->uvd.num_enc_rings; ++i) 1639 for (i = 0; i < adev->uvd.num_enc_rings; ++i)
1616 adev->uvd.ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs; 1640 adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
1617 1641
1618 DRM_INFO("UVD ENC is enabled in VM mode\n"); 1642 DRM_INFO("UVD ENC is enabled in VM mode\n");
1619} 1643}
@@ -1626,11 +1650,11 @@ static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = {
1626static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev) 1650static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev)
1627{ 1651{
1628 if (uvd_v6_0_enc_support(adev)) 1652 if (uvd_v6_0_enc_support(adev))
1629 adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; 1653 adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1;
1630 else 1654 else
1631 adev->uvd.irq.num_types = 1; 1655 adev->uvd.inst->irq.num_types = 1;
1632 1656
1633 adev->uvd.irq.funcs = &uvd_v6_0_irq_funcs; 1657 adev->uvd.inst->irq.funcs = &uvd_v6_0_irq_funcs;
1634} 1658}
1635 1659
1636const struct amdgpu_ip_block_version uvd_v6_0_ip_block = 1660const struct amdgpu_ip_block_version uvd_v6_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index eddc57f3b72a..57d32f21b3a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -40,6 +40,8 @@
40#include "mmhub/mmhub_1_0_offset.h" 40#include "mmhub/mmhub_1_0_offset.h"
41#include "mmhub/mmhub_1_0_sh_mask.h" 41#include "mmhub/mmhub_1_0_sh_mask.h"
42 42
43#define UVD7_MAX_HW_INSTANCES_VEGA20 2
44
43static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev); 45static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev);
44static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); 46static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
45static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev); 47static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -47,6 +49,11 @@ static int uvd_v7_0_start(struct amdgpu_device *adev);
47static void uvd_v7_0_stop(struct amdgpu_device *adev); 49static void uvd_v7_0_stop(struct amdgpu_device *adev);
48static int uvd_v7_0_sriov_start(struct amdgpu_device *adev); 50static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
49 51
52static int amdgpu_ih_clientid_uvds[] = {
53 SOC15_IH_CLIENTID_UVD,
54 SOC15_IH_CLIENTID_UVD1
55};
56
50/** 57/**
51 * uvd_v7_0_ring_get_rptr - get read pointer 58 * uvd_v7_0_ring_get_rptr - get read pointer
52 * 59 *
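amdgpu_ih_clientid_uvds[] maps a UVD instance index to its interrupt-handler client, so sw_init can register the decode trap (src id 124) and the encode traps (119 and up) once per instance on Vega20. A minimal sketch under assumptions (the client-id numbers are placeholders) of how the instance count from early_init drives that loop:

/* Placeholder client IDs; only the indexing pattern is the point. */
#include <stdio.h>

enum { IH_CLIENTID_UVD = 14, IH_CLIENTID_UVD1 = 15 };

static const int ih_clientid_uvds[] = { IH_CLIENTID_UVD, IH_CLIENTID_UVD1 };

int main(void)
{
	int is_vega20 = 1;
	int num_uvd_inst = is_vega20 ? 2 : 1;
	int j;

	for (j = 0; j < num_uvd_inst; j++)
		printf("instance %d -> IH client %d, src ids 124 and 119+\n",
		       j, ih_clientid_uvds[j]);
	return 0;
}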
@@ -58,7 +65,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
58{ 65{
59 struct amdgpu_device *adev = ring->adev; 66 struct amdgpu_device *adev = ring->adev;
60 67
61 return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); 68 return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
62} 69}
63 70
64/** 71/**
@@ -72,10 +79,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
72{ 79{
73 struct amdgpu_device *adev = ring->adev; 80 struct amdgpu_device *adev = ring->adev;
74 81
75 if (ring == &adev->uvd.ring_enc[0]) 82 if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
76 return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); 83 return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
77 else 84 else
78 return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); 85 return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
79} 86}
80 87
81/** 88/**
@@ -89,7 +96,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
89{ 96{
90 struct amdgpu_device *adev = ring->adev; 97 struct amdgpu_device *adev = ring->adev;
91 98
92 return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); 99 return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
93} 100}
94 101
95/** 102/**
@@ -106,10 +113,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
106 if (ring->use_doorbell) 113 if (ring->use_doorbell)
107 return adev->wb.wb[ring->wptr_offs]; 114 return adev->wb.wb[ring->wptr_offs];
108 115
109 if (ring == &adev->uvd.ring_enc[0]) 116 if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
110 return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); 117 return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
111 else 118 else
112 return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); 119 return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
113} 120}
114 121
115/** 122/**
@@ -123,7 +130,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
123{ 130{
124 struct amdgpu_device *adev = ring->adev; 131 struct amdgpu_device *adev = ring->adev;
125 132
126 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); 133 WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
127} 134}
128 135
129/** 136/**
@@ -144,11 +151,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
144 return; 151 return;
145 } 152 }
146 153
147 if (ring == &adev->uvd.ring_enc[0]) 154 if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
148 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, 155 WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR,
149 lower_32_bits(ring->wptr)); 156 lower_32_bits(ring->wptr));
150 else 157 else
151 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, 158 WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2,
152 lower_32_bits(ring->wptr)); 159 lower_32_bits(ring->wptr));
153} 160}
154 161
@@ -170,8 +177,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
170 177
171 r = amdgpu_ring_alloc(ring, 16); 178 r = amdgpu_ring_alloc(ring, 16);
172 if (r) { 179 if (r) {
173 DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", 180 DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n",
174 ring->idx, r); 181 ring->me, ring->idx, r);
175 return r; 182 return r;
176 } 183 }
177 amdgpu_ring_write(ring, HEVC_ENC_CMD_END); 184 amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
@@ -184,11 +191,11 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
184 } 191 }
185 192
186 if (i < adev->usec_timeout) { 193 if (i < adev->usec_timeout) {
187 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", 194 DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
188 ring->idx, i); 195 ring->me, ring->idx, i);
189 } else { 196 } else {
190 DRM_ERROR("amdgpu: ring %d test failed\n", 197 DRM_ERROR("amdgpu: (%d)ring %d test failed\n",
191 ring->idx); 198 ring->me, ring->idx);
192 r = -ETIMEDOUT; 199 r = -ETIMEDOUT;
193 } 200 }
194 201
@@ -342,24 +349,24 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
342 349
343 r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); 350 r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
344 if (r) { 351 if (r) {
345 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); 352 DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r);
346 goto error; 353 goto error;
347 } 354 }
348 355
349 r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence); 356 r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence);
350 if (r) { 357 if (r) {
351 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); 358 DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
352 goto error; 359 goto error;
353 } 360 }
354 361
355 r = dma_fence_wait_timeout(fence, false, timeout); 362 r = dma_fence_wait_timeout(fence, false, timeout);
356 if (r == 0) { 363 if (r == 0) {
357 DRM_ERROR("amdgpu: IB test timed out.\n"); 364 DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me);
358 r = -ETIMEDOUT; 365 r = -ETIMEDOUT;
359 } else if (r < 0) { 366 } else if (r < 0) {
360 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 367 DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r);
361 } else { 368 } else {
362 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 369 DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx);
363 r = 0; 370 r = 0;
364 } 371 }
365error: 372error:
@@ -370,6 +377,10 @@ error:
370static int uvd_v7_0_early_init(void *handle) 377static int uvd_v7_0_early_init(void *handle)
371{ 378{
372 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 379 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
380 if (adev->asic_type == CHIP_VEGA20)
381 adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20;
382 else
383 adev->uvd.num_uvd_inst = 1;
373 384
374 if (amdgpu_sriov_vf(adev)) 385 if (amdgpu_sriov_vf(adev))
375 adev->uvd.num_enc_rings = 1; 386 adev->uvd.num_enc_rings = 1;
@@ -386,19 +397,21 @@ static int uvd_v7_0_sw_init(void *handle)
386{ 397{
387 struct amdgpu_ring *ring; 398 struct amdgpu_ring *ring;
388 struct drm_sched_rq *rq; 399 struct drm_sched_rq *rq;
389 int i, r; 400 int i, j, r;
390 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 401 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
391 402
392 /* UVD TRAP */ 403 for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
393 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq); 404 /* UVD TRAP */
394 if (r) 405 r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], 124, &adev->uvd.inst[j].irq);
395 return r;
396
397 /* UVD ENC TRAP */
398 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
399 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
400 if (r) 406 if (r)
401 return r; 407 return r;
408
409 /* UVD ENC TRAP */
410 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
411 r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + 119, &adev->uvd.inst[j].irq);
412 if (r)
413 return r;
414 }
402 } 415 }
403 416
404 r = amdgpu_uvd_sw_init(adev); 417 r = amdgpu_uvd_sw_init(adev);
@@ -415,43 +428,48 @@ static int uvd_v7_0_sw_init(void *handle)
415 DRM_INFO("PSP loading UVD firmware\n"); 428 DRM_INFO("PSP loading UVD firmware\n");
416 } 429 }
417 430
418 ring = &adev->uvd.ring_enc[0]; 431 for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
419 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; 432 ring = &adev->uvd.inst[j].ring_enc[0];
420 r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, 433 rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
421 rq, amdgpu_sched_jobs, NULL); 434 r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc,
422 if (r) { 435 rq, NULL);
423 DRM_ERROR("Failed setting up UVD ENC run queue.\n"); 436 if (r) {
424 return r; 437 DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j);
438 return r;
439 }
425 } 440 }
426 441
427 r = amdgpu_uvd_resume(adev); 442 r = amdgpu_uvd_resume(adev);
428 if (r) 443 if (r)
429 return r; 444 return r;
430 if (!amdgpu_sriov_vf(adev)) {
431 ring = &adev->uvd.ring;
432 sprintf(ring->name, "uvd");
433 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
434 if (r)
435 return r;
436 }
437 445
438 for (i = 0; i < adev->uvd.num_enc_rings; ++i) { 446 for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
439 ring = &adev->uvd.ring_enc[i]; 447 if (!amdgpu_sriov_vf(adev)) {
440 sprintf(ring->name, "uvd_enc%d", i); 448 ring = &adev->uvd.inst[j].ring;
441 if (amdgpu_sriov_vf(adev)) { 449 sprintf(ring->name, "uvd<%d>", j);
442 ring->use_doorbell = true; 450 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
443 451 if (r)
444 /* currently only use the first encoding ring for
445 * sriov, so set unused location for other unused rings. 453 }
446 */ 454
447 if (i == 0) 455 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
448 ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; 456 ring = &adev->uvd.inst[j].ring_enc[i];
449 else 457 sprintf(ring->name, "uvd_enc%d<%d>", i, j);
450 ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; 458 if (amdgpu_sriov_vf(adev)) {
459 ring->use_doorbell = true;
460
461 /* currently only use the first encoding ring for
462 * sriov, so set unused location for other unused rings.
463 */
464 if (i == 0)
465 ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
466 else
467 ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
468 }
469 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
470 if (r)
471 return r;
451 } 472 }
452 r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
453 if (r)
454 return r;
455 } 473 }
456 474
457 r = amdgpu_virt_alloc_mm_table(adev); 475 r = amdgpu_virt_alloc_mm_table(adev);
@@ -463,7 +481,7 @@ static int uvd_v7_0_sw_init(void *handle)
463 481
464static int uvd_v7_0_sw_fini(void *handle) 482static int uvd_v7_0_sw_fini(void *handle)
465{ 483{
466 int i, r; 484 int i, j, r;
467 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 485 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
468 486
469 amdgpu_virt_free_mm_table(adev); 487 amdgpu_virt_free_mm_table(adev);
@@ -472,11 +490,12 @@ static int uvd_v7_0_sw_fini(void *handle)
472 if (r) 490 if (r)
473 return r; 491 return r;
474 492
475 drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); 493 for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
476 494 drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc);
477 for (i = 0; i < adev->uvd.num_enc_rings; ++i)
478 amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
479 495
496 for (i = 0; i < adev->uvd.num_enc_rings; ++i)
497 amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
498 }
480 return amdgpu_uvd_sw_fini(adev); 499 return amdgpu_uvd_sw_fini(adev);
481} 500}
482 501
@@ -490,9 +509,9 @@ static int uvd_v7_0_sw_fini(void *handle)
490static int uvd_v7_0_hw_init(void *handle) 509static int uvd_v7_0_hw_init(void *handle)
491{ 510{
492 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 511 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
493 struct amdgpu_ring *ring = &adev->uvd.ring; 512 struct amdgpu_ring *ring;
494 uint32_t tmp; 513 uint32_t tmp;
495 int i, r; 514 int i, j, r;
496 515
497 if (amdgpu_sriov_vf(adev)) 516 if (amdgpu_sriov_vf(adev))
498 r = uvd_v7_0_sriov_start(adev); 517 r = uvd_v7_0_sriov_start(adev);
@@ -501,57 +520,60 @@ static int uvd_v7_0_hw_init(void *handle)
501 if (r) 520 if (r)
502 goto done; 521 goto done;
503 522
504 if (!amdgpu_sriov_vf(adev)) { 523 for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
505 ring->ready = true; 524 ring = &adev->uvd.inst[j].ring;
506 r = amdgpu_ring_test_ring(ring); 525
507 if (r) { 526 if (!amdgpu_sriov_vf(adev)) {
508 ring->ready = false; 527 ring->ready = true;
509 goto done; 528 r = amdgpu_ring_test_ring(ring);
529 if (r) {
530 ring->ready = false;
531 goto done;
532 }
533
534 r = amdgpu_ring_alloc(ring, 10);
535 if (r) {
536 DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
537 goto done;
538 }
539
540 tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
541 mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
542 amdgpu_ring_write(ring, tmp);
543 amdgpu_ring_write(ring, 0xFFFFF);
544
545 tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
546 mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
547 amdgpu_ring_write(ring, tmp);
548 amdgpu_ring_write(ring, 0xFFFFF);
549
550 tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
551 mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
552 amdgpu_ring_write(ring, tmp);
553 amdgpu_ring_write(ring, 0xFFFFF);
554
555 /* Clear timeout status bits */
556 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
557 mmUVD_SEMA_TIMEOUT_STATUS), 0));
558 amdgpu_ring_write(ring, 0x8);
559
560 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
561 mmUVD_SEMA_CNTL), 0));
562 amdgpu_ring_write(ring, 3);
563
564 amdgpu_ring_commit(ring);
510 } 565 }
511 566
512 r = amdgpu_ring_alloc(ring, 10); 567 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
513 if (r) { 568 ring = &adev->uvd.inst[j].ring_enc[i];
514 DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); 569 ring->ready = true;
515 goto done; 570 r = amdgpu_ring_test_ring(ring);
571 if (r) {
572 ring->ready = false;
573 goto done;
574 }
516 } 575 }
517
518 tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
519 mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
520 amdgpu_ring_write(ring, tmp);
521 amdgpu_ring_write(ring, 0xFFFFF);
522
523 tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
524 mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
525 amdgpu_ring_write(ring, tmp);
526 amdgpu_ring_write(ring, 0xFFFFF);
527
528 tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
529 mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
530 amdgpu_ring_write(ring, tmp);
531 amdgpu_ring_write(ring, 0xFFFFF);
532
533 /* Clear timeout status bits */
534 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
535 mmUVD_SEMA_TIMEOUT_STATUS), 0));
536 amdgpu_ring_write(ring, 0x8);
537
538 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
539 mmUVD_SEMA_CNTL), 0));
540 amdgpu_ring_write(ring, 3);
541
542 amdgpu_ring_commit(ring);
543 } 576 }
544
545 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
546 ring = &adev->uvd.ring_enc[i];
547 ring->ready = true;
548 r = amdgpu_ring_test_ring(ring);
549 if (r) {
550 ring->ready = false;
551 goto done;
552 }
553 }
554
555done: 577done:
556 if (!r) 578 if (!r)
557 DRM_INFO("UVD and UVD ENC initialized successfully.\n"); 579 DRM_INFO("UVD and UVD ENC initialized successfully.\n");
@@ -569,7 +591,7 @@ done:
569static int uvd_v7_0_hw_fini(void *handle) 591static int uvd_v7_0_hw_fini(void *handle)
570{ 592{
571 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 593 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
572 struct amdgpu_ring *ring = &adev->uvd.ring; 594 int i;
573 595
574 if (!amdgpu_sriov_vf(adev)) 596 if (!amdgpu_sriov_vf(adev))
575 uvd_v7_0_stop(adev); 597 uvd_v7_0_stop(adev);
@@ -578,7 +600,8 @@ static int uvd_v7_0_hw_fini(void *handle)
578 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); 600 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
579 } 601 }
580 602
581 ring->ready = false; 603 for (i = 0; i < adev->uvd.num_uvd_inst; ++i)
604 adev->uvd.inst[i].ring.ready = false;
582 605
583 return 0; 606 return 0;
584} 607}
@@ -618,48 +641,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
618{ 641{
619 uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev); 642 uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
620 uint32_t offset; 643 uint32_t offset;
644 int i;
621 645
622 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 646 for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
623 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, 647 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
624 lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); 648 WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
625 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, 649 lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
626 upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); 650 WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
627 offset = 0; 651 upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
628 } else { 652 offset = 0;
629 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, 653 } else {
630 lower_32_bits(adev->uvd.gpu_addr)); 654 WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
631 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, 655 lower_32_bits(adev->uvd.inst[i].gpu_addr));
632 upper_32_bits(adev->uvd.gpu_addr)); 656 WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
633 offset = size; 657 upper_32_bits(adev->uvd.inst[i].gpu_addr));
634 } 658 offset = size;
659 }
635 660
636 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 661 WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
637 AMDGPU_UVD_FIRMWARE_OFFSET >> 3); 662 AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
638 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); 663 WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
639 664
640 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, 665 WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
641 lower_32_bits(adev->uvd.gpu_addr + offset)); 666 lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
642 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, 667 WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
643 upper_32_bits(adev->uvd.gpu_addr + offset)); 668 upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
644 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); 669 WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
645 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); 670 WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
646 671
647 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, 672 WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
648 lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); 673 lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
649 WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, 674 WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
650 upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); 675 upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
651 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); 676 WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
652 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, 677 WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2,
653 AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); 678 AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
654 679
655 WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, 680 WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG,
656 adev->gfx.config.gb_addr_config); 681 adev->gfx.config.gb_addr_config);
657 WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, 682 WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG,
658 adev->gfx.config.gb_addr_config); 683 adev->gfx.config.gb_addr_config);
659 WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, 684 WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG,
660 adev->gfx.config.gb_addr_config); 685 adev->gfx.config.gb_addr_config);
661 686
662 WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); 687 WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
688 }
663} 689}
664 690
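
[editor's note] uvd_v7_0_mc_resume() above now feeds the loop index i into WREG32_SOC15()/SOC15_REG_OFFSET() instead of a hard-coded 0. Roughly, that instance argument selects which copy of the register block is addressed; a simplified expansion follows, and since the exact macros live in soc15_common.h, treat the shape below as an assumption:

/* Sketch only: per-instance register addressing. Each IP block keeps
 * one register-offset table per hardware instance.                      */
#define SOC15_REG_OFFSET(ip, inst, reg) \
        (adev->reg_offset[ip##_HWIP][(inst)][reg##_BASE_IDX] + (reg))

#define WREG32_SOC15(ip, inst, reg, value) \
        WREG32(SOC15_REG_OFFSET(ip, inst, reg), (value))

/* e.g. WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size) programs the
 * cache-size register of UVD instance i rather than instance 0.         */
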
665static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, 691static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
@@ -669,6 +695,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
669 uint64_t addr = table->gpu_addr; 695 uint64_t addr = table->gpu_addr;
670 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; 696 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
671 uint32_t size; 697 uint32_t size;
698 int i;
672 699
673 size = header->header_size + header->vce_table_size + header->uvd_table_size; 700 size = header->header_size + header->vce_table_size + header->uvd_table_size;
674 701
@@ -688,11 +715,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
688 /* 4, set resp to zero */ 715 /* 4, set resp to zero */
689 WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); 716 WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
690 717
691 WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); 718 for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
692 adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0; 719 WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
693 adev->uvd.ring_enc[0].wptr = 0; 720 adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
694 adev->uvd.ring_enc[0].wptr_old = 0; 721 adev->uvd.inst[i].ring_enc[0].wptr = 0;
695 722 adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
723 }
696 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ 724 /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
697 WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); 725 WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
698 726
@@ -725,6 +753,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
725 struct mmsch_v1_0_cmd_end end = { {0} }; 753 struct mmsch_v1_0_cmd_end end = { {0} };
726 uint32_t *init_table = adev->virt.mm_table.cpu_addr; 754 uint32_t *init_table = adev->virt.mm_table.cpu_addr;
727 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; 755 struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
756 uint8_t i = 0;
728 757
729 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; 758 direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
730 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; 759 direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
@@ -742,120 +771,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
742 771
743 init_table += header->uvd_table_offset; 772 init_table += header->uvd_table_offset;
744 773
745 ring = &adev->uvd.ring; 774 for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
746 ring->wptr = 0; 775 ring = &adev->uvd.inst[i].ring;
747 size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); 776 ring->wptr = 0;
748 777 size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
749 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 778
750 0xFFFFFFFF, 0x00000004); 779 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
751 /* mc resume*/ 780 0xFFFFFFFF, 0x00000004);
752 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 781 /* mc resume*/
753 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 782 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
754 lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); 783 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
755 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 784 lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
756 upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); 785 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
757 offset = 0; 786 upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
758 } else { 787 offset = 0;
759 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 788 } else {
760 lower_32_bits(adev->uvd.gpu_addr)); 789 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
761 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 790 lower_32_bits(adev->uvd.inst[i].gpu_addr));
762 upper_32_bits(adev->uvd.gpu_addr)); 791 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
763 offset = size; 792 upper_32_bits(adev->uvd.inst[i].gpu_addr));
793 offset = size;
794 }
795
796 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
797 AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
798 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
799
800 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
801 lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
802 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
803 upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
804 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
805 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
806
807 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
808 lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
809 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
810 upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
811 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
812 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
813 AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
814
815 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
816 /* mc resume end*/
817
818 /* disable clock gating */
819 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL),
820 ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
821
822 /* disable interupt */
823 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
824 ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
825
826 /* stall UMC and register bus before resetting VCPU */
827 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
828 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
829 UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
830
831 /* put LMI, VCPU, RBC etc... into reset */
832 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
833 (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
834 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
835 UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
836 UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
837 UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
838 UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
839 UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
840 UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
841
842 /* initialize UVD memory controller */
843 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL),
844 (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
845 UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
846 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
847 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
848 UVD_LMI_CTRL__REQ_MODE_MASK |
849 0x00100000L));
850
851 /* take all subblocks out of reset, except VCPU */
852 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
853 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
854
855 /* enable VCPU clock */
856 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
857 UVD_VCPU_CNTL__CLK_EN_MASK);
858
859 /* enable master interrupt */
860 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
861 ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
862 (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
863
864 /* clear the bit 4 of UVD_STATUS */
865 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
866 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
867
868 /* force RBC into idle state */
869 size = order_base_2(ring->ring_size);
870 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
871 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
872 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
873
874 ring = &adev->uvd.inst[i].ring_enc[0];
875 ring->wptr = 0;
876 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr);
877 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
878 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4);
879
880 /* boot up the VCPU */
881 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0);
882
883 /* enable UMC */
884 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
885 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
886
887 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02);
764 } 888 }
765
766 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
767 AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
768 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
769
770 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
771 lower_32_bits(adev->uvd.gpu_addr + offset));
772 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
773 upper_32_bits(adev->uvd.gpu_addr + offset));
774 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
775 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
776
777 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
778 lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
779 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
780 upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
781 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
782 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
783 AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
784
785 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
786 /* mc resume end*/
787
788 /* disable clock gating */
789 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
790 ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
791
792 /* disable interupt */
793 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
794 ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
795
796 /* stall UMC and register bus before resetting VCPU */
797 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
798 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
799 UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
800
801 /* put LMI, VCPU, RBC etc... into reset */
802 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
803 (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
804 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
805 UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
806 UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
807 UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
808 UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
809 UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
810 UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
811
812 /* initialize UVD memory controller */
813 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
814 (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
815 UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
816 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
817 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
818 UVD_LMI_CTRL__REQ_MODE_MASK |
819 0x00100000L));
820
821 /* take all subblocks out of reset, except VCPU */
822 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
823 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
824
825 /* enable VCPU clock */
826 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
827 UVD_VCPU_CNTL__CLK_EN_MASK);
828
829 /* enable master interrupt */
830 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
831 ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
832 (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
833
834 /* clear the bit 4 of UVD_STATUS */
835 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
836 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
837
838 /* force RBC into idle state */
839 size = order_base_2(ring->ring_size);
840 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
841 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
842 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
843
844 ring = &adev->uvd.ring_enc[0];
845 ring->wptr = 0;
846 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
847 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
848 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
849
850 /* boot up the VCPU */
851 MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
852
853 /* enable UMC */
854 MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
855 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
856
857 MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);
858
859 /* add end packet */ 889 /* add end packet */
860 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); 890 memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
861 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; 891 table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
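
[editor's note] For SR-IOV the hunk above does not touch the registers directly: every MMSCH_V1_0_INSERT_* call appends a command record to the shared init table, which the MMSCH firmware replays once uvd_v7_0_mmsch_start() rings its mailbox. A rough model of the direct-write entry; the struct layout, helper and placeholder constant below are assumptions, not the real mmsch_v1_0.h macros:

#include <stdint.h>
#include <string.h>

#define MMSCH_COMMAND__DIRECT_REG_WRITE 0        /* placeholder value */

/* Sketch of one init-table entry; the real macros also advance
 * table_size so the header's uvd_table_size stays consistent.       */
struct direct_write_sketch {
        uint32_t cmd_header;     /* MMSCH_COMMAND__DIRECT_REG_WRITE   */
        uint32_t reg_offset;     /* SOC15_REG_OFFSET(UVD, i, ...)     */
        uint32_t reg_value;      /* value MMSCH writes on our behalf  */
};

static uint32_t *insert_direct_wt(uint32_t *table, uint32_t reg, uint32_t val)
{
        struct direct_write_sketch cmd = {
                .cmd_header = MMSCH_COMMAND__DIRECT_REG_WRITE,
                .reg_offset = reg,
                .reg_value  = val,
        };

        memcpy(table, &cmd, sizeof(cmd));
        return table + sizeof(cmd) / sizeof(uint32_t);  /* advance in dwords */
}
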
@@ -874,15 +904,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
874 */ 904 */
875static int uvd_v7_0_start(struct amdgpu_device *adev) 905static int uvd_v7_0_start(struct amdgpu_device *adev)
876{ 906{
877 struct amdgpu_ring *ring = &adev->uvd.ring; 907 struct amdgpu_ring *ring;
878 uint32_t rb_bufsz, tmp; 908 uint32_t rb_bufsz, tmp;
879 uint32_t lmi_swap_cntl; 909 uint32_t lmi_swap_cntl;
880 uint32_t mp_swap_cntl; 910 uint32_t mp_swap_cntl;
881 int i, j, r; 911 int i, j, k, r;
882 912
883 /* disable DPG */ 913 for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
884 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, 914 /* disable DPG */
885 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); 915 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
916 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
917 }
886 918
887 /* disable byte swapping */ 919 /* disable byte swapping */
888 lmi_swap_cntl = 0; 920 lmi_swap_cntl = 0;
@@ -890,157 +922,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
890 922
891 uvd_v7_0_mc_resume(adev); 923 uvd_v7_0_mc_resume(adev);
892 924
893 /* disable clock gating */ 925 for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
894 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0, 926 ring = &adev->uvd.inst[k].ring;
895 ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); 927 /* disable clock gating */
896 928 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
897 /* disable interupt */ 929 ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
898 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
899 ~UVD_MASTINT_EN__VCPU_EN_MASK);
900
901 /* stall UMC and register bus before resetting VCPU */
902 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
903 UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
904 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
905 mdelay(1);
906
907 /* put LMI, VCPU, RBC etc... into reset */
908 WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
909 UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
910 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
911 UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
912 UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
913 UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
914 UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
915 UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
916 UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
917 mdelay(5);
918 930
919 /* initialize UVD memory controller */ 931 /* disable interupt */
920 WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, 932 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0,
921 (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | 933 ~UVD_MASTINT_EN__VCPU_EN_MASK);
922 UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | 934
923 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | 935 /* stall UMC and register bus before resetting VCPU */
924 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | 936 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2),
925 UVD_LMI_CTRL__REQ_MODE_MASK | 937 UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
926 0x00100000L); 938 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
939 mdelay(1);
940
941 /* put LMI, VCPU, RBC etc... into reset */
942 WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
943 UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
944 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
945 UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
946 UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
947 UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
948 UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
949 UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
950 UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
951 mdelay(5);
952
953 /* initialize UVD memory controller */
954 WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL,
955 (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
956 UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
957 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
958 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
959 UVD_LMI_CTRL__REQ_MODE_MASK |
960 0x00100000L);
927 961
928#ifdef __BIG_ENDIAN 962#ifdef __BIG_ENDIAN
929 /* swap (8 in 32) RB and IB */ 963 /* swap (8 in 32) RB and IB */
930 lmi_swap_cntl = 0xa; 964 lmi_swap_cntl = 0xa;
931 mp_swap_cntl = 0; 965 mp_swap_cntl = 0;
932#endif 966#endif
933 WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); 967 WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
934 WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); 968 WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
935
936 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
937 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
938 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
939 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
940 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
941 WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
942
943 /* take all subblocks out of reset, except VCPU */
944 WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
945 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
946 mdelay(5);
947 969
948 /* enable VCPU clock */ 970 WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040);
949 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 971 WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0);
950 UVD_VCPU_CNTL__CLK_EN_MASK); 972 WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040);
973 WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0);
974 WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0);
975 WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88);
951 976
952 /* enable UMC */ 977 /* take all subblocks out of reset, except VCPU */
953 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, 978 WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
954 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); 979 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
980 mdelay(5);
955 981
956 /* boot up the VCPU */ 982 /* enable VCPU clock */
957 WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); 983 WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL,
958 mdelay(10); 984 UVD_VCPU_CNTL__CLK_EN_MASK);
959 985
960 for (i = 0; i < 10; ++i) { 986 /* enable UMC */
961 uint32_t status; 987 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0,
988 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
962 989
963 for (j = 0; j < 100; ++j) { 990 /* boot up the VCPU */
964 status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); 991 WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0);
992 mdelay(10);
993
994 for (i = 0; i < 10; ++i) {
995 uint32_t status;
996
997 for (j = 0; j < 100; ++j) {
998 status = RREG32_SOC15(UVD, k, mmUVD_STATUS);
999 if (status & 2)
1000 break;
1001 mdelay(10);
1002 }
1003 r = 0;
965 if (status & 2) 1004 if (status & 2)
966 break; 1005 break;
1006
1007 DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k);
1008 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
1009 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
1010 ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
967 mdelay(10); 1011 mdelay(10);
1012 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
1013 ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
1014 mdelay(10);
1015 r = -1;
968 } 1016 }
969 r = 0;
970 if (status & 2)
971 break;
972
973 DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
974 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
975 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
976 ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
977 mdelay(10);
978 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
979 ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
980 mdelay(10);
981 r = -1;
982 }
983
984 if (r) {
985 DRM_ERROR("UVD not responding, giving up!!!\n");
986 return r;
987 }
988 /* enable master interrupt */
989 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
990 (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
991 ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
992
993 /* clear the bit 4 of UVD_STATUS */
994 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
995 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
996
997 /* force RBC into idle state */
998 rb_bufsz = order_base_2(ring->ring_size);
999 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1000 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1001 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1002 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
1003 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1004 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1005 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
1006
1007 /* set the write pointer delay */
1008 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
1009
1010 /* set the wb address */
1011 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
1012 (upper_32_bits(ring->gpu_addr) >> 2));
1013
1014 /* programm the RB_BASE for ring buffer */
1015 WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1016 lower_32_bits(ring->gpu_addr));
1017 WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1018 upper_32_bits(ring->gpu_addr));
1019
1020 /* Initialize the ring buffer's read and write pointers */
1021 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
1022
1023 ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
1024 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
1025 lower_32_bits(ring->wptr));
1026 1017
1027 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, 1018 if (r) {
1028 ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); 1019 DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k);
1029 1020 return r;
1030 ring = &adev->uvd.ring_enc[0]; 1021 }
1031 WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); 1022 /* enable master interrupt */
1032 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); 1023 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN),
1033 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); 1024 (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
1034 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); 1025 ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
1035 WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
1036 1026
1037 ring = &adev->uvd.ring_enc[1]; 1027 /* clear the bit 4 of UVD_STATUS */
1038 WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); 1028 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0,
1039 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); 1029 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1040 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1041 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1042 WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
1043 1030
1031 /* force RBC into idle state */
1032 rb_bufsz = order_base_2(ring->ring_size);
1033 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1034 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1035 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1036 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
1037 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1038 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1039 WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp);
1040
1041 /* set the write pointer delay */
1042 WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0);
1043
1044 /* set the wb address */
1045 WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR,
1046 (upper_32_bits(ring->gpu_addr) >> 2));
1047
1048 /* programm the RB_BASE for ring buffer */
1049 WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1050 lower_32_bits(ring->gpu_addr));
1051 WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1052 upper_32_bits(ring->gpu_addr));
1053
1054 /* Initialize the ring buffer's read and write pointers */
1055 WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0);
1056
1057 ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR);
1058 WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR,
1059 lower_32_bits(ring->wptr));
1060
1061 WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0,
1062 ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
1063
1064 ring = &adev->uvd.inst[k].ring_enc[0];
1065 WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1066 WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1067 WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr);
1068 WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1069 WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4);
1070
1071 ring = &adev->uvd.inst[k].ring_enc[1];
1072 WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1073 WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1074 WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1075 WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1076 WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4);
1077 }
1044 return 0; 1078 return 0;
1045} 1079}
1046 1080
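
[editor's note] uvd_v7_0_start() above keeps the original retry policy, now applied once per instance k: poll UVD_STATUS up to 100 times, and if the VCPU never reports ready, pulse the VCPU soft reset and poll again, for up to 10 attempts. The same control flow with the register noise stripped out; this is a sketch of the logic, not a replacement for the code above:

/* Sketch of the per-instance VCPU boot loop used in uvd_v7_0_start(). */
static int uvd_boot_vcpu_sketch(struct amdgpu_device *adev, int k)
{
        int i, j;

        for (i = 0; i < 10; ++i) {                      /* 10 reset attempts */
                for (j = 0; j < 100; ++j) {             /* ~1s of polling    */
                        if (RREG32_SOC15(UVD, k, mmUVD_STATUS) & 2)
                                return 0;               /* VCPU is up        */
                        mdelay(10);
                }
                /* not ready yet: pulse the VCPU soft reset and retry */
                WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
                         UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
                         ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
                mdelay(10);
                WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
                         ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
                mdelay(10);
        }
        return -1;                                      /* caller logs and bails */
}
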
@@ -1053,26 +1087,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
1053 */ 1087 */
1054static void uvd_v7_0_stop(struct amdgpu_device *adev) 1088static void uvd_v7_0_stop(struct amdgpu_device *adev)
1055{ 1089{
1056 /* force RBC into idle state */ 1090 uint8_t i = 0;
1057 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
1058
1059 /* Stall UMC and register bus before resetting VCPU */
1060 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
1061 UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
1062 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1063 mdelay(1);
1064
1065 /* put VCPU into reset */
1066 WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
1067 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
1068 mdelay(5);
1069 1091
1070 /* disable VCPU clock */ 1092 for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
1071 WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); 1093 /* force RBC into idle state */
1094 WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
1072 1095
1073 /* Unstall UMC and register bus */ 1096 /* Stall UMC and register bus before resetting VCPU */
1074 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, 1097 WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
1075 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); 1098 UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
1099 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1100 mdelay(1);
1101
1102 /* put VCPU into reset */
1103 WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET,
1104 UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
1105 mdelay(5);
1106
1107 /* disable VCPU clock */
1108 WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0);
1109
1110 /* Unstall UMC and register bus */
1111 WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
1112 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1113 }
1076} 1114}
1077 1115
1078/** 1116/**
@@ -1091,26 +1129,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
1091 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 1129 WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1092 1130
1093 amdgpu_ring_write(ring, 1131 amdgpu_ring_write(ring,
1094 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); 1132 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
1095 amdgpu_ring_write(ring, seq); 1133 amdgpu_ring_write(ring, seq);
1096 amdgpu_ring_write(ring, 1134 amdgpu_ring_write(ring,
1097 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); 1135 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
1098 amdgpu_ring_write(ring, addr & 0xffffffff); 1136 amdgpu_ring_write(ring, addr & 0xffffffff);
1099 amdgpu_ring_write(ring, 1137 amdgpu_ring_write(ring,
1100 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); 1138 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
1101 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); 1139 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
1102 amdgpu_ring_write(ring, 1140 amdgpu_ring_write(ring,
1103 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); 1141 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
1104 amdgpu_ring_write(ring, 0); 1142 amdgpu_ring_write(ring, 0);
1105 1143
1106 amdgpu_ring_write(ring, 1144 amdgpu_ring_write(ring,
1107 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); 1145 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
1108 amdgpu_ring_write(ring, 0); 1146 amdgpu_ring_write(ring, 0);
1109 amdgpu_ring_write(ring, 1147 amdgpu_ring_write(ring,
1110 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); 1148 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
1111 amdgpu_ring_write(ring, 0); 1149 amdgpu_ring_write(ring, 0);
1112 amdgpu_ring_write(ring, 1150 amdgpu_ring_write(ring,
1113 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); 1151 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
1114 amdgpu_ring_write(ring, 2); 1152 amdgpu_ring_write(ring, 2);
1115} 1153}
1116 1154
@@ -1136,6 +1174,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
1136} 1174}
1137 1175
1138/** 1176/**
1177 * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing
1178 *
1179 * @ring: amdgpu_ring pointer
1180 */
1181static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
1182{
1183 /* The firmware doesn't seem to like touching registers at this point. */
1184}
1185
1186/**
1139 * uvd_v7_0_ring_test_ring - register write test 1187 * uvd_v7_0_ring_test_ring - register write test
1140 * 1188 *
1141 * @ring: amdgpu_ring pointer 1189 * @ring: amdgpu_ring pointer
@@ -1149,30 +1197,30 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
1149 unsigned i; 1197 unsigned i;
1150 int r; 1198 int r;
1151 1199
1152 WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD); 1200 WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
1153 r = amdgpu_ring_alloc(ring, 3); 1201 r = amdgpu_ring_alloc(ring, 3);
1154 if (r) { 1202 if (r) {
1155 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 1203 DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
1156 ring->idx, r); 1204 ring->me, ring->idx, r);
1157 return r; 1205 return r;
1158 } 1206 }
1159 amdgpu_ring_write(ring, 1207 amdgpu_ring_write(ring,
1160 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); 1208 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
1161 amdgpu_ring_write(ring, 0xDEADBEEF); 1209 amdgpu_ring_write(ring, 0xDEADBEEF);
1162 amdgpu_ring_commit(ring); 1210 amdgpu_ring_commit(ring);
1163 for (i = 0; i < adev->usec_timeout; i++) { 1211 for (i = 0; i < adev->usec_timeout; i++) {
1164 tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID); 1212 tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
1165 if (tmp == 0xDEADBEEF) 1213 if (tmp == 0xDEADBEEF)
1166 break; 1214 break;
1167 DRM_UDELAY(1); 1215 DRM_UDELAY(1);
1168 } 1216 }
1169 1217
1170 if (i < adev->usec_timeout) { 1218 if (i < adev->usec_timeout) {
1171 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", 1219 DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
1172 ring->idx, i); 1220 ring->me, ring->idx, i);
1173 } else { 1221 } else {
1174 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", 1222 DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n",
1175 ring->idx, tmp); 1223 ring->me, ring->idx, tmp);
1176 r = -EINVAL; 1224 r = -EINVAL;
1177 } 1225 }
1178 return r; 1226 return r;
@@ -1193,17 +1241,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
1193 struct amdgpu_device *adev = ring->adev; 1241 struct amdgpu_device *adev = ring->adev;
1194 1242
1195 amdgpu_ring_write(ring, 1243 amdgpu_ring_write(ring,
1196 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); 1244 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
1197 amdgpu_ring_write(ring, vmid); 1245 amdgpu_ring_write(ring, vmid);
1198 1246
1199 amdgpu_ring_write(ring, 1247 amdgpu_ring_write(ring,
1200 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); 1248 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
1201 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); 1249 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1202 amdgpu_ring_write(ring, 1250 amdgpu_ring_write(ring,
1203 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); 1251 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
1204 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 1252 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1205 amdgpu_ring_write(ring, 1253 amdgpu_ring_write(ring,
1206 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0)); 1254 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0));
1207 amdgpu_ring_write(ring, ib->length_dw); 1255 amdgpu_ring_write(ring, ib->length_dw);
1208} 1256}
1209 1257
@@ -1231,13 +1279,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
1231 struct amdgpu_device *adev = ring->adev; 1279 struct amdgpu_device *adev = ring->adev;
1232 1280
1233 amdgpu_ring_write(ring, 1281 amdgpu_ring_write(ring,
1234 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); 1282 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
1235 amdgpu_ring_write(ring, reg << 2); 1283 amdgpu_ring_write(ring, reg << 2);
1236 amdgpu_ring_write(ring, 1284 amdgpu_ring_write(ring,
1237 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); 1285 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
1238 amdgpu_ring_write(ring, val); 1286 amdgpu_ring_write(ring, val);
1239 amdgpu_ring_write(ring, 1287 amdgpu_ring_write(ring,
1240 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); 1288 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
1241 amdgpu_ring_write(ring, 8); 1289 amdgpu_ring_write(ring, 8);
1242} 1290}
1243 1291
@@ -1247,16 +1295,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1247 struct amdgpu_device *adev = ring->adev; 1295 struct amdgpu_device *adev = ring->adev;
1248 1296
1249 amdgpu_ring_write(ring, 1297 amdgpu_ring_write(ring,
1250 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); 1298 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
1251 amdgpu_ring_write(ring, reg << 2); 1299 amdgpu_ring_write(ring, reg << 2);
1252 amdgpu_ring_write(ring, 1300 amdgpu_ring_write(ring,
1253 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); 1301 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
1254 amdgpu_ring_write(ring, val); 1302 amdgpu_ring_write(ring, val);
1255 amdgpu_ring_write(ring, 1303 amdgpu_ring_write(ring,
1256 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); 1304 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0));
1257 amdgpu_ring_write(ring, mask); 1305 amdgpu_ring_write(ring, mask);
1258 amdgpu_ring_write(ring, 1306 amdgpu_ring_write(ring,
1259 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); 1307 PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
1260 amdgpu_ring_write(ring, 12); 1308 amdgpu_ring_write(ring, 12);
1261} 1309}
1262 1310
@@ -1277,12 +1325,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1277 1325
1278static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 1326static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
1279{ 1327{
1280 int i;
1281 struct amdgpu_device *adev = ring->adev; 1328 struct amdgpu_device *adev = ring->adev;
1329 int i;
1282 1330
1283 for (i = 0; i < count; i++) 1331 WARN_ON(ring->wptr % 2 || count % 2);
1284 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
1285 1332
1333 for (i = 0; i < count / 2; i++) {
1334 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0));
1335 amdgpu_ring_write(ring, 0);
1336 }
1286} 1337}
1287 1338
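
[editor's note] uvd_v7_0_ring_insert_nop() now emits padding as two-dword pairs (a PACKET0(mmUVD_NO_OP) header plus a zero payload), which is why the new WARN_ON insists that both the requested count and the current write pointer are even. For reference, this is roughly how a caller derives that count from the ring's 16-dword alignment; a sketch of the arithmetic, not the scheduler code:

/* Sketch: pad dwords needed to reach the next aligned boundary
 * (align_mask is 0xf for this ring, i.e. 16-dword frames).        */
static uint32_t pad_dwords_sketch(uint64_t wptr, uint32_t align_mask)
{
        return (uint32_t)(-wptr) & align_mask;  /* e.g. wptr = 10 -> 6 dwords */
}
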
1288static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) 1339static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
@@ -1349,16 +1400,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle)
1349 1400
1350 if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || 1401 if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
1351 REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || 1402 REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
1352 (RREG32_SOC15(UVD, 0, mmUVD_STATUS) & 1403 (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
1353 AMDGPU_UVD_STATUS_BUSY_MASK)) 1404 AMDGPU_UVD_STATUS_BUSY_MASK))
1354 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 1405 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
1355 SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); 1406 SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
1356 1407
1357 if (srbm_soft_reset) { 1408 if (srbm_soft_reset) {
1358 adev->uvd.srbm_soft_reset = srbm_soft_reset; 1409 adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
1359 return true; 1410 return true;
1360 } else { 1411 } else {
1361 adev->uvd.srbm_soft_reset = 0; 1412 adev->uvd.inst[ring->me].srbm_soft_reset = 0;
1362 return false; 1413 return false;
1363 } 1414 }
1364} 1415}
@@ -1367,7 +1418,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle)
1367{ 1418{
1368 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1419 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1369 1420
1370 if (!adev->uvd.srbm_soft_reset) 1421 if (!adev->uvd.inst[ring->me].srbm_soft_reset)
1371 return 0; 1422 return 0;
1372 1423
1373 uvd_v7_0_stop(adev); 1424 uvd_v7_0_stop(adev);
@@ -1379,9 +1430,9 @@ static int uvd_v7_0_soft_reset(void *handle)
1379 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1430 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1380 u32 srbm_soft_reset; 1431 u32 srbm_soft_reset;
1381 1432
1382 if (!adev->uvd.srbm_soft_reset) 1433 if (!adev->uvd.inst[ring->me].srbm_soft_reset)
1383 return 0; 1434 return 0;
1384 srbm_soft_reset = adev->uvd.srbm_soft_reset; 1435 srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
1385 1436
1386 if (srbm_soft_reset) { 1437 if (srbm_soft_reset) {
1387 u32 tmp; 1438 u32 tmp;
@@ -1409,7 +1460,7 @@ static int uvd_v7_0_post_soft_reset(void *handle)
1409{ 1460{
1410 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1461 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1411 1462
1412 if (!adev->uvd.srbm_soft_reset) 1463 if (!adev->uvd.inst[ring->me].srbm_soft_reset)
1413 return 0; 1464 return 0;
1414 1465
1415 mdelay(5); 1466 mdelay(5);
@@ -1431,17 +1482,32 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
1431 struct amdgpu_irq_src *source, 1482 struct amdgpu_irq_src *source,
1432 struct amdgpu_iv_entry *entry) 1483 struct amdgpu_iv_entry *entry)
1433{ 1484{
1485 uint32_t ip_instance;
1486
1487 switch (entry->client_id) {
1488 case SOC15_IH_CLIENTID_UVD:
1489 ip_instance = 0;
1490 break;
1491 case SOC15_IH_CLIENTID_UVD1:
1492 ip_instance = 1;
1493 break;
1494 default:
1495 DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
1496 return 0;
1497 }
1498
1434 DRM_DEBUG("IH: UVD TRAP\n"); 1499 DRM_DEBUG("IH: UVD TRAP\n");
1500
1435 switch (entry->src_id) { 1501 switch (entry->src_id) {
1436 case 124: 1502 case 124:
1437 amdgpu_fence_process(&adev->uvd.ring); 1503 amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring);
1438 break; 1504 break;
1439 case 119: 1505 case 119:
1440 amdgpu_fence_process(&adev->uvd.ring_enc[0]); 1506 amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]);
1441 break; 1507 break;
1442 case 120: 1508 case 120:
1443 if (!amdgpu_sriov_vf(adev)) 1509 if (!amdgpu_sriov_vf(adev))
1444 amdgpu_fence_process(&adev->uvd.ring_enc[1]); 1510 amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]);
1445 break; 1511 break;
1446 default: 1512 default:
1447 DRM_ERROR("Unhandled interrupt: %d %d\n", 1513 DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -1457,9 +1523,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
1457{ 1523{
1458 uint32_t data, data1, data2, suvd_flags; 1524 uint32_t data, data1, data2, suvd_flags;
1459 1525
1460 data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL); 1526 data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
1461 data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); 1527 data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
1462 data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL); 1528 data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
1463 1529
1464 data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | 1530 data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
1465 UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); 1531 UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
@@ -1503,18 +1569,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
1503 UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); 1569 UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
1504 data1 |= suvd_flags; 1570 data1 |= suvd_flags;
1505 1571
1506 WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data); 1572 WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
1507 WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0); 1573 WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
1508 WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); 1574 WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
1509 WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2); 1575 WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
1510} 1576}
1511 1577
1512static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) 1578static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
1513{ 1579{
1514 uint32_t data, data1, cgc_flags, suvd_flags; 1580 uint32_t data, data1, cgc_flags, suvd_flags;
1515 1581
1516 data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE); 1582 data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
1517 data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); 1583 data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
1518 1584
1519 cgc_flags = UVD_CGC_GATE__SYS_MASK | 1585 cgc_flags = UVD_CGC_GATE__SYS_MASK |
1520 UVD_CGC_GATE__UDEC_MASK | 1586 UVD_CGC_GATE__UDEC_MASK |
@@ -1546,8 +1612,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
1546 data |= cgc_flags; 1612 data |= cgc_flags;
1547 data1 |= suvd_flags; 1613 data1 |= suvd_flags;
1548 1614
1549 WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data); 1615 WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
1550 WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); 1616 WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
1551} 1617}
1552 1618
1553static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) 1619static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
@@ -1606,7 +1672,7 @@ static int uvd_v7_0_set_powergating_state(void *handle,
1606 if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) 1672 if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
1607 return 0; 1673 return 0;
1608 1674
1609 WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); 1675 WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
1610 1676
1611 if (state == AMD_PG_STATE_GATE) { 1677 if (state == AMD_PG_STATE_GATE) {
1612 uvd_v7_0_stop(adev); 1678 uvd_v7_0_stop(adev);
@@ -1647,14 +1713,13 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
1647static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { 1713static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
1648 .type = AMDGPU_RING_TYPE_UVD, 1714 .type = AMDGPU_RING_TYPE_UVD,
1649 .align_mask = 0xf, 1715 .align_mask = 0xf,
1650 .nop = PACKET0(0x81ff, 0),
1651 .support_64bit_ptrs = false, 1716 .support_64bit_ptrs = false,
1652 .vmhub = AMDGPU_MMHUB, 1717 .vmhub = AMDGPU_MMHUB,
1653 .get_rptr = uvd_v7_0_ring_get_rptr, 1718 .get_rptr = uvd_v7_0_ring_get_rptr,
1654 .get_wptr = uvd_v7_0_ring_get_wptr, 1719 .get_wptr = uvd_v7_0_ring_get_wptr,
1655 .set_wptr = uvd_v7_0_ring_set_wptr, 1720 .set_wptr = uvd_v7_0_ring_set_wptr,
1656 .emit_frame_size = 1721 .emit_frame_size =
1657 6 + 6 + /* hdp flush / invalidate */ 1722 6 + /* hdp invalidate */
1658 SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 1723 SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
1659 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 1724 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
1660 8 + /* uvd_v7_0_ring_emit_vm_flush */ 1725 8 + /* uvd_v7_0_ring_emit_vm_flush */
@@ -1663,6 +1728,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
1663 .emit_ib = uvd_v7_0_ring_emit_ib, 1728 .emit_ib = uvd_v7_0_ring_emit_ib,
1664 .emit_fence = uvd_v7_0_ring_emit_fence, 1729 .emit_fence = uvd_v7_0_ring_emit_fence,
1665 .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, 1730 .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
1731 .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
1666 .test_ring = uvd_v7_0_ring_test_ring, 1732 .test_ring = uvd_v7_0_ring_test_ring,
1667 .test_ib = amdgpu_uvd_ring_test_ib, 1733 .test_ib = amdgpu_uvd_ring_test_ib,
1668 .insert_nop = uvd_v7_0_ring_insert_nop, 1734 .insert_nop = uvd_v7_0_ring_insert_nop,
@@ -1671,6 +1737,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
1671 .end_use = amdgpu_uvd_ring_end_use, 1737 .end_use = amdgpu_uvd_ring_end_use,
1672 .emit_wreg = uvd_v7_0_ring_emit_wreg, 1738 .emit_wreg = uvd_v7_0_ring_emit_wreg,
1673 .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait, 1739 .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
1740 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1674}; 1741};
1675 1742
1676static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { 1743static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
@@ -1702,22 +1769,32 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
1702 .end_use = amdgpu_uvd_ring_end_use, 1769 .end_use = amdgpu_uvd_ring_end_use,
1703 .emit_wreg = uvd_v7_0_enc_ring_emit_wreg, 1770 .emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
1704 .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait, 1771 .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
1772 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1705}; 1773};
1706 1774
1707static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) 1775static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
1708{ 1776{
1709 adev->uvd.ring.funcs = &uvd_v7_0_ring_vm_funcs; 1777 int i;
1710 DRM_INFO("UVD is enabled in VM mode\n"); 1778
1779 for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
1780 adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
1781 adev->uvd.inst[i].ring.me = i;
1782 DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
1783 }
1711} 1784}
1712 1785
1713static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev) 1786static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
1714{ 1787{
1715 int i; 1788 int i, j;
1716 1789
1717 for (i = 0; i < adev->uvd.num_enc_rings; ++i) 1790 for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
1718 adev->uvd.ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; 1791 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
1792 adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
1793 adev->uvd.inst[j].ring_enc[i].me = j;
1794 }
1719 1795
1720 DRM_INFO("UVD ENC is enabled in VM mode\n"); 1796 DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
1797 }
1721} 1798}
1722 1799
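
[editor's note] Both set_ring_funcs() helpers above stamp ring.me with the owning UVD instance when they install the shared vtables; that single field is what lets the instance-agnostic callbacks earlier in the file (emit_*, ring_test_ring, check_soft_reset, ...) find their hardware. Conceptually it amounts to the accessor below; the helper and struct name are illustrative, not existing amdgpu API:

/* Illustrative only: how a shared ring callback recovers per-instance
 * state via ring->me; amdgpu open-codes this as adev->uvd.inst[ring->me]. */
static struct amdgpu_uvd_inst *uvd_inst_from_ring(struct amdgpu_ring *ring)
{
        return &ring->adev->uvd.inst[ring->me];
}
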
1723static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { 1800static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
@@ -1727,8 +1804,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
1727 1804
1728static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev) 1805static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
1729{ 1806{
1730 adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; 1807 int i;
1731 adev->uvd.irq.funcs = &uvd_v7_0_irq_funcs; 1808
1809 for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
1810 adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
1811 adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
1812 }
1732} 1813}
1733 1814
1734const struct amdgpu_ip_block_version uvd_v7_0_ip_block = 1815const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 428d1928e44e..0999c843f623 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -388,7 +388,8 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
388 default: 388 default:
389 if ((adev->asic_type == CHIP_POLARIS10) || 389 if ((adev->asic_type == CHIP_POLARIS10) ||
390 (adev->asic_type == CHIP_POLARIS11) || 390 (adev->asic_type == CHIP_POLARIS11) ||
391 (adev->asic_type == CHIP_POLARIS12)) 391 (adev->asic_type == CHIP_POLARIS12) ||
392 (adev->asic_type == CHIP_VEGAM))
392 return AMDGPU_VCE_HARVEST_VCE1; 393 return AMDGPU_VCE_HARVEST_VCE1;
393 394
394 return 0; 395 return 0;
@@ -467,8 +468,8 @@ static int vce_v3_0_hw_init(void *handle)
467 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 468 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
468 469
469 vce_v3_0_override_vce_clock_gating(adev, true); 470 vce_v3_0_override_vce_clock_gating(adev, true);
470 if (!(adev->flags & AMD_IS_APU)) 471
471 amdgpu_asic_set_vce_clocks(adev, 10000, 10000); 472 amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
472 473
473 for (i = 0; i < adev->vce.num_rings; i++) 474 for (i = 0; i < adev->vce.num_rings; i++)
474 adev->vce.ring[i].ready = false; 475 adev->vce.ring[i].ready = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 73fd48d6c756..8fd1b742985a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1081,6 +1081,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1081 .end_use = amdgpu_vce_ring_end_use, 1081 .end_use = amdgpu_vce_ring_end_use,
1082 .emit_wreg = vce_v4_0_emit_wreg, 1082 .emit_wreg = vce_v4_0_emit_wreg,
1083 .emit_reg_wait = vce_v4_0_emit_reg_wait, 1083 .emit_reg_wait = vce_v4_0_emit_reg_wait,
1084 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1084}; 1085};
1085 1086
1086static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1087static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
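
[editor's note] The new .emit_reg_write_reg_wait hook added to the UVD and VCE ring vtables above points at a generic helper rather than an engine-specific packet. Presumably the helper just chains the two callbacks each of these rings already provides, along the lines below; this is a sketch of the assumed behaviour, see amdgpu_ring.c for the real implementation:

/* Assumed fallback: write reg0, then wait until (reg1 & mask) == ref. */
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
                                                uint32_t reg0, uint32_t reg1,
                                                uint32_t ref, uint32_t mask)
{
        amdgpu_ring_emit_wreg(ring, reg0, ref);
        amdgpu_ring_emit_reg_wait(ring, reg1, ref, mask);
}
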
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 8c132673bc79..110b294ebed3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -35,7 +35,6 @@
35#include "mmhub/mmhub_9_1_offset.h" 35#include "mmhub/mmhub_9_1_offset.h"
36#include "mmhub/mmhub_9_1_sh_mask.h" 36#include "mmhub/mmhub_9_1_sh_mask.h"
37 37
38static int vcn_v1_0_start(struct amdgpu_device *adev);
39static int vcn_v1_0_stop(struct amdgpu_device *adev); 38static int vcn_v1_0_stop(struct amdgpu_device *adev);
40static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); 39static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
41static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); 40static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
@@ -146,10 +145,6 @@ static int vcn_v1_0_hw_init(void *handle)
146 struct amdgpu_ring *ring = &adev->vcn.ring_dec; 145 struct amdgpu_ring *ring = &adev->vcn.ring_dec;
147 int i, r; 146 int i, r;
148 147
149 r = vcn_v1_0_start(adev);
150 if (r)
151 goto done;
152
153 ring->ready = true; 148 ring->ready = true;
154 r = amdgpu_ring_test_ring(ring); 149 r = amdgpu_ring_test_ring(ring);
155 if (r) { 150 if (r) {
@@ -185,11 +180,9 @@ static int vcn_v1_0_hw_fini(void *handle)
185{ 180{
186 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 181 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
187 struct amdgpu_ring *ring = &adev->vcn.ring_dec; 182 struct amdgpu_ring *ring = &adev->vcn.ring_dec;
188 int r;
189 183
190 r = vcn_v1_0_stop(adev); 184 if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
191 if (r) 185 vcn_v1_0_stop(adev);
192 return r;
193 186
194 ring->ready = false; 187 ring->ready = false;
195 188
@@ -288,14 +281,14 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
288 * 281 *
289 * Disable clock gating for VCN block 282 * Disable clock gating for VCN block
290 */ 283 */
291static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) 284static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
292{ 285{
293 uint32_t data; 286 uint32_t data;
294 287
295 /* JPEG disable CGC */ 288 /* JPEG disable CGC */
296 data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); 289 data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
297 290
298 if (sw) 291 if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
299 data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 292 data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
300 else 293 else
301 data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; 294 data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
@@ -310,7 +303,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
310 303
311 /* UVD disable CGC */ 304 /* UVD disable CGC */
312 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); 305 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
313 if (sw) 306 if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
314 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 307 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
315 else 308 else
316 data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; 309 data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
@@ -415,13 +408,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
415 * 408 *
416 * Enable clock gating for VCN block 409 * Enable clock gating for VCN block
417 */ 410 */
418static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) 411static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
419{ 412{
420 uint32_t data = 0; 413 uint32_t data = 0;
421 414
422 /* enable JPEG CGC */ 415 /* enable JPEG CGC */
423 data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); 416 data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
424 if (sw) 417 if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
425 data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 418 data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
426 else 419 else
427 data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 420 data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
@@ -435,7 +428,7 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
435 428
436 /* enable UVD CGC */ 429 /* enable UVD CGC */
437 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); 430 data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
438 if (sw) 431 if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
439 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 432 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
440 else 433 else
441 data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; 434 data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
@@ -480,6 +473,94 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
480 WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); 473 WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
481} 474}
482 475
476static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
477{
478 uint32_t data = 0;
479 int ret;
480
481 if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
482 data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
483 | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
484 | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
485 | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
486 | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
487 | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
488 | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
489 | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
490 | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
491 | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
492 | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
493
494 WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
495 SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret);
496 } else {
497 data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
498 | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
499 | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
500 | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
501 | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
502 | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
503 | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
504 | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
505 | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
506 | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
507 | 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
508 WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
509 SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret);
510 }
511
512	/* poll UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS and UVDU_PWR_STATUS are 0 (power on) */
513
514 data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
515 data &= ~0x103;
516 if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
517 data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | UVD_POWER_STATUS__UVD_PG_EN_MASK;
518
519 WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
520}
521
522static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
523{
524 uint32_t data = 0;
525 int ret;
526
527 if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
528 /* Before power off, this indicator has to be turned on */
529 data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
530 data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
531 data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
532 WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
533
534
535 data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
536 | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
537 | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
538 | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
539 | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
540 | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
541 | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
542 | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
543 | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
544 | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
545 | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
546
547 WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
548
549 data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
550 | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
551 | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
552 | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
553 | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
554 | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT
555 | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT
556 | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
557 | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
558 | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
559 | 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT);
560 SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret);
561 }
562}
563
483/** 564/**
484 * vcn_v1_0_start - start VCN block 565 * vcn_v1_0_start - start VCN block
485 * 566 *
@@ -499,8 +580,9 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
499 580
500 vcn_v1_0_mc_resume(adev); 581 vcn_v1_0_mc_resume(adev);
501 582
583 vcn_1_0_disable_static_power_gating(adev);
502 /* disable clock gating */ 584 /* disable clock gating */
503 vcn_v1_0_disable_clock_gating(adev, true); 585 vcn_v1_0_disable_clock_gating(adev);
504 586
505 /* disable interupt */ 587 /* disable interupt */
506 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, 588 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
@@ -680,16 +762,45 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
680 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, 762 WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
681 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); 763 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
682 764
683 /* enable clock gating */ 765 WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
684 vcn_v1_0_enable_clock_gating(adev, true);
685 766
767 vcn_v1_0_enable_clock_gating(adev);
768 vcn_1_0_enable_static_power_gating(adev);
686 return 0; 769 return 0;
687} 770}
688 771
772bool vcn_v1_0_is_idle(void *handle)
773{
774 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
775
776 return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2);
777}
778
779int vcn_v1_0_wait_for_idle(void *handle)
780{
781 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
782 int ret = 0;
783
784 SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret);
785
786 return ret;
787}
788
689static int vcn_v1_0_set_clockgating_state(void *handle, 789static int vcn_v1_0_set_clockgating_state(void *handle,
690 enum amd_clockgating_state state) 790 enum amd_clockgating_state state)
691{ 791{
692 /* needed for driver unload*/ 792 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
793 bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
794
795 if (enable) {
796 /* wait for STATUS to clear */
797 if (vcn_v1_0_is_idle(handle))
798 return -EBUSY;
799 vcn_v1_0_enable_clock_gating(adev);
800 } else {
801		/* disable HW gating and enable SW gating */
802 vcn_v1_0_disable_clock_gating(adev);
803 }
693 return 0; 804 return 0;
694} 805}
695 806
@@ -1048,16 +1159,36 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
1048 return 0; 1159 return 0;
1049} 1160}
1050 1161
1051static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 1162static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
1052{ 1163{
1053 int i;
1054 struct amdgpu_device *adev = ring->adev; 1164 struct amdgpu_device *adev = ring->adev;
1165 int i;
1055 1166
1056 for (i = 0; i < count; i++) 1167 WARN_ON(ring->wptr % 2 || count % 2);
1057 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
1058 1168
1169 for (i = 0; i < count / 2; i++) {
1170 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
1171 amdgpu_ring_write(ring, 0);
1172 }
1059} 1173}
1060 1174
1175static int vcn_v1_0_set_powergating_state(void *handle,
1176 enum amd_powergating_state state)
1177{
1178 /* This doesn't actually powergate the VCN block.
1179 * That's done in the dpm code via the SMC. This
1180 * just re-inits the block as necessary. The actual
1181 * gating still happens in the dpm code. We should
1182 * revisit this when there is a cleaner line between
1183 * the smc and the hw blocks
1184 */
1185 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1186
1187 if (state == AMD_PG_STATE_GATE)
1188 return vcn_v1_0_stop(adev);
1189 else
1190 return vcn_v1_0_start(adev);
1191}
1061 1192
1062static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { 1193static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
1063 .name = "vcn_v1_0", 1194 .name = "vcn_v1_0",
@@ -1069,20 +1200,19 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
1069 .hw_fini = vcn_v1_0_hw_fini, 1200 .hw_fini = vcn_v1_0_hw_fini,
1070 .suspend = vcn_v1_0_suspend, 1201 .suspend = vcn_v1_0_suspend,
1071 .resume = vcn_v1_0_resume, 1202 .resume = vcn_v1_0_resume,
1072 .is_idle = NULL /* vcn_v1_0_is_idle */, 1203 .is_idle = vcn_v1_0_is_idle,
1073 .wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */, 1204 .wait_for_idle = vcn_v1_0_wait_for_idle,
1074 .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */, 1205 .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
1075 .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */, 1206 .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
1076 .soft_reset = NULL /* vcn_v1_0_soft_reset */, 1207 .soft_reset = NULL /* vcn_v1_0_soft_reset */,
1077 .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, 1208 .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
1078 .set_clockgating_state = vcn_v1_0_set_clockgating_state, 1209 .set_clockgating_state = vcn_v1_0_set_clockgating_state,
1079 .set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */, 1210 .set_powergating_state = vcn_v1_0_set_powergating_state,
1080}; 1211};
1081 1212
1082static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { 1213static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
1083 .type = AMDGPU_RING_TYPE_VCN_DEC, 1214 .type = AMDGPU_RING_TYPE_VCN_DEC,
1084 .align_mask = 0xf, 1215 .align_mask = 0xf,
1085 .nop = PACKET0(0x81ff, 0),
1086 .support_64bit_ptrs = false, 1216 .support_64bit_ptrs = false,
1087 .vmhub = AMDGPU_MMHUB, 1217 .vmhub = AMDGPU_MMHUB,
1088 .get_rptr = vcn_v1_0_dec_ring_get_rptr, 1218 .get_rptr = vcn_v1_0_dec_ring_get_rptr,
@@ -1101,7 +1231,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
1101 .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush, 1231 .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
1102 .test_ring = amdgpu_vcn_dec_ring_test_ring, 1232 .test_ring = amdgpu_vcn_dec_ring_test_ring,
1103 .test_ib = amdgpu_vcn_dec_ring_test_ib, 1233 .test_ib = amdgpu_vcn_dec_ring_test_ib,
1104 .insert_nop = vcn_v1_0_ring_insert_nop, 1234 .insert_nop = vcn_v1_0_dec_ring_insert_nop,
1105 .insert_start = vcn_v1_0_dec_ring_insert_start, 1235 .insert_start = vcn_v1_0_dec_ring_insert_start,
1106 .insert_end = vcn_v1_0_dec_ring_insert_end, 1236 .insert_end = vcn_v1_0_dec_ring_insert_end,
1107 .pad_ib = amdgpu_ring_generic_pad_ib, 1237 .pad_ib = amdgpu_ring_generic_pad_ib,
@@ -1109,6 +1239,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
1109 .end_use = amdgpu_vcn_ring_end_use, 1239 .end_use = amdgpu_vcn_ring_end_use,
1110 .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, 1240 .emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
1111 .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, 1241 .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
1242 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1112}; 1243};
1113 1244
1114static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { 1245static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
@@ -1139,6 +1270,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
1139 .end_use = amdgpu_vcn_ring_end_use, 1270 .end_use = amdgpu_vcn_ring_end_use,
1140 .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, 1271 .emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
1141 .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, 1272 .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
1273 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1142}; 1274};
1143 1275
1144static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) 1276static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
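Note on the vcn_v1_0_dec_ring_insert_nop() change above: the decode ring carries register writes as (PACKET0 header, data) dword pairs, which is presumably why the rewritten helper warns when the write pointer or the requested NOP count is odd and then emits count/2 two-dword NOPs. A minimal, self-contained sketch of that pairing rule, using toy types and helpers rather than the amdgpu ring API:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in ring: just an index into a dword buffer. */
    struct toy_ring {
            uint32_t buf[64];
            uint32_t wptr;
    };

    static void ring_write(struct toy_ring *r, uint32_t v)
    {
            r->buf[r->wptr++ % 64] = v;
    }

    #define TOY_PACKET0(reg) ((reg) & 0xffff)   /* stand-in for PACKET0(reg, 0) */
    #define TOY_REG_NO_OP    0x81ff             /* illustrative NOP register */

    /* Pad with NOPs while keeping the ring aligned to (header, data) pairs. */
    static void toy_dec_ring_insert_nop(struct toy_ring *r, uint32_t count)
    {
            uint32_t i;

            assert(!(r->wptr % 2) && !(count % 2)); /* mirrors the WARN_ON() */

            for (i = 0; i < count / 2; i++) {
                    ring_write(r, TOY_PACKET0(TOY_REG_NO_OP)); /* header dword */
                    ring_write(r, 0);                          /* data dword */
            }
    }

    int main(void)
    {
            struct toy_ring r = { .wptr = 0 };

            toy_dec_ring_insert_nop(&r, 4);     /* emits two 2-dword NOPs */
            printf("wptr after padding: %u\n", r.wptr);
            return 0;
    }

Compiled standalone, the call above advances the write pointer by four dwords while keeping it pair-aligned, which is the invariant the real helper enforces.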
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
new file mode 100644
index 000000000000..52778de93ab0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -0,0 +1,53 @@
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include "amdgpu.h"
24#include "soc15.h"
25
26#include "soc15_common.h"
27#include "soc15_hw_ip.h"
28#include "vega20_ip_offset.h"
29
30int vega20_reg_base_init(struct amdgpu_device *adev)
31{
32	/* HW has more IP blocks, only initialize the blocks needed by our driver */
33 uint32_t i;
34 for (i = 0 ; i < MAX_INSTANCE ; ++i) {
35 adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
36 adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
37 adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
38 adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
39 adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
40 adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
41 adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
42 adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
43 adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
44 adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i]));
45 adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
46 adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
47 adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
48 adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
49 }
50 return 0;
51}
52
53
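For context on vega20_reg_init.c above: vega20_reg_base_init() only records per-IP tables of segment base offsets; register accessors then add a register's offset within its segment to the matching base. A rough sketch of that lookup under stand-in names (the real driver does this through its SOC15 offset macros and adev->reg_offset), with purely hypothetical example values:

    #include <stdint.h>
    #include <stdio.h>

    #define TOY_MAX_SEG 6   /* illustrative segment count per IP instance */

    /* Stand-in for one entry of adev->reg_offset[HWIP][instance]:
     * a table of dword offsets, one per address segment of that IP. */
    struct toy_ip_base {
            uint32_t segment[TOY_MAX_SEG];
    };

    /* Register dword offset = segment base for this IP instance plus the
     * register's offset within that segment. */
    static uint32_t toy_reg_offset(const struct toy_ip_base *ip,
                                   unsigned int seg, uint32_t reg)
    {
            return ip->segment[seg] + reg;
    }

    int main(void)
    {
            /* Hypothetical base table for one IP instance. */
            struct toy_ip_base gc0 = { .segment = { 0x00001260, 0x0000A000 } };

            /* Hypothetical register at offset 0x40 of segment 1. */
            printf("reg offset: 0x%x\n",
                   (unsigned int)toy_reg_offset(&gc0, 1, 0x40));
            return 0;
    }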
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 126f1276d347..4ac1288ab7df 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -305,9 +305,10 @@ static void vi_init_golden_registers(struct amdgpu_device *adev)
305 stoney_mgcg_cgcg_init, 305 stoney_mgcg_cgcg_init,
306 ARRAY_SIZE(stoney_mgcg_cgcg_init)); 306 ARRAY_SIZE(stoney_mgcg_cgcg_init));
307 break; 307 break;
308 case CHIP_POLARIS11:
309 case CHIP_POLARIS10: 308 case CHIP_POLARIS10:
309 case CHIP_POLARIS11:
310 case CHIP_POLARIS12: 310 case CHIP_POLARIS12:
311 case CHIP_VEGAM:
311 default: 312 default:
312 break; 313 break;
313 } 314 }
@@ -728,33 +729,59 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
728 return r; 729 return r;
729 730
730 tmp = RREG32_SMC(cntl_reg); 731 tmp = RREG32_SMC(cntl_reg);
731 tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | 732
732 CG_DCLK_CNTL__DCLK_DIVIDER_MASK); 733 if (adev->flags & AMD_IS_APU)
734 tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK;
735 else
736 tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
737 CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
733 tmp |= dividers.post_divider; 738 tmp |= dividers.post_divider;
734 WREG32_SMC(cntl_reg, tmp); 739 WREG32_SMC(cntl_reg, tmp);
735 740
736 for (i = 0; i < 100; i++) { 741 for (i = 0; i < 100; i++) {
737 if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK) 742 tmp = RREG32_SMC(status_reg);
738 break; 743 if (adev->flags & AMD_IS_APU) {
744 if (tmp & 0x10000)
745 break;
746 } else {
747 if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK)
748 break;
749 }
739 mdelay(10); 750 mdelay(10);
740 } 751 }
741 if (i == 100) 752 if (i == 100)
742 return -ETIMEDOUT; 753 return -ETIMEDOUT;
743
744 return 0; 754 return 0;
745} 755}
746 756
757#define ixGNB_CLK1_DFS_CNTL 0xD82200F0
758#define ixGNB_CLK1_STATUS 0xD822010C
759#define ixGNB_CLK2_DFS_CNTL 0xD8220110
760#define ixGNB_CLK2_STATUS 0xD822012C
761#define ixGNB_CLK3_DFS_CNTL 0xD8220130
762#define ixGNB_CLK3_STATUS 0xD822014C
763
747static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) 764static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
748{ 765{
749 int r; 766 int r;
750 767
751 r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); 768 if (adev->flags & AMD_IS_APU) {
752 if (r) 769 r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS);
753 return r; 770 if (r)
771 return r;
754 772
755 r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); 773 r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS);
756 if (r) 774 if (r)
757 return r; 775 return r;
776 } else {
777 r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
778 if (r)
779 return r;
780
781 r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
782 if (r)
783 return r;
784 }
758 785
759 return 0; 786 return 0;
760} 787}
@@ -764,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
764 int r, i; 791 int r, i;
765 struct atom_clock_dividers dividers; 792 struct atom_clock_dividers dividers;
766 u32 tmp; 793 u32 tmp;
794 u32 reg_ctrl;
795 u32 reg_status;
796 u32 status_mask;
797 u32 reg_mask;
798
799 if (adev->flags & AMD_IS_APU) {
800 reg_ctrl = ixGNB_CLK3_DFS_CNTL;
801 reg_status = ixGNB_CLK3_STATUS;
802 status_mask = 0x00010000;
803 reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
804 } else {
805 reg_ctrl = ixCG_ECLK_CNTL;
806 reg_status = ixCG_ECLK_STATUS;
807 status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK;
808 reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
809 }
767 810
768 r = amdgpu_atombios_get_clock_dividers(adev, 811 r = amdgpu_atombios_get_clock_dividers(adev,
769 COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, 812 COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
@@ -772,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
772 return r; 815 return r;
773 816
774 for (i = 0; i < 100; i++) { 817 for (i = 0; i < 100; i++) {
775 if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) 818 if (RREG32_SMC(reg_status) & status_mask)
776 break; 819 break;
777 mdelay(10); 820 mdelay(10);
778 } 821 }
822
779 if (i == 100) 823 if (i == 100)
780 return -ETIMEDOUT; 824 return -ETIMEDOUT;
781 825
782 tmp = RREG32_SMC(ixCG_ECLK_CNTL); 826 tmp = RREG32_SMC(reg_ctrl);
783 tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | 827 tmp &= ~reg_mask;
784 CG_ECLK_CNTL__ECLK_DIVIDER_MASK);
785 tmp |= dividers.post_divider; 828 tmp |= dividers.post_divider;
786 WREG32_SMC(ixCG_ECLK_CNTL, tmp); 829 WREG32_SMC(reg_ctrl, tmp);
787 830
788 for (i = 0; i < 100; i++) { 831 for (i = 0; i < 100; i++) {
789 if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) 832 if (RREG32_SMC(reg_status) & status_mask)
790 break; 833 break;
791 mdelay(10); 834 mdelay(10);
792 } 835 }
836
793 if (i == 100) 837 if (i == 100)
794 return -ETIMEDOUT; 838 return -ETIMEDOUT;
795 839
@@ -876,6 +920,27 @@ static void vi_invalidate_hdp(struct amdgpu_device *adev,
876 } 920 }
877} 921}
878 922
923static bool vi_need_full_reset(struct amdgpu_device *adev)
924{
925 switch (adev->asic_type) {
926 case CHIP_CARRIZO:
927 case CHIP_STONEY:
928 /* CZ has hang issues with full reset at the moment */
929 return false;
930 case CHIP_FIJI:
931 case CHIP_TONGA:
932 /* XXX: soft reset should work on fiji and tonga */
933 return true;
934 case CHIP_POLARIS10:
935 case CHIP_POLARIS11:
936 case CHIP_POLARIS12:
937 case CHIP_TOPAZ:
938 default:
939 /* change this when we support soft reset */
940 return true;
941 }
942}
943
879static const struct amdgpu_asic_funcs vi_asic_funcs = 944static const struct amdgpu_asic_funcs vi_asic_funcs =
880{ 945{
881 .read_disabled_bios = &vi_read_disabled_bios, 946 .read_disabled_bios = &vi_read_disabled_bios,
@@ -889,6 +954,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
889 .get_config_memsize = &vi_get_config_memsize, 954 .get_config_memsize = &vi_get_config_memsize,
890 .flush_hdp = &vi_flush_hdp, 955 .flush_hdp = &vi_flush_hdp,
891 .invalidate_hdp = &vi_invalidate_hdp, 956 .invalidate_hdp = &vi_invalidate_hdp,
957 .need_full_reset = &vi_need_full_reset,
892}; 958};
893 959
894#define CZ_REV_BRISTOL(rev) \ 960#define CZ_REV_BRISTOL(rev) \
@@ -1031,6 +1097,30 @@ static int vi_common_early_init(void *handle)
1031 adev->pg_flags = 0; 1097 adev->pg_flags = 0;
1032 adev->external_rev_id = adev->rev_id + 0x64; 1098 adev->external_rev_id = adev->rev_id + 0x64;
1033 break; 1099 break;
1100 case CHIP_VEGAM:
1101 adev->cg_flags = 0;
1102 /*AMD_CG_SUPPORT_GFX_MGCG |
1103 AMD_CG_SUPPORT_GFX_RLC_LS |
1104 AMD_CG_SUPPORT_GFX_CP_LS |
1105 AMD_CG_SUPPORT_GFX_CGCG |
1106 AMD_CG_SUPPORT_GFX_CGLS |
1107 AMD_CG_SUPPORT_GFX_3D_CGCG |
1108 AMD_CG_SUPPORT_GFX_3D_CGLS |
1109 AMD_CG_SUPPORT_SDMA_MGCG |
1110 AMD_CG_SUPPORT_SDMA_LS |
1111 AMD_CG_SUPPORT_BIF_MGCG |
1112 AMD_CG_SUPPORT_BIF_LS |
1113 AMD_CG_SUPPORT_HDP_MGCG |
1114 AMD_CG_SUPPORT_HDP_LS |
1115 AMD_CG_SUPPORT_ROM_MGCG |
1116 AMD_CG_SUPPORT_MC_MGCG |
1117 AMD_CG_SUPPORT_MC_LS |
1118 AMD_CG_SUPPORT_DRM_LS |
1119 AMD_CG_SUPPORT_UVD_MGCG |
1120 AMD_CG_SUPPORT_VCE_MGCG;*/
1121 adev->pg_flags = 0;
1122 adev->external_rev_id = adev->rev_id + 0x6E;
1123 break;
1034 case CHIP_CARRIZO: 1124 case CHIP_CARRIZO:
1035 adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | 1125 adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
1036 AMD_CG_SUPPORT_GFX_MGCG | 1126 AMD_CG_SUPPORT_GFX_MGCG |
@@ -1422,6 +1512,7 @@ static int vi_common_set_clockgating_state(void *handle,
1422 case CHIP_POLARIS10: 1512 case CHIP_POLARIS10:
1423 case CHIP_POLARIS11: 1513 case CHIP_POLARIS11:
1424 case CHIP_POLARIS12: 1514 case CHIP_POLARIS12:
1515 case CHIP_VEGAM:
1425 vi_common_set_clockgating_state_by_smu(adev, state); 1516 vi_common_set_clockgating_state_by_smu(adev, state);
1426 default: 1517 default:
1427 break; 1518 break;
@@ -1551,9 +1642,10 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
1551 amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); 1642 amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
1552 } 1643 }
1553 break; 1644 break;
1554 case CHIP_POLARIS11:
1555 case CHIP_POLARIS10: 1645 case CHIP_POLARIS10:
1646 case CHIP_POLARIS11:
1556 case CHIP_POLARIS12: 1647 case CHIP_POLARIS12:
1648 case CHIP_VEGAM:
1557 amdgpu_device_ip_block_add(adev, &vi_common_ip_block); 1649 amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
1558 amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); 1650 amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
1559 amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); 1651 amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);