aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2019-05-30 19:33:29 -0400
committerDave Airlie <airlied@redhat.com>2019-05-30 20:04:39 -0400
commit91c1ead6aee22d4595f50ba66070b94a4a8f84a9 (patch)
tree066ffa1c352b6257dd37cda6a1df53159e133f2e /drivers/gpu/drm/amd
parent14ee642c2ab0a3d8a1ded11fade692d8b77172b9 (diff)
parentcf401e2856b27b2deeada498eab864e2a50cf219 (diff)
Merge branch 'drm-next-5.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.3: - Add new thermal sensors for vega asics - Various RAS fixes - Add sysfs interface for memory interface utilization - Use HMM rather than mmu notifier for user pages - Expose xgmi topology via kfd - SR-IOV fixes - Fixes for manual driver reload - Add unique identifier for vega asics - Clean up user fence handling with UVD/VCE/VCN blocks - Convert DC to use core bpc attribute rather than a custom one - Add GWS support for KFD - Vega powerplay improvements - Add CRC support for DCE 12 - SR-IOV support for new security policy - Various cleanups From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190529220944.14464-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c83
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c225
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c133
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c (renamed from drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c)131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c96
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c185
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c254
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c232
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c289
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c130
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c428
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c424
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c89
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c135
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h483
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm63
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c83
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c49
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c375
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c70
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c53
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c85
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c55
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h24
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c71
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h3
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig6
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c94
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c95
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c29
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c130
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c51
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_helper.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hw_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c78
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c97
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c96
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c78
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c430
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c55
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c152
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c91
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c107
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_pp_smu.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h5
-rw-r--r--drivers/gpu/drm/amd/display/include/bios_parser_types.h3
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h7
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/include/set_mode_types.h5
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c56
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h1
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h18
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h31
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h3
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h3
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h3
-rw-r--r--drivers/gpu/drm/amd/include/cik_structs.h3
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h1
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h11
-rw-r--r--drivers/gpu/drm/amd/include/v9_structs.h3
-rw-r--r--drivers/gpu/drm/amd/include/vi_structs.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c76
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c18
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c5
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c156
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c24
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c123
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c84
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h12
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/power_state.h7
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h12
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smumgr.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c191
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c21
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.c153
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.h44
200 files changed, 6363 insertions, 2472 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 844f0a162981..a04f2fc7bf37 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK
27config DRM_AMDGPU_USERPTR 27config DRM_AMDGPU_USERPTR
28 bool "Always enable userptr write support" 28 bool "Always enable userptr write support"
29 depends on DRM_AMDGPU 29 depends on DRM_AMDGPU
30 select MMU_NOTIFIER 30 depends on ARCH_HAS_HMM
31 select HMM_MIRROR
31 help 32 help
32 This option selects CONFIG_MMU_NOTIFIER if it isn't already 33 This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
33 selected to enabled full userptr support. 34 isn't already selected to enabled full userptr support.
34 35
35config DRM_AMDGPU_GART_DEBUGFS 36config DRM_AMDGPU_GART_DEBUGFS
36 bool "Allow GART access through debugfs" 37 bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index fdd0ca4b0f0b..57ce44cc3226 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
49 amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ 49 amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
50 amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ 50 amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
51 atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ 51 atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
52 amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ 52 amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
53 amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ 53 amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
54 amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ 54 amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
55 amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ 55 amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
@@ -173,7 +173,7 @@ endif
173amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o 173amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
174amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o 174amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
175amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o 175amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
176amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o 176amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
177 177
178include $(FULL_AMD_PATH)/powerplay/Makefile 178include $(FULL_AMD_PATH)/powerplay/Makefile
179 179
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 14398f55f602..58f8f132904d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -118,7 +118,6 @@ extern int amdgpu_disp_priority;
118extern int amdgpu_hw_i2c; 118extern int amdgpu_hw_i2c;
119extern int amdgpu_pcie_gen2; 119extern int amdgpu_pcie_gen2;
120extern int amdgpu_msi; 120extern int amdgpu_msi;
121extern int amdgpu_lockup_timeout;
122extern int amdgpu_dpm; 121extern int amdgpu_dpm;
123extern int amdgpu_fw_load_type; 122extern int amdgpu_fw_load_type;
124extern int amdgpu_aspm; 123extern int amdgpu_aspm;
@@ -211,6 +210,7 @@ struct amdgpu_irq_src;
211struct amdgpu_fpriv; 210struct amdgpu_fpriv;
212struct amdgpu_bo_va_mapping; 211struct amdgpu_bo_va_mapping;
213struct amdgpu_atif; 212struct amdgpu_atif;
213struct kfd_vm_fault_info;
214 214
215enum amdgpu_cp_irq { 215enum amdgpu_cp_irq {
216 AMDGPU_CP_IRQ_GFX_EOP = 0, 216 AMDGPU_CP_IRQ_GFX_EOP = 0,
@@ -415,6 +415,7 @@ struct amdgpu_fpriv {
415}; 415};
416 416
417int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); 417int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
418int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
418 419
419int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, 420int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
420 unsigned size, struct amdgpu_ib *ib); 421 unsigned size, struct amdgpu_ib *ib);
@@ -558,6 +559,8 @@ struct amdgpu_asic_funcs {
558 uint64_t *count1); 559 uint64_t *count1);
559 /* do we need to reset the asic at init time (e.g., kexec) */ 560 /* do we need to reset the asic at init time (e.g., kexec) */
560 bool (*need_reset_on_init)(struct amdgpu_device *adev); 561 bool (*need_reset_on_init)(struct amdgpu_device *adev);
562 /* PCIe replay counter */
563 uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
561}; 564};
562 565
563/* 566/*
@@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg {
639 u32 ref_and_mask_sdma1; 642 u32 ref_and_mask_sdma1;
640}; 643};
641 644
645struct amdgpu_mmio_remap {
646 u32 reg_offset;
647 resource_size_t bus_addr;
648};
649
642struct amdgpu_nbio_funcs { 650struct amdgpu_nbio_funcs {
643 const struct nbio_hdp_flush_reg *hdp_flush_reg; 651 const struct nbio_hdp_flush_reg *hdp_flush_reg;
644 u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); 652 u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
@@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs {
666 void (*ih_control)(struct amdgpu_device *adev); 674 void (*ih_control)(struct amdgpu_device *adev);
667 void (*init_registers)(struct amdgpu_device *adev); 675 void (*init_registers)(struct amdgpu_device *adev);
668 void (*detect_hw_virt)(struct amdgpu_device *adev); 676 void (*detect_hw_virt)(struct amdgpu_device *adev);
677 void (*remap_hdp_registers)(struct amdgpu_device *adev);
669}; 678};
670 679
671struct amdgpu_df_funcs { 680struct amdgpu_df_funcs {
@@ -680,6 +689,12 @@ struct amdgpu_df_funcs {
680 u32 *flags); 689 u32 *flags);
681 void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, 690 void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
682 bool enable); 691 bool enable);
692 int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
693 int is_enable);
694 int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
695 int is_disable);
696 void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
697 uint64_t *count);
683}; 698};
684/* Define the HW IP blocks will be used in driver , add more if necessary */ 699/* Define the HW IP blocks will be used in driver , add more if necessary */
685enum amd_hw_ip_block_type { 700enum amd_hw_ip_block_type {
@@ -764,6 +779,7 @@ struct amdgpu_device {
764 void __iomem *rmmio; 779 void __iomem *rmmio;
765 /* protects concurrent MM_INDEX/DATA based register access */ 780 /* protects concurrent MM_INDEX/DATA based register access */
766 spinlock_t mmio_idx_lock; 781 spinlock_t mmio_idx_lock;
782 struct amdgpu_mmio_remap rmmio_remap;
767 /* protects concurrent SMC based register access */ 783 /* protects concurrent SMC based register access */
768 spinlock_t smc_idx_lock; 784 spinlock_t smc_idx_lock;
769 amdgpu_rreg_t smc_rreg; 785 amdgpu_rreg_t smc_rreg;
@@ -936,6 +952,13 @@ struct amdgpu_device {
936 struct work_struct xgmi_reset_work; 952 struct work_struct xgmi_reset_work;
937 953
938 bool in_baco_reset; 954 bool in_baco_reset;
955
956 long gfx_timeout;
957 long sdma_timeout;
958 long video_timeout;
959 long compute_timeout;
960
961 uint64_t unique_id;
939}; 962};
940 963
941static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) 964static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
1065#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) 1088#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
1066#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) 1089#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
1067#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) 1090#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
1091#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
1068 1092
1069/* Common functions */ 1093/* Common functions */
1070bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); 1094bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1081 const u32 array_size); 1105 const u32 array_size);
1082 1106
1083bool amdgpu_device_is_px(struct drm_device *dev); 1107bool amdgpu_device_is_px(struct drm_device *dev);
1108bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
1109 struct amdgpu_device *peer_adev);
1110
1084/* atpx handler */ 1111/* atpx handler */
1085#if defined(CONFIG_VGA_SWITCHEROO) 1112#if defined(CONFIG_VGA_SWITCHEROO)
1086void amdgpu_register_atpx_handler(void); 1113void amdgpu_register_atpx_handler(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index aeead072fa79..4af3989e4a75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -25,8 +25,10 @@
25#include <drm/drmP.h> 25#include <drm/drmP.h>
26#include "amdgpu.h" 26#include "amdgpu.h"
27#include "amdgpu_gfx.h" 27#include "amdgpu_gfx.h"
28#include "amdgpu_dma_buf.h"
28#include <linux/module.h> 29#include <linux/module.h>
29#include <linux/dma-buf.h> 30#include <linux/dma-buf.h>
31#include "amdgpu_xgmi.h"
30 32
31static const unsigned int compute_vmid_bitmap = 0xFF00; 33static const unsigned int compute_vmid_bitmap = 0xFF00;
32 34
@@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
148 }; 150 };
149 151
150 /* this is going to have a few of the MSBs set that we need to 152 /* this is going to have a few of the MSBs set that we need to
151 * clear */ 153 * clear
154 */
152 bitmap_complement(gpu_resources.queue_bitmap, 155 bitmap_complement(gpu_resources.queue_bitmap,
153 adev->gfx.mec.queue_bitmap, 156 adev->gfx.mec.queue_bitmap,
154 KGD_MAX_QUEUES); 157 KGD_MAX_QUEUES);
@@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
162 gpu_resources.queue_bitmap); 165 gpu_resources.queue_bitmap);
163 166
164 /* According to linux/bitmap.h we shouldn't use bitmap_clear if 167 /* According to linux/bitmap.h we shouldn't use bitmap_clear if
165 * nbits is not compile time constant */ 168 * nbits is not compile time constant
169 */
166 last_valid_bit = 1 /* only first MEC can have compute queues */ 170 last_valid_bit = 1 /* only first MEC can have compute queues */
167 * adev->gfx.mec.num_pipe_per_mec 171 * adev->gfx.mec.num_pipe_per_mec
168 * adev->gfx.mec.num_queue_per_pipe; 172 * adev->gfx.mec.num_queue_per_pipe;
@@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
335 amdgpu_bo_unref(&(bo)); 339 amdgpu_bo_unref(&(bo));
336} 340}
337 341
342int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
343 void **mem_obj)
344{
345 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
346 struct amdgpu_bo *bo = NULL;
347 struct amdgpu_bo_param bp;
348 int r;
349
350 memset(&bp, 0, sizeof(bp));
351 bp.size = size;
352 bp.byte_align = 1;
353 bp.domain = AMDGPU_GEM_DOMAIN_GWS;
354 bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
355 bp.type = ttm_bo_type_device;
356 bp.resv = NULL;
357
358 r = amdgpu_bo_create(adev, &bp, &bo);
359 if (r) {
360 dev_err(adev->dev,
361 "failed to allocate gws BO for amdkfd (%d)\n", r);
362 return r;
363 }
364
365 *mem_obj = bo;
366 return 0;
367}
368
369void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
370{
371 struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
372
373 amdgpu_bo_unref(&bo);
374}
375
338uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, 376uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
339 enum kgd_engine_type type) 377 enum kgd_engine_type type)
340{ 378{
@@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
518 556
519 return adev->gmc.xgmi.hive_id; 557 return adev->gmc.xgmi.hive_id;
520} 558}
559uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
560{
561 struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
562 struct amdgpu_device *adev = (struct amdgpu_device *)dst;
563 int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
564
565 if (ret < 0) {
566 DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
567 adev->gmc.xgmi.physical_node_id,
568 peer_adev->gmc.xgmi.physical_node_id, ret);
569 ret = 0;
570 }
571 return (uint8_t)ret;
572}
573
574uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
575{
576 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
577
578 return adev->rmmio_remap.bus_addr;
579}
580
581uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
582{
583 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
584
585 return adev->gds.gws_size;
586}
521 587
522int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, 588int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
523 uint32_t vmid, uint64_t gpu_addr, 589 uint32_t vmid, uint64_t gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4e37fa7e85b1..f968bf147c5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -61,7 +61,6 @@ struct kgd_mem {
61 61
62 atomic_t invalid; 62 atomic_t invalid;
63 struct amdkfd_process_info *process_info; 63 struct amdkfd_process_info *process_info;
64 struct page **user_pages;
65 64
66 struct amdgpu_sync sync; 65 struct amdgpu_sync sync;
67 66
@@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
154 void **mem_obj, uint64_t *gpu_addr, 153 void **mem_obj, uint64_t *gpu_addr,
155 void **cpu_ptr, bool mqd_gfx9); 154 void **cpu_ptr, bool mqd_gfx9);
156void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); 155void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
156int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
157void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
158int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
159int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
157uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, 160uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
158 enum kgd_engine_type type); 161 enum kgd_engine_type type);
159void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, 162void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
@@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
169 uint32_t *flags); 172 uint32_t *flags);
170uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); 173uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
171uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); 174uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
175uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
176uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
177uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
172 178
173#define read_user_wptr(mmptr, wptr, dst) \ 179#define read_user_wptr(mmptr, wptr, dst) \
174 ({ \ 180 ({ \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index fa09e11a600c..c6abcf72e822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
310 retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + 310 retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
311 m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; 311 m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
312 312
313 pr_debug("kfd: sdma base address: 0x%x\n", retval); 313 pr_debug("sdma base address: 0x%x\n", retval);
314 314
315 return retval; 315 return retval;
316} 316}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index fec3a6aa1de6..4e8b4e949926 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
266 266
267 retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + 267 retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
268 m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; 268 m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
269 pr_debug("kfd: sdma base address: 0x%x\n", retval); 269 pr_debug("sdma base address: 0x%x\n", retval);
270 270
271 return retval; 271 return retval;
272} 272}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ef3d93b995b2..d5af41143d12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
225 225
226 lock_srbm(kgd, 0, 0, 0, vmid); 226 lock_srbm(kgd, 0, 0, 0, vmid);
227 227
228 WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); 228 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
229 WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); 229 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
230 /* APE1 no longer exists on GFX9 */ 230 /* APE1 no longer exists on GFX9 */
231 231
232 unlock_srbm(kgd); 232 unlock_srbm(kgd);
@@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
369 value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); 369 value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
370 value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, 370 value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
371 ((mec << 5) | (pipe << 3) | queue_id | 0x80)); 371 ((mec << 5) | (pipe << 3) | queue_id | 0x80));
372 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); 372 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
373 } 373 }
374 374
375 /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ 375 /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
@@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
378 378
379 for (reg = hqd_base; 379 for (reg = hqd_base;
380 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) 380 reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
381 WREG32(reg, mqd_hqd[reg - hqd_base]); 381 WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
382 382
383 383
384 /* Activate doorbell logic before triggering WPTR poll. */ 384 /* Activate doorbell logic before triggering WPTR poll. */
385 data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, 385 data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
386 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 386 CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
387 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); 387 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
388 388
389 if (wptr) { 389 if (wptr) {
390 /* Don't read wptr with get_user because the user 390 /* Don't read wptr with get_user because the user
@@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
413 guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); 413 guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
414 guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; 414 guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
415 415
416 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), 416 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
417 lower_32_bits(guessed_wptr)); 417 lower_32_bits(guessed_wptr));
418 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), 418 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
419 upper_32_bits(guessed_wptr)); 419 upper_32_bits(guessed_wptr));
420 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), 420 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
421 lower_32_bits((uintptr_t)wptr)); 421 lower_32_bits((uintptr_t)wptr));
422 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 422 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
423 upper_32_bits((uintptr_t)wptr)); 423 upper_32_bits((uintptr_t)wptr));
424 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), 424 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
425 get_queue_mask(adev, pipe_id, queue_id)); 425 get_queue_mask(adev, pipe_id, queue_id));
426 } 426 }
427 427
428 /* Start the EOP fetcher */ 428 /* Start the EOP fetcher */
429 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), 429 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
430 REG_SET_FIELD(m->cp_hqd_eop_rptr, 430 REG_SET_FIELD(m->cp_hqd_eop_rptr,
431 CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); 431 CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
432 432
433 data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); 433 data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
434 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); 434 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
435 435
436 release_queue(kgd); 436 release_queue(kgd);
437 437
@@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
633 acquire_queue(kgd, pipe_id, queue_id); 633 acquire_queue(kgd, pipe_id, queue_id);
634 634
635 if (m->cp_hqd_vmid == 0) 635 if (m->cp_hqd_vmid == 0)
636 WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); 636 WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
637 637
638 switch (reset_type) { 638 switch (reset_type) {
639 case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: 639 case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
647 break; 647 break;
648 } 648 }
649 649
650 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); 650 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
651 651
652 end_jiffies = (utimeout * HZ / 1000) + jiffies; 652 end_jiffies = (utimeout * HZ / 1000) + jiffies;
653 while (true) { 653 while (true) {
@@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
726 return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; 726 return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
727} 727}
728 728
729static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) 729static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
730{ 730 uint32_t flush_type)
731 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
732
733 /* Use legacy mode tlb invalidation.
734 *
735 * Currently on Raven the code below is broken for anything but
736 * legacy mode due to a MMHUB power gating problem. A workaround
737 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
738 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
739 * bit.
740 *
741 * TODO 1: agree on the right set of invalidation registers for
742 * KFD use. Use the last one for now. Invalidate both GC and
743 * MMHUB.
744 *
745 * TODO 2: support range-based invalidation, requires kfg2kgd
746 * interface change
747 */
748 amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
749}
750
751static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
752{ 731{
753 signed long r; 732 signed long r;
754 uint32_t seq; 733 uint32_t seq;
@@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
761 PACKET3_INVALIDATE_TLBS_DST_SEL(1) | 740 PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
762 PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | 741 PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
763 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 742 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
764 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ 743 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
765 amdgpu_fence_emit_polling(ring, &seq); 744 amdgpu_fence_emit_polling(ring, &seq);
766 amdgpu_ring_commit(ring); 745 amdgpu_ring_commit(ring);
767 spin_unlock(&adev->gfx.kiq.ring_lock); 746 spin_unlock(&adev->gfx.kiq.ring_lock);
@@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
780 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 759 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
781 int vmid; 760 int vmid;
782 struct amdgpu_ring *ring = &adev->gfx.kiq.ring; 761 struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
762 uint32_t flush_type = 0;
783 763
784 if (adev->in_gpu_reset) 764 if (adev->in_gpu_reset)
785 return -EIO; 765 return -EIO;
766 if (adev->gmc.xgmi.num_physical_nodes &&
767 adev->asic_type == CHIP_VEGA20)
768 flush_type = 2;
786 769
787 if (ring->sched.ready) 770 if (ring->sched.ready)
788 return invalidate_tlbs_with_kiq(adev, pasid); 771 return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
789 772
790 for (vmid = 0; vmid < 16; vmid++) { 773 for (vmid = 0; vmid < 16; vmid++) {
791 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) 774 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
@@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
793 if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { 776 if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
794 if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) 777 if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
795 == pasid) { 778 == pasid) {
796 write_vmid_invalidate_request(kgd, vmid); 779 amdgpu_gmc_flush_gpu_tlb(adev, vmid,
780 flush_type);
797 break; 781 break;
798 } 782 }
799 } 783 }
@@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
811 return 0; 795 return 0;
812 } 796 }
813 797
814 write_vmid_invalidate_request(kgd, vmid); 798 /* Use legacy mode tlb invalidation.
799 *
800 * Currently on Raven the code below is broken for anything but
801 * legacy mode due to a MMHUB power gating problem. A workaround
802 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
803 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
804 * bit.
805 *
806 * TODO 1: agree on the right set of invalidation registers for
807 * KFD use. Use the last one for now. Invalidate both GC and
808 * MMHUB.
809 *
810 * TODO 2: support range-based invalidation, requires kfg2kgd
811 * interface change
812 */
813 amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
815 return 0; 814 return 0;
816} 815}
817 816
@@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
838 837
839 mutex_lock(&adev->grbm_idx_mutex); 838 mutex_lock(&adev->grbm_idx_mutex);
840 839
841 WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); 840 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
842 WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); 841 WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
843 842
844 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 843 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
@@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
848 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 847 data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
849 SE_BROADCAST_WRITES, 1); 848 SE_BROADCAST_WRITES, 1);
850 849
851 WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); 850 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
852 mutex_unlock(&adev->grbm_idx_mutex); 851 mutex_unlock(&adev->grbm_idx_mutex);
853 852
854 return 0; 853 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a6e5184d436c..87177ed37dd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -30,6 +30,7 @@
30#include "amdgpu_object.h" 30#include "amdgpu_object.h"
31#include "amdgpu_vm.h" 31#include "amdgpu_vm.h"
32#include "amdgpu_amdkfd.h" 32#include "amdgpu_amdkfd.h"
33#include "amdgpu_dma_buf.h"
33 34
34/* Special VM and GART address alignment needed for VI pre-Fiji due to 35/* Special VM and GART address alignment needed for VI pre-Fiji due to
35 * a HW bug. 36 * a HW bug.
@@ -456,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
456 mutex_unlock(&process_info->lock); 457 mutex_unlock(&process_info->lock);
457} 458}
458 459
460static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
461 struct amdkfd_process_info *process_info)
462{
463 struct ttm_validate_buffer *bo_list_entry;
464
465 bo_list_entry = &mem->validate_list;
466 mutex_lock(&process_info->lock);
467 list_del(&bo_list_entry->head);
468 mutex_unlock(&process_info->lock);
469}
470
459/* Initializes user pages. It registers the MMU notifier and validates 471/* Initializes user pages. It registers the MMU notifier and validates
460 * the userptr BO in the GTT domain. 472 * the userptr BO in the GTT domain.
461 * 473 *
@@ -491,28 +503,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
491 goto out; 503 goto out;
492 } 504 }
493 505
494 /* If no restore worker is running concurrently, user_pages 506 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
495 * should not be allocated
496 */
497 WARN(mem->user_pages, "Leaking user_pages array");
498
499 mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
500 sizeof(struct page *),
501 GFP_KERNEL | __GFP_ZERO);
502 if (!mem->user_pages) {
503 pr_err("%s: Failed to allocate pages array\n", __func__);
504 ret = -ENOMEM;
505 goto unregister_out;
506 }
507
508 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
509 if (ret) { 507 if (ret) {
510 pr_err("%s: Failed to get user pages: %d\n", __func__, ret); 508 pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
511 goto free_out; 509 goto unregister_out;
512 } 510 }
513 511
514 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
515
516 ret = amdgpu_bo_reserve(bo, true); 512 ret = amdgpu_bo_reserve(bo, true);
517 if (ret) { 513 if (ret) {
518 pr_err("%s: Failed to reserve BO\n", __func__); 514 pr_err("%s: Failed to reserve BO\n", __func__);
@@ -525,11 +521,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
525 amdgpu_bo_unreserve(bo); 521 amdgpu_bo_unreserve(bo);
526 522
527release_out: 523release_out:
528 if (ret) 524 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
529 release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
530free_out:
531 kvfree(mem->user_pages);
532 mem->user_pages = NULL;
533unregister_out: 525unregister_out:
534 if (ret) 526 if (ret)
535 amdgpu_mn_unregister(bo); 527 amdgpu_mn_unregister(bo);
@@ -588,7 +580,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
588 ctx->kfd_bo.priority = 0; 580 ctx->kfd_bo.priority = 0;
589 ctx->kfd_bo.tv.bo = &bo->tbo; 581 ctx->kfd_bo.tv.bo = &bo->tbo;
590 ctx->kfd_bo.tv.num_shared = 1; 582 ctx->kfd_bo.tv.num_shared = 1;
591 ctx->kfd_bo.user_pages = NULL;
592 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 583 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
593 584
594 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); 585 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -652,7 +643,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
652 ctx->kfd_bo.priority = 0; 643 ctx->kfd_bo.priority = 0;
653 ctx->kfd_bo.tv.bo = &bo->tbo; 644 ctx->kfd_bo.tv.bo = &bo->tbo;
654 ctx->kfd_bo.tv.num_shared = 1; 645 ctx->kfd_bo.tv.num_shared = 1;
655 ctx->kfd_bo.user_pages = NULL;
656 list_add(&ctx->kfd_bo.tv.head, &ctx->list); 646 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
657 647
658 i = 0; 648 i = 0;
@@ -896,6 +886,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
896 AMDGPU_FENCE_OWNER_KFD, false); 886 AMDGPU_FENCE_OWNER_KFD, false);
897 if (ret) 887 if (ret)
898 goto wait_pd_fail; 888 goto wait_pd_fail;
889 ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
890 if (ret)
891 goto reserve_shared_fail;
899 amdgpu_bo_fence(vm->root.base.bo, 892 amdgpu_bo_fence(vm->root.base.bo,
900 &vm->process_info->eviction_fence->base, true); 893 &vm->process_info->eviction_fence->base, true);
901 amdgpu_bo_unreserve(vm->root.base.bo); 894 amdgpu_bo_unreserve(vm->root.base.bo);
@@ -909,6 +902,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
909 902
910 return 0; 903 return 0;
911 904
905reserve_shared_fail:
912wait_pd_fail: 906wait_pd_fail:
913validate_pd_fail: 907validate_pd_fail:
914 amdgpu_bo_unreserve(vm->root.base.bo); 908 amdgpu_bo_unreserve(vm->root.base.bo);
@@ -1109,7 +1103,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1109 if (!offset || !*offset) 1103 if (!offset || !*offset)
1110 return -EINVAL; 1104 return -EINVAL;
1111 user_addr = *offset; 1105 user_addr = *offset;
1112 } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { 1106 } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
1107 ALLOC_MEM_FLAGS_MMIO_REMAP)) {
1113 domain = AMDGPU_GEM_DOMAIN_GTT; 1108 domain = AMDGPU_GEM_DOMAIN_GTT;
1114 alloc_domain = AMDGPU_GEM_DOMAIN_CPU; 1109 alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
1115 bo_type = ttm_bo_type_sg; 1110 bo_type = ttm_bo_type_sg;
@@ -1199,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1199 1194
1200 if (user_addr) { 1195 if (user_addr) {
1201 ret = init_user_pages(*mem, current->mm, user_addr); 1196 ret = init_user_pages(*mem, current->mm, user_addr);
1202 if (ret) { 1197 if (ret)
1203 mutex_lock(&avm->process_info->lock);
1204 list_del(&(*mem)->validate_list.head);
1205 mutex_unlock(&avm->process_info->lock);
1206 goto allocate_init_user_pages_failed; 1198 goto allocate_init_user_pages_failed;
1207 }
1208 } 1199 }
1209 1200
1210 if (offset) 1201 if (offset)
@@ -1213,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1213 return 0; 1204 return 0;
1214 1205
1215allocate_init_user_pages_failed: 1206allocate_init_user_pages_failed:
1207 remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
1216 amdgpu_bo_unref(&bo); 1208 amdgpu_bo_unref(&bo);
1217 /* Don't unreserve system mem limit twice */ 1209 /* Don't unreserve system mem limit twice */
1218 goto err_reserve_limit; 1210 goto err_reserve_limit;
@@ -1262,15 +1254,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1262 list_del(&bo_list_entry->head); 1254 list_del(&bo_list_entry->head);
1263 mutex_unlock(&process_info->lock); 1255 mutex_unlock(&process_info->lock);
1264 1256
1265 /* Free user pages if necessary */
1266 if (mem->user_pages) {
1267 pr_debug("%s: Freeing user_pages array\n", __func__);
1268 if (mem->user_pages[0])
1269 release_pages(mem->user_pages,
1270 mem->bo->tbo.ttm->num_pages);
1271 kvfree(mem->user_pages);
1272 }
1273
1274 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); 1257 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
1275 if (unlikely(ret)) 1258 if (unlikely(ret))
1276 return ret; 1259 return ret;
@@ -1294,8 +1277,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1294 /* Free the sync object */ 1277 /* Free the sync object */
1295 amdgpu_sync_free(&mem->sync); 1278 amdgpu_sync_free(&mem->sync);
1296 1279
1297 /* If the SG is not NULL, it's one we created for a doorbell 1280 /* If the SG is not NULL, it's one we created for a doorbell or mmio
1298 * BO. We need to free it. 1281 * remap BO. We need to free it.
1299 */ 1282 */
1300 if (mem->bo->tbo.sg) { 1283 if (mem->bo->tbo.sg) {
1301 sg_free_table(mem->bo->tbo.sg); 1284 sg_free_table(mem->bo->tbo.sg);
@@ -1409,7 +1392,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1409 ret = map_bo_to_gpuvm(adev, entry, ctx.sync, 1392 ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
1410 is_invalid_userptr); 1393 is_invalid_userptr);
1411 if (ret) { 1394 if (ret) {
1412 pr_err("Failed to map radeon bo to gpuvm\n"); 1395 pr_err("Failed to map bo to gpuvm\n");
1413 goto map_bo_to_gpuvm_failed; 1396 goto map_bo_to_gpuvm_failed;
1414 } 1397 }
1415 1398
@@ -1744,25 +1727,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
1744 1727
1745 bo = mem->bo; 1728 bo = mem->bo;
1746 1729
1747 if (!mem->user_pages) {
1748 mem->user_pages =
1749 kvmalloc_array(bo->tbo.ttm->num_pages,
1750 sizeof(struct page *),
1751 GFP_KERNEL | __GFP_ZERO);
1752 if (!mem->user_pages) {
1753 pr_err("%s: Failed to allocate pages array\n",
1754 __func__);
1755 return -ENOMEM;
1756 }
1757 } else if (mem->user_pages[0]) {
1758 release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
1759 }
1760
1761 /* Get updated user pages */ 1730 /* Get updated user pages */
1762 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, 1731 ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
1763 mem->user_pages); 1732 bo->tbo.ttm->pages);
1764 if (ret) { 1733 if (ret) {
1765 mem->user_pages[0] = NULL; 1734 bo->tbo.ttm->pages[0] = NULL;
1766 pr_info("%s: Failed to get user pages: %d\n", 1735 pr_info("%s: Failed to get user pages: %d\n",
1767 __func__, ret); 1736 __func__, ret);
1768 /* Pretend it succeeded. It will fail later 1737 /* Pretend it succeeded. It will fail later
@@ -1771,17 +1740,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
1771 * stalled user mode queues. 1740 * stalled user mode queues.
1772 */ 1741 */
1773 } 1742 }
1774
1775 /* Mark the BO as valid unless it was invalidated
1776 * again concurrently
1777 */
1778 if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
1779 return -EAGAIN;
1780 } 1743 }
1781 1744
1782 return 0; 1745 return 0;
1783} 1746}
1784 1747
1748/* Remove invalid userptr BOs from hmm track list
1749 *
1750 * Stop HMM track the userptr update
1751 */
1752static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info)
1753{
1754 struct kgd_mem *mem, *tmp_mem;
1755 struct amdgpu_bo *bo;
1756
1757 list_for_each_entry_safe(mem, tmp_mem,
1758 &process_info->userptr_inval_list,
1759 validate_list.head) {
1760 bo = mem->bo;
1761 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1762 }
1763}
1764
1785/* Validate invalid userptr BOs 1765/* Validate invalid userptr BOs
1786 * 1766 *
1787 * Validates BOs on the userptr_inval_list, and moves them back to the 1767 * Validates BOs on the userptr_inval_list, and moves them back to the
@@ -1806,7 +1786,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
1806 GFP_KERNEL); 1786 GFP_KERNEL);
1807 if (!pd_bo_list_entries) { 1787 if (!pd_bo_list_entries) {
1808 pr_err("%s: Failed to allocate PD BO list entries\n", __func__); 1788 pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
1809 return -ENOMEM; 1789 ret = -ENOMEM;
1790 goto out_no_mem;
1810 } 1791 }
1811 1792
1812 INIT_LIST_HEAD(&resv_list); 1793 INIT_LIST_HEAD(&resv_list);
@@ -1830,7 +1811,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
1830 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); 1811 ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
1831 WARN(!list_empty(&duplicates), "Duplicates should be empty"); 1812 WARN(!list_empty(&duplicates), "Duplicates should be empty");
1832 if (ret) 1813 if (ret)
1833 goto out; 1814 goto out_free;
1834 1815
1835 amdgpu_sync_create(&sync); 1816 amdgpu_sync_create(&sync);
1836 1817
@@ -1846,10 +1827,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
1846 1827
1847 bo = mem->bo; 1828 bo = mem->bo;
1848 1829
1849 /* Copy pages array and validate the BO if we got user pages */ 1830 /* Validate the BO if we got user pages */
1850 if (mem->user_pages[0]) { 1831 if (bo->tbo.ttm->pages[0]) {
1851 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
1852 mem->user_pages);
1853 amdgpu_bo_placement_from_domain(bo, mem->domain); 1832 amdgpu_bo_placement_from_domain(bo, mem->domain);
1854 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 1833 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1855 if (ret) { 1834 if (ret) {
@@ -1858,16 +1837,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
1858 } 1837 }
1859 } 1838 }
1860 1839
1861 /* Validate succeeded, now the BO owns the pages, free
1862 * our copy of the pointer array. Put this BO back on
1863 * the userptr_valid_list. If we need to revalidate
1864 * it, we need to start from scratch.
1865 */
1866 kvfree(mem->user_pages);
1867 mem->user_pages = NULL;
1868 list_move_tail(&mem->validate_list.head, 1840 list_move_tail(&mem->validate_list.head,
1869 &process_info->userptr_valid_list); 1841 &process_info->userptr_valid_list);
1870 1842
1843 /* Stop HMM track the userptr update. We dont check the return
1844 * value for concurrent CPU page table update because we will
1845 * reschedule the restore worker if process_info->evicted_bos
1846 * is updated.
1847 */
1848 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1849
1871 /* Update mapping. If the BO was not validated 1850 /* Update mapping. If the BO was not validated
1872 * (because we couldn't get user pages), this will 1851 * (because we couldn't get user pages), this will
1873 * clear the page table entries, which will result in 1852 * clear the page table entries, which will result in
@@ -1897,8 +1876,9 @@ unreserve_out:
1897 ttm_eu_backoff_reservation(&ticket, &resv_list); 1876 ttm_eu_backoff_reservation(&ticket, &resv_list);
1898 amdgpu_sync_wait(&sync, false); 1877 amdgpu_sync_wait(&sync, false);
1899 amdgpu_sync_free(&sync); 1878 amdgpu_sync_free(&sync);
1900out: 1879out_free:
1901 kfree(pd_bo_list_entries); 1880 kfree(pd_bo_list_entries);
1881out_no_mem:
1902 1882
1903 return ret; 1883 return ret;
1904} 1884}
@@ -1963,7 +1943,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
1963 * hanging. No point trying again. 1943 * hanging. No point trying again.
1964 */ 1944 */
1965 } 1945 }
1946
1966unlock_out: 1947unlock_out:
1948 untrack_invalid_user_pages(process_info);
1967 mutex_unlock(&process_info->lock); 1949 mutex_unlock(&process_info->lock);
1968 mmput(mm); 1950 mmput(mm);
1969 put_task_struct(usertask); 1951 put_task_struct(usertask);
@@ -2130,3 +2112,88 @@ ttm_reserve_fail:
2130 kfree(pd_bo_list); 2112 kfree(pd_bo_list);
2131 return ret; 2113 return ret;
2132} 2114}
2115
2116int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
2117{
2118 struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2119 struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
2120 int ret;
2121
2122 if (!info || !gws)
2123 return -EINVAL;
2124
2125 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
2126 if (!*mem)
2127 return -EINVAL;
2128
2129 mutex_init(&(*mem)->lock);
2130 (*mem)->bo = amdgpu_bo_ref(gws_bo);
2131 (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
2132 (*mem)->process_info = process_info;
2133 add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
2134 amdgpu_sync_create(&(*mem)->sync);
2135
2136
2137 /* Validate gws bo the first time it is added to process */
2138 mutex_lock(&(*mem)->process_info->lock);
2139 ret = amdgpu_bo_reserve(gws_bo, false);
2140 if (unlikely(ret)) {
2141 pr_err("Reserve gws bo failed %d\n", ret);
2142 goto bo_reservation_failure;
2143 }
2144
2145 ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
2146 if (ret) {
2147 pr_err("GWS BO validate failed %d\n", ret);
2148 goto bo_validation_failure;
2149 }
2150 /* GWS resource is shared b/t amdgpu and amdkfd
2151 * Add process eviction fence to bo so they can
2152 * evict each other.
2153 */
2154 amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
2155 amdgpu_bo_unreserve(gws_bo);
2156 mutex_unlock(&(*mem)->process_info->lock);
2157
2158 return ret;
2159
2160bo_validation_failure:
2161 amdgpu_bo_unreserve(gws_bo);
2162bo_reservation_failure:
2163 mutex_unlock(&(*mem)->process_info->lock);
2164 amdgpu_sync_free(&(*mem)->sync);
2165 remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
2166 amdgpu_bo_unref(&gws_bo);
2167 mutex_destroy(&(*mem)->lock);
2168 kfree(*mem);
2169 *mem = NULL;
2170 return ret;
2171}
2172
2173int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
2174{
2175 int ret;
2176 struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
2177 struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
2178 struct amdgpu_bo *gws_bo = kgd_mem->bo;
2179
2180 /* Remove BO from process's validate list so restore worker won't touch
2181 * it anymore
2182 */
2183 remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
2184
2185 ret = amdgpu_bo_reserve(gws_bo, false);
2186 if (unlikely(ret)) {
2187 pr_err("Reserve gws bo failed %d\n", ret);
2188 //TODO add BO back to validate_list?
2189 return ret;
2190 }
2191 amdgpu_amdkfd_remove_eviction_fence(gws_bo,
2192 process_info->eviction_fence);
2193 amdgpu_bo_unreserve(gws_bo);
2194 amdgpu_sync_free(&kgd_mem->sync);
2195 amdgpu_bo_unref(&gws_bo);
2196 mutex_destroy(&kgd_mem->lock);
2197 kfree(mem);
2198 return 0;
2199}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5c79da8e1150..d497467b7fc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -81,9 +81,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
81 return -ENOMEM; 81 return -ENOMEM;
82 82
83 kref_init(&list->refcount); 83 kref_init(&list->refcount);
84 list->gds_obj = adev->gds.gds_gfx_bo; 84 list->gds_obj = NULL;
85 list->gws_obj = adev->gds.gws_gfx_bo; 85 list->gws_obj = NULL;
86 list->oa_obj = adev->gds.oa_gfx_bo; 86 list->oa_obj = NULL;
87 87
88 array = amdgpu_bo_list_array_entry(list, 0); 88 array = amdgpu_bo_list_array_entry(list, 0);
89 memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); 89 memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..a130e766cbdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
36 struct amdgpu_bo_va *bo_va; 36 struct amdgpu_bo_va *bo_va;
37 uint32_t priority; 37 uint32_t priority;
38 struct page **user_pages; 38 struct page **user_pages;
39 int user_invalidated; 39 bool user_invalidated;
40}; 40};
41 41
42struct amdgpu_bo_list { 42struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2f6239b6be6f..d72cc583ebd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
52 p->uf_entry.tv.bo = &bo->tbo; 52 p->uf_entry.tv.bo = &bo->tbo;
53 /* One for TTM and one for the CS job */ 53 /* One for TTM and one for the CS job */
54 p->uf_entry.tv.num_shared = 2; 54 p->uf_entry.tv.num_shared = 2;
55 p->uf_entry.user_pages = NULL;
56 55
57 drm_gem_object_put_unlocked(gobj); 56 drm_gem_object_put_unlocked(gobj);
58 57
@@ -542,14 +541,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
542 if (usermm && usermm != current->mm) 541 if (usermm && usermm != current->mm)
543 return -EPERM; 542 return -EPERM;
544 543
545 /* Check if we have user pages and nobody bound the BO already */ 544 if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
546 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && 545 lobj->user_invalidated && lobj->user_pages) {
547 lobj->user_pages) {
548 amdgpu_bo_placement_from_domain(bo, 546 amdgpu_bo_placement_from_domain(bo,
549 AMDGPU_GEM_DOMAIN_CPU); 547 AMDGPU_GEM_DOMAIN_CPU);
550 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 548 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
551 if (r) 549 if (r)
552 return r; 550 return r;
551
553 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, 552 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
554 lobj->user_pages); 553 lobj->user_pages);
555 binding_userptr = true; 554 binding_userptr = true;
@@ -580,7 +579,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
580 struct amdgpu_bo *gds; 579 struct amdgpu_bo *gds;
581 struct amdgpu_bo *gws; 580 struct amdgpu_bo *gws;
582 struct amdgpu_bo *oa; 581 struct amdgpu_bo *oa;
583 unsigned tries = 10;
584 int r; 582 int r;
585 583
586 INIT_LIST_HEAD(&p->validated); 584 INIT_LIST_HEAD(&p->validated);
@@ -616,79 +614,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
616 if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) 614 if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
617 list_add(&p->uf_entry.tv.head, &p->validated); 615 list_add(&p->uf_entry.tv.head, &p->validated);
618 616
619 while (1) { 617 /* Get userptr backing pages. If pages are updated after registered
620 struct list_head need_pages; 618 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
621 619 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
622 r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, 620 */
623 &duplicates); 621 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
624 if (unlikely(r != 0)) { 622 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
625 if (r != -ERESTARTSYS) 623 bool userpage_invalidated = false;
626 DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); 624 int i;
627 goto error_free_pages; 625
628 } 626 e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
629 627 sizeof(struct page *),
630 INIT_LIST_HEAD(&need_pages); 628 GFP_KERNEL | __GFP_ZERO);
631 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 629 if (!e->user_pages) {
632 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); 630 DRM_ERROR("calloc failure\n");
633 631 return -ENOMEM;
634 if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
635 &e->user_invalidated) && e->user_pages) {
636
637 /* We acquired a page array, but somebody
638 * invalidated it. Free it and try again
639 */
640 release_pages(e->user_pages,
641 bo->tbo.ttm->num_pages);
642 kvfree(e->user_pages);
643 e->user_pages = NULL;
644 }
645
646 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
647 !e->user_pages) {
648 list_del(&e->tv.head);
649 list_add(&e->tv.head, &need_pages);
650
651 amdgpu_bo_unreserve(bo);
652 }
653 } 632 }
654 633
655 if (list_empty(&need_pages)) 634 r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
656 break; 635 if (r) {
657 636 kvfree(e->user_pages);
658 /* Unreserve everything again. */ 637 e->user_pages = NULL;
659 ttm_eu_backoff_reservation(&p->ticket, &p->validated); 638 return r;
660
661 /* We tried too many times, just abort */
662 if (!--tries) {
663 r = -EDEADLK;
664 DRM_ERROR("deadlock in %s\n", __func__);
665 goto error_free_pages;
666 } 639 }
667 640
668 /* Fill the page arrays for all userptrs. */ 641 for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
669 list_for_each_entry(e, &need_pages, tv.head) { 642 if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
670 struct ttm_tt *ttm = e->tv.bo->ttm; 643 userpage_invalidated = true;
671 644 break;
672 e->user_pages = kvmalloc_array(ttm->num_pages,
673 sizeof(struct page*),
674 GFP_KERNEL | __GFP_ZERO);
675 if (!e->user_pages) {
676 r = -ENOMEM;
677 DRM_ERROR("calloc failure in %s\n", __func__);
678 goto error_free_pages;
679 }
680
681 r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
682 if (r) {
683 DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
684 kvfree(e->user_pages);
685 e->user_pages = NULL;
686 goto error_free_pages;
687 } 645 }
688 } 646 }
647 e->user_invalidated = userpage_invalidated;
648 }
689 649
690 /* And try again. */ 650 r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
691 list_splice(&need_pages, &p->validated); 651 &duplicates);
652 if (unlikely(r != 0)) {
653 if (r != -ERESTARTSYS)
654 DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
655 goto out;
692 } 656 }
693 657
694 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, 658 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -757,17 +721,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
757error_validate: 721error_validate:
758 if (r) 722 if (r)
759 ttm_eu_backoff_reservation(&p->ticket, &p->validated); 723 ttm_eu_backoff_reservation(&p->ticket, &p->validated);
760 724out:
761error_free_pages:
762
763 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
764 if (!e->user_pages)
765 continue;
766
767 release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
768 kvfree(e->user_pages);
769 }
770
771 return r; 725 return r;
772} 726}
773 727
@@ -1054,11 +1008,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
1054 j++; 1008 j++;
1055 } 1009 }
1056 1010
1057 /* UVD & VCE fw doesn't support user fences */ 1011 /* MM engine doesn't support user fences */
1058 ring = to_amdgpu_ring(parser->entity->rq->sched); 1012 ring = to_amdgpu_ring(parser->entity->rq->sched);
1059 if (parser->job->uf_addr && ( 1013 if (parser->job->uf_addr && ring->funcs->no_user_fence)
1060 ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
1061 ring->funcs->type == AMDGPU_RING_TYPE_VCE))
1062 return -EINVAL; 1014 return -EINVAL;
1063 1015
1064 return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); 1016 return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
@@ -1328,7 +1280,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1328 struct amdgpu_bo_list_entry *e; 1280 struct amdgpu_bo_list_entry *e;
1329 struct amdgpu_job *job; 1281 struct amdgpu_job *job;
1330 uint64_t seq; 1282 uint64_t seq;
1331
1332 int r; 1283 int r;
1333 1284
1334 job = p->job; 1285 job = p->job;
@@ -1338,15 +1289,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1338 if (r) 1289 if (r)
1339 goto error_unlock; 1290 goto error_unlock;
1340 1291
1341 /* No memory allocation is allowed while holding the mn lock */ 1292 /* No memory allocation is allowed while holding the mn lock.
1293 * p->mn is hold until amdgpu_cs_submit is finished and fence is added
1294 * to BOs.
1295 */
1342 amdgpu_mn_lock(p->mn); 1296 amdgpu_mn_lock(p->mn);
1297
1298 /* If userptr are invalidated after amdgpu_cs_parser_bos(), return
1299 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
1300 */
1343 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { 1301 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
1344 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); 1302 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
1345 1303
1346 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { 1304 r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1347 r = -ERESTARTSYS; 1305 }
1348 goto error_abort; 1306 if (r) {
1349 } 1307 r = -EAGAIN;
1308 goto error_abort;
1350 } 1309 }
1351 1310
1352 job->owner = p->filp; 1311 job->owner = p->filp;
@@ -1442,6 +1401,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1442 1401
1443out: 1402out:
1444 amdgpu_cs_parser_fini(&parser, r, reserved_buffers); 1403 amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1404
1445 return r; 1405 return r;
1446} 1406}
1447 1407
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9f282e971197..0ffa6733f2b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = {
97 "LAST", 97 "LAST",
98}; 98};
99 99
100/**
101 * DOC: pcie_replay_count
102 *
103 * The amdgpu driver provides a sysfs API for reporting the total number
104 * of PCIe replays (NAKs)
105 * The file pcie_replay_count is used for this and returns the total
106 * number of replays as a sum of the NAKs generated and NAKs received
107 */
108
109static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
110 struct device_attribute *attr, char *buf)
111{
112 struct drm_device *ddev = dev_get_drvdata(dev);
113 struct amdgpu_device *adev = ddev->dev_private;
114 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
115
116 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
117}
118
119static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
120 amdgpu_device_get_pcie_replay_count, NULL);
121
100static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); 122static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
101 123
102/** 124/**
@@ -910,8 +932,10 @@ def_value:
910 * Validates certain module parameters and updates 932 * Validates certain module parameters and updates
911 * the associated values used by the driver (all asics). 933 * the associated values used by the driver (all asics).
912 */ 934 */
913static void amdgpu_device_check_arguments(struct amdgpu_device *adev) 935static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
914{ 936{
937 int ret = 0;
938
915 if (amdgpu_sched_jobs < 4) { 939 if (amdgpu_sched_jobs < 4) {
916 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", 940 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
917 amdgpu_sched_jobs); 941 amdgpu_sched_jobs);
@@ -956,12 +980,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
956 amdgpu_vram_page_split = 1024; 980 amdgpu_vram_page_split = 1024;
957 } 981 }
958 982
959 if (amdgpu_lockup_timeout == 0) { 983 ret = amdgpu_device_get_job_timeout_settings(adev);
960 dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); 984 if (ret) {
961 amdgpu_lockup_timeout = 10000; 985 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
986 return ret;
962 } 987 }
963 988
964 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); 989 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
990
991 return ret;
965} 992}
966 993
967/** 994/**
@@ -1505,12 +1532,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1505 r = amdgpu_virt_request_full_gpu(adev, true); 1532 r = amdgpu_virt_request_full_gpu(adev, true);
1506 if (r) 1533 if (r)
1507 return -EAGAIN; 1534 return -EAGAIN;
1535
1536 /* query the reg access mode at the very beginning */
1537 amdgpu_virt_init_reg_access_mode(adev);
1508 } 1538 }
1509 1539
1510 adev->pm.pp_feature = amdgpu_pp_feature_mask; 1540 adev->pm.pp_feature = amdgpu_pp_feature_mask;
1511 if (amdgpu_sriov_vf(adev)) 1541 if (amdgpu_sriov_vf(adev))
1512 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1542 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1513 1543
1544 /* Read BIOS */
1545 if (!amdgpu_get_bios(adev))
1546 return -EINVAL;
1547
1548 r = amdgpu_atombios_init(adev);
1549 if (r) {
1550 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1551 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1552 return r;
1553 }
1554
1514 for (i = 0; i < adev->num_ip_blocks; i++) { 1555 for (i = 0; i < adev->num_ip_blocks; i++) {
1515 if ((amdgpu_ip_block_mask & (1 << i)) == 0) { 1556 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
1516 DRM_ERROR("disabled ip block: %d <%s>\n", 1557 DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1550,6 +1591,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1550 if (adev->ip_blocks[i].status.hw) 1591 if (adev->ip_blocks[i].status.hw)
1551 continue; 1592 continue;
1552 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 1593 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1594 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1553 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 1595 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1554 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 1596 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1555 if (r) { 1597 if (r) {
@@ -2473,7 +2515,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2473 mutex_init(&adev->lock_reset); 2515 mutex_init(&adev->lock_reset);
2474 mutex_init(&adev->virt.dpm_mutex); 2516 mutex_init(&adev->virt.dpm_mutex);
2475 2517
2476 amdgpu_device_check_arguments(adev); 2518 r = amdgpu_device_check_arguments(adev);
2519 if (r)
2520 return r;
2477 2521
2478 spin_lock_init(&adev->mmio_idx_lock); 2522 spin_lock_init(&adev->mmio_idx_lock);
2479 spin_lock_init(&adev->smc_idx_lock); 2523 spin_lock_init(&adev->smc_idx_lock);
@@ -2558,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2558 goto fence_driver_init; 2602 goto fence_driver_init;
2559 } 2603 }
2560 2604
2561 /* Read BIOS */
2562 if (!amdgpu_get_bios(adev)) {
2563 r = -EINVAL;
2564 goto failed;
2565 }
2566
2567 r = amdgpu_atombios_init(adev);
2568 if (r) {
2569 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2570 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2571 goto failed;
2572 }
2573
2574 /* detect if we are with an SRIOV vbios */ 2605 /* detect if we are with an SRIOV vbios */
2575 amdgpu_device_detect_sriov_bios(adev); 2606 amdgpu_device_detect_sriov_bios(adev);
2576 2607
@@ -2672,6 +2703,10 @@ fence_driver_init:
2672 if (r) 2703 if (r)
2673 DRM_ERROR("registering pm debugfs failed (%d).\n", r); 2704 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2674 2705
2706 r = amdgpu_ucode_sysfs_init(adev);
2707 if (r)
2708 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2709
2675 r = amdgpu_debugfs_gem_init(adev); 2710 r = amdgpu_debugfs_gem_init(adev);
2676 if (r) 2711 if (r)
2677 DRM_ERROR("registering gem debugfs failed (%d).\n", r); 2712 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -2712,7 +2747,13 @@ fence_driver_init:
2712 } 2747 }
2713 2748
2714 /* must succeed. */ 2749 /* must succeed. */
2715 amdgpu_ras_post_init(adev); 2750 amdgpu_ras_resume(adev);
2751
2752 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2753 if (r) {
2754 dev_err(adev->dev, "Could not create pcie_replay_count");
2755 return r;
2756 }
2716 2757
2717 return 0; 2758 return 0;
2718 2759
@@ -2777,6 +2818,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
2777 adev->rmmio = NULL; 2818 adev->rmmio = NULL;
2778 amdgpu_device_doorbell_fini(adev); 2819 amdgpu_device_doorbell_fini(adev);
2779 amdgpu_debugfs_regs_cleanup(adev); 2820 amdgpu_debugfs_regs_cleanup(adev);
2821 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
2822 amdgpu_ucode_sysfs_fini(adev);
2780} 2823}
2781 2824
2782 2825
@@ -2857,6 +2900,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
2857 2900
2858 amdgpu_amdkfd_suspend(adev); 2901 amdgpu_amdkfd_suspend(adev);
2859 2902
2903 amdgpu_ras_suspend(adev);
2904
2860 r = amdgpu_device_ip_suspend_phase1(adev); 2905 r = amdgpu_device_ip_suspend_phase1(adev);
2861 2906
2862 /* evict vram memory */ 2907 /* evict vram memory */
@@ -2977,6 +3022,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
2977 3022
2978 drm_kms_helper_poll_enable(dev); 3023 drm_kms_helper_poll_enable(dev);
2979 3024
3025 amdgpu_ras_resume(adev);
3026
2980 /* 3027 /*
2981 * Most of the connector probing functions try to acquire runtime pm 3028 * Most of the connector probing functions try to acquire runtime pm
2982 * refs to ensure that the GPU is powered on when connector polling is 3029 * refs to ensure that the GPU is powered on when connector polling is
@@ -3455,6 +3502,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3455 if (vram_lost) 3502 if (vram_lost)
3456 amdgpu_device_fill_reset_magic(tmp_adev); 3503 amdgpu_device_fill_reset_magic(tmp_adev);
3457 3504
3505 r = amdgpu_device_ip_late_init(tmp_adev);
3506 if (r)
3507 goto out;
3508
3509 /* must succeed. */
3510 amdgpu_ras_resume(tmp_adev);
3511
3458 /* Update PSP FW topology after reset */ 3512 /* Update PSP FW topology after reset */
3459 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) 3513 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3460 r = amdgpu_xgmi_update_topology(hive, tmp_adev); 3514 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
@@ -3695,43 +3749,6 @@ skip_hw_reset:
3695 return r; 3749 return r;
3696} 3750}
3697 3751
3698static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
3699 enum pci_bus_speed *speed,
3700 enum pcie_link_width *width)
3701{
3702 struct pci_dev *pdev = adev->pdev;
3703 enum pci_bus_speed cur_speed;
3704 enum pcie_link_width cur_width;
3705 u32 ret = 1;
3706
3707 *speed = PCI_SPEED_UNKNOWN;
3708 *width = PCIE_LNK_WIDTH_UNKNOWN;
3709
3710 while (pdev) {
3711 cur_speed = pcie_get_speed_cap(pdev);
3712 cur_width = pcie_get_width_cap(pdev);
3713 ret = pcie_bandwidth_available(adev->pdev, NULL,
3714 NULL, &cur_width);
3715 if (!ret)
3716 cur_width = PCIE_LNK_WIDTH_RESRV;
3717
3718 if (cur_speed != PCI_SPEED_UNKNOWN) {
3719 if (*speed == PCI_SPEED_UNKNOWN)
3720 *speed = cur_speed;
3721 else if (cur_speed < *speed)
3722 *speed = cur_speed;
3723 }
3724
3725 if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
3726 if (*width == PCIE_LNK_WIDTH_UNKNOWN)
3727 *width = cur_width;
3728 else if (cur_width < *width)
3729 *width = cur_width;
3730 }
3731 pdev = pci_upstream_bridge(pdev);
3732 }
3733}
3734
3735/** 3752/**
3736 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot 3753 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
3737 * 3754 *
@@ -3765,8 +3782,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
3765 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) 3782 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3766 return; 3783 return;
3767 3784
3768 amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, 3785 pcie_bandwidth_available(adev->pdev, NULL,
3769 &platform_link_width); 3786 &platform_speed_cap, &platform_link_width);
3770 3787
3771 if (adev->pm.pcie_gen_mask == 0) { 3788 if (adev->pm.pcie_gen_mask == 0) {
3772 /* asic caps */ 3789 /* asic caps */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b083b219b1a9..30e6ad8a90bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -631,10 +631,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
631 amdgpu_dither_enum_list, sz); 631 amdgpu_dither_enum_list, sz);
632 632
633 if (amdgpu_device_has_dc_support(adev)) { 633 if (amdgpu_device_has_dc_support(adev)) {
634 adev->mode_info.max_bpc_property =
635 drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
636 if (!adev->mode_info.max_bpc_property)
637 return -ENOMEM;
638 adev->mode_info.abm_level_property = 634 adev->mode_info.abm_level_property =
639 drm_property_create_range(adev->ddev, 0, 635 drm_property_create_range(adev->ddev, 0,
640 "abm level", 0, 4); 636 "abm level", 0, 4);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index a38e0fb4a6fe..4711cf1b5bd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright 2012 Advanced Micro Devices, Inc. 2 * Copyright 2019 Advanced Micro Devices, Inc.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -103,7 +103,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
103 * Returns: 103 * Returns:
104 * 0 on success or a negative error code on failure. 104 * 0 on success or a negative error code on failure.
105 */ 105 */
106int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) 106int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
107 struct vm_area_struct *vma)
107{ 108{
108 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 109 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
109 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 110 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -137,57 +138,6 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
137 return ret; 138 return ret;
138} 139}
139 140
140/**
141 * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
142 * implementation
143 * @dev: DRM device
144 * @attach: DMA-buf attachment
145 * @sg: Scatter/gather table
146 *
147 * Imports shared DMA buffer memory exported by another device.
148 *
149 * Returns:
150 * A new GEM BO of the given DRM device, representing the memory
151 * described by the given DMA-buf attachment and scatter/gather table.
152 */
153struct drm_gem_object *
154amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
155 struct dma_buf_attachment *attach,
156 struct sg_table *sg)
157{
158 struct reservation_object *resv = attach->dmabuf->resv;
159 struct amdgpu_device *adev = dev->dev_private;
160 struct amdgpu_bo *bo;
161 struct amdgpu_bo_param bp;
162 int ret;
163
164 memset(&bp, 0, sizeof(bp));
165 bp.size = attach->dmabuf->size;
166 bp.byte_align = PAGE_SIZE;
167 bp.domain = AMDGPU_GEM_DOMAIN_CPU;
168 bp.flags = 0;
169 bp.type = ttm_bo_type_sg;
170 bp.resv = resv;
171 ww_mutex_lock(&resv->lock, NULL);
172 ret = amdgpu_bo_create(adev, &bp, &bo);
173 if (ret)
174 goto error;
175
176 bo->tbo.sg = sg;
177 bo->tbo.ttm->sg = sg;
178 bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
179 bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
180 if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
181 bo->prime_shared_count = 1;
182
183 ww_mutex_unlock(&resv->lock);
184 return &bo->gem_base;
185
186error:
187 ww_mutex_unlock(&resv->lock);
188 return ERR_PTR(ret);
189}
190
191static int 141static int
192__reservation_object_make_exclusive(struct reservation_object *obj) 142__reservation_object_make_exclusive(struct reservation_object *obj)
193{ 143{
@@ -231,7 +181,7 @@ err_fences_put:
231} 181}
232 182
233/** 183/**
234 * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation 184 * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
235 * @dma_buf: Shared DMA buffer 185 * @dma_buf: Shared DMA buffer
236 * @attach: DMA-buf attachment 186 * @attach: DMA-buf attachment
237 * 187 *
@@ -242,8 +192,8 @@ err_fences_put:
242 * Returns: 192 * Returns:
243 * 0 on success or a negative error code on failure. 193 * 0 on success or a negative error code on failure.
244 */ 194 */
245static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, 195static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
246 struct dma_buf_attachment *attach) 196 struct dma_buf_attachment *attach)
247{ 197{
248 struct drm_gem_object *obj = dma_buf->priv; 198 struct drm_gem_object *obj = dma_buf->priv;
249 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 199 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -291,15 +241,15 @@ error_detach:
291} 241}
292 242
293/** 243/**
294 * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation 244 * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
295 * @dma_buf: Shared DMA buffer 245 * @dma_buf: Shared DMA buffer
296 * @attach: DMA-buf attachment 246 * @attach: DMA-buf attachment
297 * 247 *
298 * This is called when a shared DMA buffer no longer needs to be accessible by 248 * This is called when a shared DMA buffer no longer needs to be accessible by
299 * another device. For now, simply unpins the buffer from GTT. 249 * another device. For now, simply unpins the buffer from GTT.
300 */ 250 */
301static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, 251static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
302 struct dma_buf_attachment *attach) 252 struct dma_buf_attachment *attach)
303{ 253{
304 struct drm_gem_object *obj = dma_buf->priv; 254 struct drm_gem_object *obj = dma_buf->priv;
305 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 255 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -334,7 +284,7 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
334} 284}
335 285
336/** 286/**
337 * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation 287 * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
338 * @dma_buf: Shared DMA buffer 288 * @dma_buf: Shared DMA buffer
339 * @direction: Direction of DMA transfer 289 * @direction: Direction of DMA transfer
340 * 290 *
@@ -345,8 +295,8 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
345 * Returns: 295 * Returns:
346 * 0 on success or a negative error code on failure. 296 * 0 on success or a negative error code on failure.
347 */ 297 */
348static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, 298static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
349 enum dma_data_direction direction) 299 enum dma_data_direction direction)
350{ 300{
351 struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); 301 struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
352 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 302 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -374,12 +324,12 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
374} 324}
375 325
376const struct dma_buf_ops amdgpu_dmabuf_ops = { 326const struct dma_buf_ops amdgpu_dmabuf_ops = {
377 .attach = amdgpu_gem_map_attach, 327 .attach = amdgpu_dma_buf_map_attach,
378 .detach = amdgpu_gem_map_detach, 328 .detach = amdgpu_dma_buf_map_detach,
379 .map_dma_buf = drm_gem_map_dma_buf, 329 .map_dma_buf = drm_gem_map_dma_buf,
380 .unmap_dma_buf = drm_gem_unmap_dma_buf, 330 .unmap_dma_buf = drm_gem_unmap_dma_buf,
381 .release = drm_gem_dmabuf_release, 331 .release = drm_gem_dmabuf_release,
382 .begin_cpu_access = amdgpu_gem_begin_cpu_access, 332 .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
383 .mmap = drm_gem_dmabuf_mmap, 333 .mmap = drm_gem_dmabuf_mmap,
384 .vmap = drm_gem_dmabuf_vmap, 334 .vmap = drm_gem_dmabuf_vmap,
385 .vunmap = drm_gem_dmabuf_vunmap, 335 .vunmap = drm_gem_dmabuf_vunmap,
@@ -418,6 +368,57 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
418} 368}
419 369
420/** 370/**
371 * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
372 * implementation
373 * @dev: DRM device
374 * @attach: DMA-buf attachment
375 * @sg: Scatter/gather table
376 *
377 * Imports shared DMA buffer memory exported by another device.
378 *
379 * Returns:
380 * A new GEM BO of the given DRM device, representing the memory
381 * described by the given DMA-buf attachment and scatter/gather table.
382 */
383struct drm_gem_object *
384amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
385 struct dma_buf_attachment *attach,
386 struct sg_table *sg)
387{
388 struct reservation_object *resv = attach->dmabuf->resv;
389 struct amdgpu_device *adev = dev->dev_private;
390 struct amdgpu_bo *bo;
391 struct amdgpu_bo_param bp;
392 int ret;
393
394 memset(&bp, 0, sizeof(bp));
395 bp.size = attach->dmabuf->size;
396 bp.byte_align = PAGE_SIZE;
397 bp.domain = AMDGPU_GEM_DOMAIN_CPU;
398 bp.flags = 0;
399 bp.type = ttm_bo_type_sg;
400 bp.resv = resv;
401 ww_mutex_lock(&resv->lock, NULL);
402 ret = amdgpu_bo_create(adev, &bp, &bo);
403 if (ret)
404 goto error;
405
406 bo->tbo.sg = sg;
407 bo->tbo.ttm->sg = sg;
408 bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
409 bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
410 if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
411 bo->prime_shared_count = 1;
412
413 ww_mutex_unlock(&resv->lock);
414 return &bo->gem_base;
415
416error:
417 ww_mutex_unlock(&resv->lock);
418 return ERR_PTR(ret);
419}
420
421/**
421 * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation 422 * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
422 * @dev: DRM device 423 * @dev: DRM device
423 * @dma_buf: Shared DMA buffer 424 * @dma_buf: Shared DMA buffer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
new file mode 100644
index 000000000000..c7056cbe8685
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -0,0 +1,46 @@
1/*
2 * Copyright 2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#ifndef __AMDGPU_DMA_BUF_H__
24#define __AMDGPU_DMA_BUF_H__
25
26#include <drm/drm_gem.h>
27
28struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
29struct drm_gem_object *
30amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
31 struct dma_buf_attachment *attach,
32 struct sg_table *sg);
33struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
34 struct drm_gem_object *gobj,
35 int flags);
36struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
37 struct dma_buf *dma_buf);
38struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
39void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
40void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
41int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
42 struct vm_area_struct *vma);
43
44extern const struct dma_buf_ops amdgpu_dmabuf_ops;
45
46#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index dca35407879d..521dbd0d9af8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {
75 int min_temp; 75 int min_temp;
76 /* high temperature threshold */ 76 /* high temperature threshold */
77 int max_temp; 77 int max_temp;
78 /* edge max emergency(shutdown) temp */
79 int max_edge_emergency_temp;
80 /* hotspot low temperature threshold */
81 int min_hotspot_temp;
82 /* hotspot high temperature critical threshold */
83 int max_hotspot_crit_temp;
84 /* hotspot max emergency(shutdown) temp */
85 int max_hotspot_emergency_temp;
86 /* memory low temperature threshold */
87 int min_mem_temp;
88 /* memory high temperature critical threshold */
89 int max_mem_crit_temp;
90 /* memory max emergency(shutdown) temp */
91 int max_mem_emergency_temp;
78 /* was last interrupt low to high or high to low */ 92 /* was last interrupt low to high or high to low */
79 bool high_to_low; 93 bool high_to_low;
80 /* interrupt source */ 94 /* interrupt source */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 1e2cc9d68a05..1f38d6fc1fe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -36,7 +36,7 @@
36 36
37#include "amdgpu.h" 37#include "amdgpu.h"
38#include "amdgpu_irq.h" 38#include "amdgpu_irq.h"
39#include "amdgpu_gem.h" 39#include "amdgpu_dma_buf.h"
40 40
41#include "amdgpu_amdkfd.h" 41#include "amdgpu_amdkfd.h"
42 42
@@ -81,6 +81,8 @@
81#define KMS_DRIVER_MINOR 32 81#define KMS_DRIVER_MINOR 32
82#define KMS_DRIVER_PATCHLEVEL 0 82#define KMS_DRIVER_PATCHLEVEL 0
83 83
84#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
85
84int amdgpu_vram_limit = 0; 86int amdgpu_vram_limit = 0;
85int amdgpu_vis_vram_limit = 0; 87int amdgpu_vis_vram_limit = 0;
86int amdgpu_gart_size = -1; /* auto */ 88int amdgpu_gart_size = -1; /* auto */
@@ -93,7 +95,7 @@ int amdgpu_disp_priority = 0;
93int amdgpu_hw_i2c = 0; 95int amdgpu_hw_i2c = 0;
94int amdgpu_pcie_gen2 = -1; 96int amdgpu_pcie_gen2 = -1;
95int amdgpu_msi = -1; 97int amdgpu_msi = -1;
96int amdgpu_lockup_timeout = 10000; 98char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
97int amdgpu_dpm = -1; 99int amdgpu_dpm = -1;
98int amdgpu_fw_load_type = -1; 100int amdgpu_fw_load_type = -1;
99int amdgpu_aspm = -1; 101int amdgpu_aspm = -1;
@@ -227,12 +229,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
227module_param_named(msi, amdgpu_msi, int, 0444); 229module_param_named(msi, amdgpu_msi, int, 0444);
228 230
229/** 231/**
230 * DOC: lockup_timeout (int) 232 * DOC: lockup_timeout (string)
231 * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000. 233 * Set GPU scheduler timeout value in ms.
232 * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000. 234 *
233 */ 235 * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
234MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); 236 * multiple values specified. 0 and negative values are invalidated. They will be adjusted
235module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); 237 * to default timeout.
238 * - With one value specified, the setting will apply to all non-compute jobs.
239 * - With multiple values specified, the first one will be for GFX. The second one is for Compute.
240 * And the third and fourth ones are for SDMA and Video.
241 * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
242 * jobs is 10000. And there is no timeout enforced on compute jobs.
243 */
244MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), "
245 "format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
246module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
236 247
237/** 248/**
238 * DOC: dpm (int) 249 * DOC: dpm (int)
@@ -655,6 +666,16 @@ MODULE_PARM_DESC(noretry,
655int halt_if_hws_hang; 666int halt_if_hws_hang;
656module_param(halt_if_hws_hang, int, 0644); 667module_param(halt_if_hws_hang, int, 0644);
657MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); 668MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
669
670/**
671 * DOC: hws_gws_support(bool)
672 * Whether HWS support gws barriers. Default value: false (not supported)
673 * This will be replaced with a MEC firmware version check once firmware
674 * is ready
675 */
676bool hws_gws_support;
677module_param(hws_gws_support, bool, 0444);
678MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
658#endif 679#endif
659 680
660/** 681/**
@@ -1216,6 +1237,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
1216 return 0; 1237 return 0;
1217} 1238}
1218 1239
1240int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
1241{
1242 char *input = amdgpu_lockup_timeout;
1243 char *timeout_setting = NULL;
1244 int index = 0;
1245 long timeout;
1246 int ret = 0;
1247
1248 /*
1249 * By default timeout for non compute jobs is 10000.
1250 * And there is no timeout enforced on compute jobs.
1251 */
1252 adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000;
1253 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
1254
1255 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
1256 while ((timeout_setting = strsep(&input, ",")) &&
1257 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
1258 ret = kstrtol(timeout_setting, 0, &timeout);
1259 if (ret)
1260 return ret;
1261
1262 /* Invalidate 0 and negative values */
1263 if (timeout <= 0) {
1264 index++;
1265 continue;
1266 }
1267
1268 switch (index++) {
1269 case 0:
1270 adev->gfx_timeout = timeout;
1271 break;
1272 case 1:
1273 adev->compute_timeout = timeout;
1274 break;
1275 case 2:
1276 adev->sdma_timeout = timeout;
1277 break;
1278 case 3:
1279 adev->video_timeout = timeout;
1280 break;
1281 default:
1282 break;
1283 }
1284 }
1285 /*
1286 * There is only one value specified and
1287 * it should apply to all non-compute jobs.
1288 */
1289 if (index == 1)
1290 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
1291 }
1292
1293 return ret;
1294}
1295
1219static bool 1296static bool
1220amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, 1297amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
1221 bool in_vblank_irq, int *vpos, int *hpos, 1298 bool in_vblank_irq, int *vpos, int *hpos,
@@ -1230,7 +1307,8 @@ static struct drm_driver kms_driver = {
1230 .driver_features = 1307 .driver_features =
1231 DRIVER_USE_AGP | DRIVER_ATOMIC | 1308 DRIVER_USE_AGP | DRIVER_ATOMIC |
1232 DRIVER_GEM | 1309 DRIVER_GEM |
1233 DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, 1310 DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
1311 DRIVER_SYNCOBJ_TIMELINE,
1234 .load = amdgpu_driver_load_kms, 1312 .load = amdgpu_driver_load_kms,
1235 .open = amdgpu_driver_open_kms, 1313 .open = amdgpu_driver_open_kms,
1236 .postclose = amdgpu_driver_postclose_kms, 1314 .postclose = amdgpu_driver_postclose_kms,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 4dee2326b29c..3a483f7e89c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -427,9 +427,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
427int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, 427int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
428 unsigned num_hw_submission) 428 unsigned num_hw_submission)
429{ 429{
430 struct amdgpu_device *adev = ring->adev;
430 long timeout; 431 long timeout;
431 int r; 432 int r;
432 433
434 if (!adev)
435 return -EINVAL;
436
433 /* Check that num_hw_submission is a power of two */ 437 /* Check that num_hw_submission is a power of two */
434 if ((num_hw_submission & (num_hw_submission - 1)) != 0) 438 if ((num_hw_submission & (num_hw_submission - 1)) != 0)
435 return -EINVAL; 439 return -EINVAL;
@@ -451,12 +455,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
451 455
452 /* No need to setup the GPU scheduler for KIQ ring */ 456 /* No need to setup the GPU scheduler for KIQ ring */
453 if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { 457 if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
454 /* for non-sriov case, no timeout enforce on compute ring */ 458 switch (ring->funcs->type) {
455 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) 459 case AMDGPU_RING_TYPE_GFX:
456 && !amdgpu_sriov_vf(ring->adev)) 460 timeout = adev->gfx_timeout;
457 timeout = MAX_SCHEDULE_TIMEOUT; 461 break;
458 else 462 case AMDGPU_RING_TYPE_COMPUTE:
459 timeout = msecs_to_jiffies(amdgpu_lockup_timeout); 463 /*
464 * For non-sriov case, no timeout enforce
465 * on compute ring by default. Unless user
466 * specifies a timeout for compute ring.
467 *
468 * For sriov case, always use the timeout
469 * as gfx ring
470 */
471 if (!amdgpu_sriov_vf(ring->adev))
472 timeout = adev->compute_timeout;
473 else
474 timeout = adev->gfx_timeout;
475 break;
476 case AMDGPU_RING_TYPE_SDMA:
477 timeout = adev->sdma_timeout;
478 break;
479 default:
480 timeout = adev->video_timeout;
481 break;
482 }
460 483
461 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, 484 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
462 num_hw_submission, amdgpu_job_hang_limit, 485 num_hw_submission, amdgpu_job_hang_limit,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index f89f5734d985..dad2186f4ed5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -27,26 +27,11 @@
27struct amdgpu_ring; 27struct amdgpu_ring;
28struct amdgpu_bo; 28struct amdgpu_bo;
29 29
30struct amdgpu_gds_asic_info {
31 uint32_t total_size;
32 uint32_t gfx_partition_size;
33 uint32_t cs_partition_size;
34};
35
36struct amdgpu_gds { 30struct amdgpu_gds {
37 struct amdgpu_gds_asic_info mem; 31 uint32_t gds_size;
38 struct amdgpu_gds_asic_info gws; 32 uint32_t gws_size;
39 struct amdgpu_gds_asic_info oa; 33 uint32_t oa_size;
40 uint32_t gds_compute_max_wave_id; 34 uint32_t gds_compute_max_wave_id;
41
42 /* At present, GDS, GWS and OA resources for gfx (graphics)
43 * is always pre-allocated and available for graphics operation.
44 * Such resource is shared between all gfx clients.
45 * TODO: move this operation to user space
46 * */
47 struct amdgpu_bo* gds_gfx_bo;
48 struct amdgpu_bo* gws_gfx_bo;
49 struct amdgpu_bo* oa_gfx_bo;
50}; 35};
51 36
52struct amdgpu_gds_reg_offset { 37struct amdgpu_gds_reg_offset {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d4fcf5475464..7b840367004c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -330,26 +330,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
330 330
331 r = amdgpu_bo_reserve(bo, true); 331 r = amdgpu_bo_reserve(bo, true);
332 if (r) 332 if (r)
333 goto free_pages; 333 goto user_pages_done;
334 334
335 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); 335 amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
336 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 336 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
337 amdgpu_bo_unreserve(bo); 337 amdgpu_bo_unreserve(bo);
338 if (r) 338 if (r)
339 goto free_pages; 339 goto user_pages_done;
340 } 340 }
341 341
342 r = drm_gem_handle_create(filp, gobj, &handle); 342 r = drm_gem_handle_create(filp, gobj, &handle);
343 /* drop reference from allocate - handle holds it now */
344 drm_gem_object_put_unlocked(gobj);
345 if (r) 343 if (r)
346 return r; 344 goto user_pages_done;
347 345
348 args->handle = handle; 346 args->handle = handle;
349 return 0;
350 347
351free_pages: 348user_pages_done:
352 release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); 349 if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
350 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
353 351
354release_object: 352release_object:
355 drm_gem_object_put_unlocked(gobj); 353 drm_gem_object_put_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index f1ddfc50bcc7..b8ba6e27c61f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
39void amdgpu_gem_object_close(struct drm_gem_object *obj, 39void amdgpu_gem_object_close(struct drm_gem_object *obj,
40 struct drm_file *file_priv); 40 struct drm_file *file_priv);
41unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); 41unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
42struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
43struct drm_gem_object *
44amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
45 struct dma_buf_attachment *attach,
46 struct sg_table *sg);
47struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
48 struct drm_gem_object *gobj,
49 int flags);
50struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
51 struct dma_buf *dma_buf);
52struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
53void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
54void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
55int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
56
57extern const struct dma_buf_ops amdgpu_dmabuf_ops;
58 42
59/* 43/*
60 * GEM objects. 44 * GEM objects.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0a17fb1af204..7ab1241bd9e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
51 51
52 if (amdgpu_device_should_recover_gpu(ring->adev)) 52 if (amdgpu_device_should_recover_gpu(ring->adev))
53 amdgpu_device_gpu_recover(ring->adev, job); 53 amdgpu_device_gpu_recover(ring->adev, job);
54 else
55 drm_sched_suspend_timeout(&ring->sched);
54} 56}
55 57
56int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 58int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b17d0545728e..edb675103bd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -590,13 +590,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
590 struct drm_amdgpu_info_gds gds_info; 590 struct drm_amdgpu_info_gds gds_info;
591 591
592 memset(&gds_info, 0, sizeof(gds_info)); 592 memset(&gds_info, 0, sizeof(gds_info));
593 gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size; 593 gds_info.compute_partition_size = adev->gds.gds_size;
594 gds_info.compute_partition_size = adev->gds.mem.cs_partition_size; 594 gds_info.gds_total_size = adev->gds.gds_size;
595 gds_info.gds_total_size = adev->gds.mem.total_size; 595 gds_info.gws_per_compute_partition = adev->gds.gws_size;
596 gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size; 596 gds_info.oa_per_compute_partition = adev->gds.oa_size;
597 gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
598 gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
599 gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
600 return copy_to_user(out, &gds_info, 597 return copy_to_user(out, &gds_info,
601 min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0; 598 min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
602 } 599 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 58ed401c5996..41ccee49a224 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
45 45
46#include <linux/firmware.h> 46#include <linux/firmware.h>
47#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/mmu_notifier.h> 48#include <linux/hmm.h>
49#include <linux/interval_tree.h> 49#include <linux/interval_tree.h>
50#include <drm/drmP.h> 50#include <drm/drmP.h>
51#include <drm/drm.h> 51#include <drm/drm.h>
@@ -58,14 +58,12 @@
58 * 58 *
59 * @adev: amdgpu device pointer 59 * @adev: amdgpu device pointer
60 * @mm: process address space 60 * @mm: process address space
61 * @mn: MMU notifier structure
62 * @type: type of MMU notifier 61 * @type: type of MMU notifier
63 * @work: destruction work item 62 * @work: destruction work item
64 * @node: hash table node to find structure by adev and mn 63 * @node: hash table node to find structure by adev and mn
65 * @lock: rw semaphore protecting the notifier nodes 64 * @lock: rw semaphore protecting the notifier nodes
66 * @objects: interval tree containing amdgpu_mn_nodes 65 * @objects: interval tree containing amdgpu_mn_nodes
67 * @read_lock: mutex for recursive locking of @lock 66 * @mirror: HMM mirror function support
68 * @recursion: depth of recursion
69 * 67 *
70 * Data for each amdgpu device and process address space. 68 * Data for each amdgpu device and process address space.
71 */ 69 */
@@ -73,7 +71,6 @@ struct amdgpu_mn {
73 /* constant after initialisation */ 71 /* constant after initialisation */
74 struct amdgpu_device *adev; 72 struct amdgpu_device *adev;
75 struct mm_struct *mm; 73 struct mm_struct *mm;
76 struct mmu_notifier mn;
77 enum amdgpu_mn_type type; 74 enum amdgpu_mn_type type;
78 75
79 /* only used on destruction */ 76 /* only used on destruction */
@@ -85,8 +82,9 @@ struct amdgpu_mn {
85 /* objects protected by lock */ 82 /* objects protected by lock */
86 struct rw_semaphore lock; 83 struct rw_semaphore lock;
87 struct rb_root_cached objects; 84 struct rb_root_cached objects;
88 struct mutex read_lock; 85
89 atomic_t recursion; 86 /* HMM mirror */
87 struct hmm_mirror mirror;
90}; 88};
91 89
92/** 90/**
@@ -103,7 +101,7 @@ struct amdgpu_mn_node {
103}; 101};
104 102
105/** 103/**
106 * amdgpu_mn_destroy - destroy the MMU notifier 104 * amdgpu_mn_destroy - destroy the HMM mirror
107 * 105 *
108 * @work: previously sheduled work item 106 * @work: previously sheduled work item
109 * 107 *
@@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
129 } 127 }
130 up_write(&amn->lock); 128 up_write(&amn->lock);
131 mutex_unlock(&adev->mn_lock); 129 mutex_unlock(&adev->mn_lock);
132 mmu_notifier_unregister_no_release(&amn->mn, amn->mm); 130
131 hmm_mirror_unregister(&amn->mirror);
133 kfree(amn); 132 kfree(amn);
134} 133}
135 134
136/** 135/**
137 * amdgpu_mn_release - callback to notify about mm destruction 136 * amdgpu_hmm_mirror_release - callback to notify about mm destruction
138 * 137 *
139 * @mn: our notifier 138 * @mirror: the HMM mirror (mm) this callback is about
140 * @mm: the mm this callback is about
141 * 139 *
142 * Shedule a work item to lazy destroy our notifier. 140 * Shedule a work item to lazy destroy HMM mirror.
143 */ 141 */
144static void amdgpu_mn_release(struct mmu_notifier *mn, 142static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
145 struct mm_struct *mm)
146{ 143{
147 struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); 144 struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
148 145
149 INIT_WORK(&amn->work, amdgpu_mn_destroy); 146 INIT_WORK(&amn->work, amdgpu_mn_destroy);
150 schedule_work(&amn->work); 147 schedule_work(&amn->work);
151} 148}
152 149
153
154/** 150/**
155 * amdgpu_mn_lock - take the write side lock for this notifier 151 * amdgpu_mn_lock - take the write side lock for this notifier
156 * 152 *
@@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
181static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) 177static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
182{ 178{
183 if (blockable) 179 if (blockable)
184 mutex_lock(&amn->read_lock); 180 down_read(&amn->lock);
185 else if (!mutex_trylock(&amn->read_lock)) 181 else if (!down_read_trylock(&amn->lock))
186 return -EAGAIN; 182 return -EAGAIN;
187 183
188 if (atomic_inc_return(&amn->recursion) == 1)
189 down_read_non_owner(&amn->lock);
190 mutex_unlock(&amn->read_lock);
191
192 return 0; 184 return 0;
193} 185}
194 186
@@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
199 */ 191 */
200static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) 192static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
201{ 193{
202 if (atomic_dec_return(&amn->recursion) == 0) 194 up_read(&amn->lock);
203 up_read_non_owner(&amn->lock);
204} 195}
205 196
206/** 197/**
@@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
229 true, false, MAX_SCHEDULE_TIMEOUT); 220 true, false, MAX_SCHEDULE_TIMEOUT);
230 if (r <= 0) 221 if (r <= 0)
231 DRM_ERROR("(%ld) failed to wait for user bo\n", r); 222 DRM_ERROR("(%ld) failed to wait for user bo\n", r);
232
233 amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
234 } 223 }
235} 224}
236 225
237/** 226/**
238 * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change 227 * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
239 * 228 *
240 * @mn: our notifier 229 * @mirror: the hmm_mirror (mm) is about to update
241 * @range: mmu notifier context 230 * @update: the update start, end address
242 * 231 *
243 * Block for operations on BOs to finish and mark pages as accessed and 232 * Block for operations on BOs to finish and mark pages as accessed and
244 * potentially dirty. 233 * potentially dirty.
245 */ 234 */
246static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, 235static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
247 const struct mmu_notifier_range *range) 236 const struct hmm_update *update)
248{ 237{
249 struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); 238 struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
239 unsigned long start = update->start;
240 unsigned long end = update->end;
241 bool blockable = update->blockable;
250 struct interval_tree_node *it; 242 struct interval_tree_node *it;
251 unsigned long end;
252 243
253 /* notification is exclusive, but interval is inclusive */ 244 /* notification is exclusive, but interval is inclusive */
254 end = range->end - 1; 245 end -= 1;
255 246
256 /* TODO we should be able to split locking for interval tree and 247 /* TODO we should be able to split locking for interval tree and
257 * amdgpu_mn_invalidate_node 248 * amdgpu_mn_invalidate_node
258 */ 249 */
259 if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) 250 if (amdgpu_mn_read_lock(amn, blockable))
260 return -EAGAIN; 251 return -EAGAIN;
261 252
262 it = interval_tree_iter_first(&amn->objects, range->start, end); 253 it = interval_tree_iter_first(&amn->objects, start, end);
263 while (it) { 254 while (it) {
264 struct amdgpu_mn_node *node; 255 struct amdgpu_mn_node *node;
265 256
266 if (!mmu_notifier_range_blockable(range)) { 257 if (!blockable) {
267 amdgpu_mn_read_unlock(amn); 258 amdgpu_mn_read_unlock(amn);
268 return -EAGAIN; 259 return -EAGAIN;
269 } 260 }
270 261
271 node = container_of(it, struct amdgpu_mn_node, it); 262 node = container_of(it, struct amdgpu_mn_node, it);
272 it = interval_tree_iter_next(it, range->start, end); 263 it = interval_tree_iter_next(it, start, end);
273 264
274 amdgpu_mn_invalidate_node(node, range->start, end); 265 amdgpu_mn_invalidate_node(node, start, end);
275 } 266 }
276 267
268 amdgpu_mn_read_unlock(amn);
269
277 return 0; 270 return 0;
278} 271}
279 272
280/** 273/**
281 * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change 274 * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
282 * 275 *
283 * @mn: our notifier 276 * @mirror: the hmm_mirror (mm) is about to update
284 * @mm: the mm this callback is about 277 * @update: the update start, end address
285 * @start: start of updated range
286 * @end: end of updated range
287 * 278 *
288 * We temporarily evict all BOs between start and end. This 279 * We temporarily evict all BOs between start and end. This
289 * necessitates evicting all user-mode queues of the process. The BOs 280 * necessitates evicting all user-mode queues of the process. The BOs
290 * are restorted in amdgpu_mn_invalidate_range_end_hsa. 281 * are restorted in amdgpu_mn_invalidate_range_end_hsa.
291 */ 282 */
292static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, 283static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
293 const struct mmu_notifier_range *range) 284 const struct hmm_update *update)
294{ 285{
295 struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); 286 struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
287 unsigned long start = update->start;
288 unsigned long end = update->end;
289 bool blockable = update->blockable;
296 struct interval_tree_node *it; 290 struct interval_tree_node *it;
297 unsigned long end;
298 291
299 /* notification is exclusive, but interval is inclusive */ 292 /* notification is exclusive, but interval is inclusive */
300 end = range->end - 1; 293 end -= 1;
301 294
302 if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) 295 if (amdgpu_mn_read_lock(amn, blockable))
303 return -EAGAIN; 296 return -EAGAIN;
304 297
305 it = interval_tree_iter_first(&amn->objects, range->start, end); 298 it = interval_tree_iter_first(&amn->objects, start, end);
306 while (it) { 299 while (it) {
307 struct amdgpu_mn_node *node; 300 struct amdgpu_mn_node *node;
308 struct amdgpu_bo *bo; 301 struct amdgpu_bo *bo;
309 302
310 if (!mmu_notifier_range_blockable(range)) { 303 if (!blockable) {
311 amdgpu_mn_read_unlock(amn); 304 amdgpu_mn_read_unlock(amn);
312 return -EAGAIN; 305 return -EAGAIN;
313 } 306 }
314 307
315 node = container_of(it, struct amdgpu_mn_node, it); 308 node = container_of(it, struct amdgpu_mn_node, it);
316 it = interval_tree_iter_next(it, range->start, end); 309 it = interval_tree_iter_next(it, start, end);
317 310
318 list_for_each_entry(bo, &node->bos, mn_list) { 311 list_for_each_entry(bo, &node->bos, mn_list) {
319 struct kgd_mem *mem = bo->kfd_bo; 312 struct kgd_mem *mem = bo->kfd_bo;
320 313
321 if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, 314 if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
322 range->start, 315 start, end))
323 end)) 316 amdgpu_amdkfd_evict_userptr(mem, amn->mm);
324 amdgpu_amdkfd_evict_userptr(mem, range->mm);
325 } 317 }
326 } 318 }
327 319
320 amdgpu_mn_read_unlock(amn);
321
328 return 0; 322 return 0;
329} 323}
330 324
331/** 325/* Low bits of any reasonable mm pointer will be unused due to struct
332 * amdgpu_mn_invalidate_range_end - callback to notify about mm change 326 * alignment. Use these bits to make a unique key from the mm pointer
333 * 327 * and notifier type.
334 * @mn: our notifier
335 * @mm: the mm this callback is about
336 * @start: start of updated range
337 * @end: end of updated range
338 *
339 * Release the lock again to allow new command submissions.
340 */ 328 */
341static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, 329#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
342 const struct mmu_notifier_range *range)
343{
344 struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
345
346 amdgpu_mn_read_unlock(amn);
347}
348 330
349static const struct mmu_notifier_ops amdgpu_mn_ops[] = { 331static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
350 [AMDGPU_MN_TYPE_GFX] = { 332 [AMDGPU_MN_TYPE_GFX] = {
351 .release = amdgpu_mn_release, 333 .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
352 .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, 334 .release = amdgpu_hmm_mirror_release
353 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
354 }, 335 },
355 [AMDGPU_MN_TYPE_HSA] = { 336 [AMDGPU_MN_TYPE_HSA] = {
356 .release = amdgpu_mn_release, 337 .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
357 .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, 338 .release = amdgpu_hmm_mirror_release
358 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
359 }, 339 },
360}; 340};
361 341
362/* Low bits of any reasonable mm pointer will be unused due to struct
363 * alignment. Use these bits to make a unique key from the mm pointer
364 * and notifier type.
365 */
366#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
367
368/** 342/**
369 * amdgpu_mn_get - create notifier context 343 * amdgpu_mn_get - create HMM mirror context
370 * 344 *
371 * @adev: amdgpu device pointer 345 * @adev: amdgpu device pointer
372 * @type: type of MMU notifier context 346 * @type: type of MMU notifier context
373 * 347 *
374 * Creates a notifier context for current->mm. 348 * Creates a HMM mirror context for current->mm.
375 */ 349 */
376struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, 350struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
377 enum amdgpu_mn_type type) 351 enum amdgpu_mn_type type)
@@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
401 amn->mm = mm; 375 amn->mm = mm;
402 init_rwsem(&amn->lock); 376 init_rwsem(&amn->lock);
403 amn->type = type; 377 amn->type = type;
404 amn->mn.ops = &amdgpu_mn_ops[type];
405 amn->objects = RB_ROOT_CACHED; 378 amn->objects = RB_ROOT_CACHED;
406 mutex_init(&amn->read_lock);
407 atomic_set(&amn->recursion, 0);
408 379
409 r = __mmu_notifier_register(&amn->mn, mm); 380 amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
381 r = hmm_mirror_register(&amn->mirror, mm);
410 if (r) 382 if (r)
411 goto free_amn; 383 goto free_amn;
412 384
@@ -432,7 +404,7 @@ free_amn:
432 * @bo: amdgpu buffer object 404 * @bo: amdgpu buffer object
433 * @addr: userptr addr we should monitor 405 * @addr: userptr addr we should monitor
434 * 406 *
435 * Registers an MMU notifier for the given BO at the specified address. 407 * Registers an HMM mirror for the given BO at the specified address.
436 * Returns 0 on success, -ERRNO if anything goes wrong. 408 * Returns 0 on success, -ERRNO if anything goes wrong.
437 */ 409 */
438int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) 410int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
488} 460}
489 461
490/** 462/**
491 * amdgpu_mn_unregister - unregister a BO for notifier updates 463 * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
492 * 464 *
493 * @bo: amdgpu buffer object 465 * @bo: amdgpu buffer object
494 * 466 *
495 * Remove any registration of MMU notifier updates from the buffer object. 467 * Remove any registration of HMM mirror updates from the buffer object.
496 */ 468 */
497void amdgpu_mn_unregister(struct amdgpu_bo *bo) 469void amdgpu_mn_unregister(struct amdgpu_bo *bo)
498{ 470{
@@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
528 mutex_unlock(&adev->mn_lock); 500 mutex_unlock(&adev->mn_lock);
529} 501}
530 502
503/* flags used by HMM internal, not related to CPU/GPU PTE flags */
504static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
505 (1 << 0), /* HMM_PFN_VALID */
506 (1 << 1), /* HMM_PFN_WRITE */
507 0 /* HMM_PFN_DEVICE_PRIVATE */
508};
509
510static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
511 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
512 0, /* HMM_PFN_NONE */
513 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
514};
515
516void amdgpu_hmm_init_range(struct hmm_range *range)
517{
518 if (range) {
519 range->flags = hmm_range_flags;
520 range->values = hmm_range_values;
521 range->pfn_shift = PAGE_SHIFT;
522 range->pfns = NULL;
523 INIT_LIST_HEAD(&range->list);
524 }
525}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..f5b67c63ed6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,22 +25,24 @@
25#define __AMDGPU_MN_H__ 25#define __AMDGPU_MN_H__
26 26
27/* 27/*
28 * MMU Notifier 28 * HMM mirror
29 */ 29 */
30struct amdgpu_mn; 30struct amdgpu_mn;
31struct hmm_range;
31 32
32enum amdgpu_mn_type { 33enum amdgpu_mn_type {
33 AMDGPU_MN_TYPE_GFX, 34 AMDGPU_MN_TYPE_GFX,
34 AMDGPU_MN_TYPE_HSA, 35 AMDGPU_MN_TYPE_HSA,
35}; 36};
36 37
37#if defined(CONFIG_MMU_NOTIFIER) 38#if defined(CONFIG_HMM_MIRROR)
38void amdgpu_mn_lock(struct amdgpu_mn *mn); 39void amdgpu_mn_lock(struct amdgpu_mn *mn);
39void amdgpu_mn_unlock(struct amdgpu_mn *mn); 40void amdgpu_mn_unlock(struct amdgpu_mn *mn);
40struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, 41struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
41 enum amdgpu_mn_type type); 42 enum amdgpu_mn_type type);
42int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); 43int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
43void amdgpu_mn_unregister(struct amdgpu_bo *bo); 44void amdgpu_mn_unregister(struct amdgpu_bo *bo);
45void amdgpu_hmm_init_range(struct hmm_range *range);
44#else 46#else
45static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} 47static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
46static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} 48static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
@@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
51} 53}
52static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) 54static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
53{ 55{
56 DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
57 "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
54 return -ENODEV; 58 return -ENODEV;
55} 59}
56static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} 60static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 2e9e3db778c6..eb9975f4decb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -331,8 +331,6 @@ struct amdgpu_mode_info {
331 struct drm_property *audio_property; 331 struct drm_property *audio_property;
332 /* FMT dithering */ 332 /* FMT dithering */
333 struct drm_property *dither_property; 333 struct drm_property *dither_property;
334 /* maximum number of bits per channel for monitor color */
335 struct drm_property *max_bpc_property;
336 /* Adaptive Backlight Modulation (power feature) */ 334 /* Adaptive Backlight Modulation (power feature) */
337 struct drm_property *abm_level_property; 335 struct drm_property *abm_level_property;
338 /* hardcoded DFP edid from BIOS */ 336 /* hardcoded DFP edid from BIOS */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 34471dbaa872..a73e1903d29b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = {
67 {0, NULL}, 67 {0, NULL},
68}; 68};
69 69
70static const struct hwmon_temp_label {
71 enum PP_HWMON_TEMP channel;
72 const char *label;
73} temp_label[] = {
74 {PP_TEMP_EDGE, "edge"},
75 {PP_TEMP_JUNCTION, "junction"},
76 {PP_TEMP_MEM, "mem"},
77};
78
70void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) 79void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
71{ 80{
72 if (adev->pm.dpm_enabled) { 81 if (adev->pm.dpm_enabled) {
@@ -758,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
758 767
759 pr_debug("featuremask = 0x%llx\n", featuremask); 768 pr_debug("featuremask = 0x%llx\n", featuremask);
760 769
761 if (adev->powerplay.pp_funcs->set_ppfeature_status) { 770 if (is_support_sw_smu(adev)) {
771 ret = smu_set_ppfeature_status(&adev->smu, featuremask);
772 if (ret)
773 return -EINVAL;
774 } else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
762 ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); 775 ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
763 if (ret) 776 if (ret)
764 return -EINVAL; 777 return -EINVAL;
@@ -774,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
774 struct drm_device *ddev = dev_get_drvdata(dev); 787 struct drm_device *ddev = dev_get_drvdata(dev);
775 struct amdgpu_device *adev = ddev->dev_private; 788 struct amdgpu_device *adev = ddev->dev_private;
776 789
777 if (adev->powerplay.pp_funcs->get_ppfeature_status) 790 if (is_support_sw_smu(adev)) {
791 return smu_get_ppfeature_status(&adev->smu, buf);
792 } else if (adev->powerplay.pp_funcs->get_ppfeature_status)
778 return amdgpu_dpm_get_ppfeature_status(adev, buf); 793 return amdgpu_dpm_get_ppfeature_status(adev, buf);
779 794
780 return snprintf(buf, PAGE_SIZE, "\n"); 795 return snprintf(buf, PAGE_SIZE, "\n");
@@ -1303,6 +1318,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
1303} 1318}
1304 1319
1305/** 1320/**
1321 * DOC: mem_busy_percent
1322 *
1323 * The amdgpu driver provides a sysfs API for reading how busy the VRAM
1324 * is as a percentage. The file mem_busy_percent is used for this.
1325 * The SMU firmware computes a percentage of load based on the
1326 * aggregate activity level in the IP cores.
1327 */
1328static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
1329 struct device_attribute *attr,
1330 char *buf)
1331{
1332 struct drm_device *ddev = dev_get_drvdata(dev);
1333 struct amdgpu_device *adev = ddev->dev_private;
1334 int r, value, size = sizeof(value);
1335
1336 /* read the IP busy sensor */
1337 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
1338 (void *)&value, &size);
1339
1340 if (r)
1341 return r;
1342
1343 return snprintf(buf, PAGE_SIZE, "%d\n", value);
1344}
1345
1346/**
1306 * DOC: pcie_bw 1347 * DOC: pcie_bw
1307 * 1348 *
1308 * The amdgpu driver provides a sysfs API for estimating how much data 1349 * The amdgpu driver provides a sysfs API for estimating how much data
@@ -1327,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
1327 count0, count1, pcie_get_mps(adev->pdev)); 1368 count0, count1, pcie_get_mps(adev->pdev));
1328} 1369}
1329 1370
1371/**
1372 * DOC: unique_id
1373 *
1374 * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU
1375 * The file unique_id is used for this.
1376 * This will provide a Unique ID that will persist from machine to machine
1377 *
1378 * NOTE: This will only work for GFX9 and newer. This file will be absent
1379 * on unsupported ASICs (GFX8 and older)
1380 */
1381static ssize_t amdgpu_get_unique_id(struct device *dev,
1382 struct device_attribute *attr,
1383 char *buf)
1384{
1385 struct drm_device *ddev = dev_get_drvdata(dev);
1386 struct amdgpu_device *adev = ddev->dev_private;
1387
1388 if (adev->unique_id)
1389 return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
1390
1391 return 0;
1392}
1393
1330static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); 1394static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
1331static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, 1395static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
1332 amdgpu_get_dpm_forced_performance_level, 1396 amdgpu_get_dpm_forced_performance_level,
@@ -1371,10 +1435,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
1371 amdgpu_set_pp_od_clk_voltage); 1435 amdgpu_set_pp_od_clk_voltage);
1372static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, 1436static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
1373 amdgpu_get_busy_percent, NULL); 1437 amdgpu_get_busy_percent, NULL);
1438static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
1439 amdgpu_get_memory_busy_percent, NULL);
1374static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); 1440static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
1375static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, 1441static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
1376 amdgpu_get_ppfeature_status, 1442 amdgpu_get_ppfeature_status,
1377 amdgpu_set_ppfeature_status); 1443 amdgpu_set_ppfeature_status);
1444static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
1378 1445
1379static ssize_t amdgpu_hwmon_show_temp(struct device *dev, 1446static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
1380 struct device_attribute *attr, 1447 struct device_attribute *attr,
@@ -1382,6 +1449,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
1382{ 1449{
1383 struct amdgpu_device *adev = dev_get_drvdata(dev); 1450 struct amdgpu_device *adev = dev_get_drvdata(dev);
1384 struct drm_device *ddev = adev->ddev; 1451 struct drm_device *ddev = adev->ddev;
1452 int channel = to_sensor_dev_attr(attr)->index;
1385 int r, temp, size = sizeof(temp); 1453 int r, temp, size = sizeof(temp);
1386 1454
1387 /* Can't get temperature when the card is off */ 1455 /* Can't get temperature when the card is off */
@@ -1389,11 +1457,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
1389 (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) 1457 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
1390 return -EINVAL; 1458 return -EINVAL;
1391 1459
1392 /* get the temperature */ 1460 if (channel >= PP_TEMP_MAX)
1393 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, 1461 return -EINVAL;
1394 (void *)&temp, &size); 1462
1395 if (r) 1463 switch (channel) {
1396 return r; 1464 case PP_TEMP_JUNCTION:
1465 /* get current junction temperature */
1466 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
1467 (void *)&temp, &size);
1468 if (r)
1469 return r;
1470 break;
1471 case PP_TEMP_EDGE:
1472 /* get current edge temperature */
1473 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
1474 (void *)&temp, &size);
1475 if (r)
1476 return r;
1477 break;
1478 case PP_TEMP_MEM:
1479 /* get current memory temperature */
1480 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
1481 (void *)&temp, &size);
1482 if (r)
1483 return r;
1484 break;
1485 }
1397 1486
1398 return snprintf(buf, PAGE_SIZE, "%d\n", temp); 1487 return snprintf(buf, PAGE_SIZE, "%d\n", temp);
1399} 1488}
@@ -1414,6 +1503,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
1414 return snprintf(buf, PAGE_SIZE, "%d\n", temp); 1503 return snprintf(buf, PAGE_SIZE, "%d\n", temp);
1415} 1504}
1416 1505
1506static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
1507 struct device_attribute *attr,
1508 char *buf)
1509{
1510 struct amdgpu_device *adev = dev_get_drvdata(dev);
1511 int hyst = to_sensor_dev_attr(attr)->index;
1512 int temp;
1513
1514 if (hyst)
1515 temp = adev->pm.dpm.thermal.min_hotspot_temp;
1516 else
1517 temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
1518
1519 return snprintf(buf, PAGE_SIZE, "%d\n", temp);
1520}
1521
1522static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
1523 struct device_attribute *attr,
1524 char *buf)
1525{
1526 struct amdgpu_device *adev = dev_get_drvdata(dev);
1527 int hyst = to_sensor_dev_attr(attr)->index;
1528 int temp;
1529
1530 if (hyst)
1531 temp = adev->pm.dpm.thermal.min_mem_temp;
1532 else
1533 temp = adev->pm.dpm.thermal.max_mem_crit_temp;
1534
1535 return snprintf(buf, PAGE_SIZE, "%d\n", temp);
1536}
1537
1538static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
1539 struct device_attribute *attr,
1540 char *buf)
1541{
1542 int channel = to_sensor_dev_attr(attr)->index;
1543
1544 if (channel >= PP_TEMP_MAX)
1545 return -EINVAL;
1546
1547 return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
1548}
1549
1550static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
1551 struct device_attribute *attr,
1552 char *buf)
1553{
1554 struct amdgpu_device *adev = dev_get_drvdata(dev);
1555 int channel = to_sensor_dev_attr(attr)->index;
1556 int temp = 0;
1557
1558 if (channel >= PP_TEMP_MAX)
1559 return -EINVAL;
1560
1561 switch (channel) {
1562 case PP_TEMP_JUNCTION:
1563 temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
1564 break;
1565 case PP_TEMP_EDGE:
1566 temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
1567 break;
1568 case PP_TEMP_MEM:
1569 temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
1570 break;
1571 }
1572
1573 return snprintf(buf, PAGE_SIZE, "%d\n", temp);
1574}
1575
1417static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, 1576static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
1418 struct device_attribute *attr, 1577 struct device_attribute *attr,
1419 char *buf) 1578 char *buf)
@@ -1983,11 +2142,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
1983 * 2142 *
1984 * hwmon interfaces for GPU temperature: 2143 * hwmon interfaces for GPU temperature:
1985 * 2144 *
1986 * - temp1_input: the on die GPU temperature in millidegrees Celsius 2145 * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
2146 * - temp2_input and temp3_input are supported on SOC15 dGPUs only
2147 *
2148 * - temp[1-3]_label: temperature channel label
2149 * - temp2_label and temp3_label are supported on SOC15 dGPUs only
2150 *
2151 * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
2152 * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
1987 * 2153 *
1988 * - temp1_crit: temperature critical max value in millidegrees Celsius 2154 * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
2155 * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
1989 * 2156 *
1990 * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius 2157 * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
2158 * - these are supported on SOC15 dGPUs only
1991 * 2159 *
1992 * hwmon interfaces for GPU voltage: 2160 * hwmon interfaces for GPU voltage:
1993 * 2161 *
@@ -2035,9 +2203,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
2035 * 2203 *
2036 */ 2204 */
2037 2205
2038static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); 2206static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
2039static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); 2207static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
2040static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); 2208static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
2209static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
2210static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
2211static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
2212static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
2213static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
2214static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
2215static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
2216static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
2217static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
2218static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
2219static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
2220static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
2041static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); 2221static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
2042static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); 2222static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
2043static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); 2223static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@@ -2064,6 +2244,18 @@ static struct attribute *hwmon_attributes[] = {
2064 &sensor_dev_attr_temp1_input.dev_attr.attr, 2244 &sensor_dev_attr_temp1_input.dev_attr.attr,
2065 &sensor_dev_attr_temp1_crit.dev_attr.attr, 2245 &sensor_dev_attr_temp1_crit.dev_attr.attr,
2066 &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, 2246 &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
2247 &sensor_dev_attr_temp2_input.dev_attr.attr,
2248 &sensor_dev_attr_temp2_crit.dev_attr.attr,
2249 &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
2250 &sensor_dev_attr_temp3_input.dev_attr.attr,
2251 &sensor_dev_attr_temp3_crit.dev_attr.attr,
2252 &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
2253 &sensor_dev_attr_temp1_emergency.dev_attr.attr,
2254 &sensor_dev_attr_temp2_emergency.dev_attr.attr,
2255 &sensor_dev_attr_temp3_emergency.dev_attr.attr,
2256 &sensor_dev_attr_temp1_label.dev_attr.attr,
2257 &sensor_dev_attr_temp2_label.dev_attr.attr,
2258 &sensor_dev_attr_temp3_label.dev_attr.attr,
2067 &sensor_dev_attr_pwm1.dev_attr.attr, 2259 &sensor_dev_attr_pwm1.dev_attr.attr,
2068 &sensor_dev_attr_pwm1_enable.dev_attr.attr, 2260 &sensor_dev_attr_pwm1_enable.dev_attr.attr,
2069 &sensor_dev_attr_pwm1_min.dev_attr.attr, 2261 &sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2186,6 +2378,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
2186 attr == &sensor_dev_attr_freq2_label.dev_attr.attr)) 2378 attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
2187 return 0; 2379 return 0;
2188 2380
2381 /* only SOC15 dGPUs support hotspot and mem temperatures */
2382 if (((adev->flags & AMD_IS_APU) ||
2383 adev->asic_type < CHIP_VEGA10) &&
2384 (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
2385 attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
2386 attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
2387 attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
2388 attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
2389 attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
2390 attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
2391 attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
2392 attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
2393 attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
2394 attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
2395 return 0;
2396
2189 return effective_mode; 2397 return effective_mode;
2190} 2398}
2191 2399
@@ -2612,6 +2820,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
2612 "gpu_busy_level\n"); 2820 "gpu_busy_level\n");
2613 return ret; 2821 return ret;
2614 } 2822 }
2823 /* APU does not have its own dedicated memory */
2824 if (!(adev->flags & AMD_IS_APU)) {
2825 ret = device_create_file(adev->dev,
2826 &dev_attr_mem_busy_percent);
2827 if (ret) {
2828 DRM_ERROR("failed to create device file "
2829 "mem_busy_percent\n");
2830 return ret;
2831 }
2832 }
2615 /* PCIe Perf counters won't work on APU nodes */ 2833 /* PCIe Perf counters won't work on APU nodes */
2616 if (!(adev->flags & AMD_IS_APU)) { 2834 if (!(adev->flags & AMD_IS_APU)) {
2617 ret = device_create_file(adev->dev, &dev_attr_pcie_bw); 2835 ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
@@ -2620,6 +2838,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
2620 return ret; 2838 return ret;
2621 } 2839 }
2622 } 2840 }
2841 if (adev->unique_id)
2842 ret = device_create_file(adev->dev, &dev_attr_unique_id);
2843 if (ret) {
2844 DRM_ERROR("failed to create device file unique_id\n");
2845 return ret;
2846 }
2623 ret = amdgpu_debugfs_pm_init(adev); 2847 ret = amdgpu_debugfs_pm_init(adev);
2624 if (ret) { 2848 if (ret) {
2625 DRM_ERROR("Failed to register debugfs file for dpm!\n"); 2849 DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -2678,7 +2902,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
2678 &dev_attr_pp_od_clk_voltage); 2902 &dev_attr_pp_od_clk_voltage);
2679 device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); 2903 device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
2680 if (!(adev->flags & AMD_IS_APU)) 2904 if (!(adev->flags & AMD_IS_APU))
2905 device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
2906 if (!(adev->flags & AMD_IS_APU))
2681 device_remove_file(adev->dev, &dev_attr_pcie_bw); 2907 device_remove_file(adev->dev, &dev_attr_pcie_bw);
2908 if (adev->unique_id)
2909 device_remove_file(adev->dev, &dev_attr_unique_id);
2682 if ((adev->asic_type >= CHIP_VEGA10) && 2910 if ((adev->asic_type >= CHIP_VEGA10) &&
2683 !(adev->flags & AMD_IS_APU)) 2911 !(adev->flags & AMD_IS_APU))
2684 device_remove_file(adev->dev, &dev_attr_ppfeatures); 2912 device_remove_file(adev->dev, &dev_attr_ppfeatures);
@@ -2775,6 +3003,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
2775 /* GPU Load */ 3003 /* GPU Load */
2776 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size)) 3004 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
2777 seq_printf(m, "GPU Load: %u %%\n", value); 3005 seq_printf(m, "GPU Load: %u %%\n", value);
3006 /* MEM Load */
3007 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
3008 seq_printf(m, "MEM Load: %u %%\n", value);
3009
2778 seq_printf(m, "\n"); 3010 seq_printf(m, "\n");
2779 3011
2780 /* SMC feature mask */ 3012 /* SMC feature mask */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 86cc24b2e0aa..af9835c8395d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp)
289 return ret; 289 return ret;
290} 290}
291 291
292static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
293 uint32_t id, uint32_t value)
294{
295 cmd->cmd_id = GFX_CMD_ID_PROG_REG;
296 cmd->cmd.cmd_setup_reg_prog.reg_value = value;
297 cmd->cmd.cmd_setup_reg_prog.reg_id = id;
298}
299
300int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
301 uint32_t value)
302{
303 struct psp_gfx_cmd_resp *cmd = NULL;
304 int ret = 0;
305
306 if (reg >= PSP_REG_LAST)
307 return -EINVAL;
308
309 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
310 if (!cmd)
311 return -ENOMEM;
312
313 psp_prep_reg_prog_cmd_buf(cmd, reg, value);
314 ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
315
316 kfree(cmd);
317 return ret;
318}
319
292static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, 320static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
293 uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, 321 uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
294 uint32_t xgmi_ta_size, uint32_t shared_size) 322 uint32_t xgmi_ta_size, uint32_t shared_size)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cde113f07c96..cf49539b0b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -62,6 +62,14 @@ struct psp_ring
62 uint32_t ring_size; 62 uint32_t ring_size;
63}; 63};
64 64
65/* More registers may will be supported */
66enum psp_reg_prog_id {
67 PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
68 PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
69 PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
70 PSP_REG_LAST
71};
72
65struct psp_funcs 73struct psp_funcs
66{ 74{
67 int (*init_microcode)(struct psp_context *psp); 75 int (*init_microcode)(struct psp_context *psp);
@@ -95,12 +103,26 @@ struct psp_funcs
95 int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); 103 int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
96}; 104};
97 105
106#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
107struct psp_xgmi_node_info {
108 uint64_t node_id;
109 uint8_t num_hops;
110 uint8_t is_sharing_enabled;
111 enum ta_xgmi_assigned_sdma_engine sdma_engine;
112};
113
114struct psp_xgmi_topology_info {
115 uint32_t num_nodes;
116 struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
117};
118
98struct psp_xgmi_context { 119struct psp_xgmi_context {
99 uint8_t initialized; 120 uint8_t initialized;
100 uint32_t session_id; 121 uint32_t session_id;
101 struct amdgpu_bo *xgmi_shared_bo; 122 struct amdgpu_bo *xgmi_shared_bo;
102 uint64_t xgmi_shared_mc_addr; 123 uint64_t xgmi_shared_mc_addr;
103 void *xgmi_shared_buf; 124 void *xgmi_shared_buf;
125 struct psp_xgmi_topology_info top_info;
104}; 126};
105 127
106struct psp_ras_context { 128struct psp_ras_context {
@@ -181,18 +203,6 @@ struct amdgpu_psp_funcs {
181 enum AMDGPU_UCODE_ID); 203 enum AMDGPU_UCODE_ID);
182}; 204};
183 205
184#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
185struct psp_xgmi_node_info {
186 uint64_t node_id;
187 uint8_t num_hops;
188 uint8_t is_sharing_enabled;
189 enum ta_xgmi_assigned_sdma_engine sdma_engine;
190};
191
192struct psp_xgmi_topology_info {
193 uint32_t num_nodes;
194 struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
195};
196 206
197#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) 207#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
198#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) 208#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp,
250 union ta_ras_cmd_input *info, bool enable); 260 union ta_ras_cmd_input *info, bool enable);
251 261
252extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; 262extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
253 263int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
264 uint32_t value);
254#endif 265#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 22bd21efe6b1..7c8a4aedf07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -90,6 +90,12 @@ struct ras_manager {
90 struct ras_err_data err_data; 90 struct ras_err_data err_data;
91}; 91};
92 92
93struct ras_badpage {
94 unsigned int bp;
95 unsigned int size;
96 unsigned int flags;
97};
98
93const char *ras_error_string[] = { 99const char *ras_error_string[] = {
94 "none", 100 "none",
95 "parity", 101 "parity",
@@ -118,7 +124,8 @@ const char *ras_block_string[] = {
118#define ras_err_str(i) (ras_error_string[ffs(i)]) 124#define ras_err_str(i) (ras_error_string[ffs(i)])
119#define ras_block_str(i) (ras_block_string[i]) 125#define ras_block_str(i) (ras_block_string[i])
120 126
121#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 127#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
128#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
122#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) 129#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
123 130
124static void amdgpu_ras_self_test(struct amdgpu_device *adev) 131static void amdgpu_ras_self_test(struct amdgpu_device *adev)
@@ -237,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
237 244
238 return 0; 245 return 0;
239} 246}
240/* 247/**
241 * DOC: ras debugfs control interface 248 * DOC: AMDGPU RAS debugfs control interface
242 * 249 *
243 * It accepts struct ras_debug_if who has two members. 250 * It accepts struct ras_debug_if who has two members.
244 * 251 *
@@ -521,6 +528,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
521 enable ? "enable":"disable", 528 enable ? "enable":"disable",
522 ras_block_str(head->block), 529 ras_block_str(head->block),
523 ret); 530 ret);
531 if (ret == TA_RAS_STATUS__RESET_NEEDED)
532 return -EAGAIN;
524 return -EINVAL; 533 return -EINVAL;
525 } 534 }
526 535
@@ -541,16 +550,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
541 return -EINVAL; 550 return -EINVAL;
542 551
543 if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { 552 if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
544 /* If ras is enabled by vbios, we set up ras object first in 553 if (enable) {
545 * both case. For enable, that is all what we need do. For 554 /* There is no harm to issue a ras TA cmd regardless of
546 * disable, we need perform a ras TA disable cmd after that. 555 * the currecnt ras state.
547 */ 556 * If current state == target state, it will do nothing
548 ret = __amdgpu_ras_feature_enable(adev, head, 1); 557 * But sometimes it requests driver to reset and repost
549 if (ret) 558 * with error code -EAGAIN.
550 return ret; 559 */
560 ret = amdgpu_ras_feature_enable(adev, head, 1);
561 /* With old ras TA, we might fail to enable ras.
562 * Log it and just setup the object.
563 * TODO need remove this WA in the future.
564 */
565 if (ret == -EINVAL) {
566 ret = __amdgpu_ras_feature_enable(adev, head, 1);
567 if (!ret)
568 DRM_INFO("RAS INFO: %s setup object\n",
569 ras_block_str(head->block));
570 }
571 } else {
572 /* setup the object then issue a ras TA disable cmd.*/
573 ret = __amdgpu_ras_feature_enable(adev, head, 1);
574 if (ret)
575 return ret;
551 576
552 if (!enable)
553 ret = amdgpu_ras_feature_enable(adev, head, 0); 577 ret = amdgpu_ras_feature_enable(adev, head, 0);
578 }
554 } else 579 } else
555 ret = amdgpu_ras_feature_enable(adev, head, enable); 580 ret = amdgpu_ras_feature_enable(adev, head, enable);
556 581
@@ -691,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
691 716
692/* sysfs begin */ 717/* sysfs begin */
693 718
719static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
720 struct ras_badpage **bps, unsigned int *count);
721
722static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
723{
724 switch (flags) {
725 case 0:
726 return "R";
727 case 1:
728 return "P";
729 case 2:
730 default:
731 return "F";
732 };
733}
734
735/*
736 * DOC: ras sysfs gpu_vram_bad_pages interface
737 *
738 * It allows user to read the bad pages of vram on the gpu through
739 * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
740 *
741 * It outputs multiple lines, and each line stands for one gpu page.
742 *
743 * The format of one line is below,
744 * gpu pfn : gpu page size : flags
745 *
746 * gpu pfn and gpu page size are printed in hex format.
747 * flags can be one of below character,
748 * R: reserved, this gpu page is reserved and not able to use.
749 * P: pending for reserve, this gpu page is marked as bad, will be reserved
750 * in next window of page_reserve.
751 * F: unable to reserve. this gpu page can't be reserved due to some reasons.
752 *
753 * examples:
754 * 0x00000001 : 0x00001000 : R
755 * 0x00000002 : 0x00001000 : P
756 */
757
758static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
759 struct kobject *kobj, struct bin_attribute *attr,
760 char *buf, loff_t ppos, size_t count)
761{
762 struct amdgpu_ras *con =
763 container_of(attr, struct amdgpu_ras, badpages_attr);
764 struct amdgpu_device *adev = con->adev;
765 const unsigned int element_size =
766 sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
767 unsigned int start = div64_ul(ppos + element_size - 1, element_size);
768 unsigned int end = div64_ul(ppos + count - 1, element_size);
769 ssize_t s = 0;
770 struct ras_badpage *bps = NULL;
771 unsigned int bps_count = 0;
772
773 memset(buf, 0, count);
774
775 if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
776 return 0;
777
778 for (; start < end && start < bps_count; start++)
779 s += scnprintf(&buf[s], element_size + 1,
780 "0x%08x : 0x%08x : %1s\n",
781 bps[start].bp,
782 bps[start].size,
783 amdgpu_ras_badpage_flags_str(bps[start].flags));
784
785 kfree(bps);
786
787 return s;
788}
789
694static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, 790static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
695 struct device_attribute *attr, char *buf) 791 struct device_attribute *attr, char *buf)
696{ 792{
@@ -731,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
731 &con->features_attr.attr, 827 &con->features_attr.attr,
732 NULL 828 NULL
733 }; 829 };
830 struct bin_attribute *bin_attrs[] = {
831 &con->badpages_attr,
832 NULL
833 };
734 struct attribute_group group = { 834 struct attribute_group group = {
735 .name = "ras", 835 .name = "ras",
736 .attrs = attrs, 836 .attrs = attrs,
837 .bin_attrs = bin_attrs,
737 }; 838 };
738 839
739 con->features_attr = (struct device_attribute) { 840 con->features_attr = (struct device_attribute) {
@@ -743,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
743 }, 844 },
744 .show = amdgpu_ras_sysfs_features_read, 845 .show = amdgpu_ras_sysfs_features_read,
745 }; 846 };
847
848 con->badpages_attr = (struct bin_attribute) {
849 .attr = {
850 .name = "gpu_vram_bad_pages",
851 .mode = S_IRUGO,
852 },
853 .size = 0,
854 .private = NULL,
855 .read = amdgpu_ras_sysfs_badpages_read,
856 };
857
746 sysfs_attr_init(attrs[0]); 858 sysfs_attr_init(attrs[0]);
859 sysfs_bin_attr_init(bin_attrs[0]);
747 860
748 return sysfs_create_group(&adev->dev->kobj, &group); 861 return sysfs_create_group(&adev->dev->kobj, &group);
749} 862}
@@ -755,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
755 &con->features_attr.attr, 868 &con->features_attr.attr,
756 NULL 869 NULL
757 }; 870 };
871 struct bin_attribute *bin_attrs[] = {
872 &con->badpages_attr,
873 NULL
874 };
758 struct attribute_group group = { 875 struct attribute_group group = {
759 .name = "ras", 876 .name = "ras",
760 .attrs = attrs, 877 .attrs = attrs,
878 .bin_attrs = bin_attrs,
761 }; 879 };
762 880
763 sysfs_remove_group(&adev->dev->kobj, &group); 881 sysfs_remove_group(&adev->dev->kobj, &group);
@@ -1089,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
1089/* ih end */ 1207/* ih end */
1090 1208
1091/* recovery begin */ 1209/* recovery begin */
1210
1211/* return 0 on success.
1212 * caller need free bps.
1213 */
1214static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
1215 struct ras_badpage **bps, unsigned int *count)
1216{
1217 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1218 struct ras_err_handler_data *data;
1219 int i = 0;
1220 int ret = 0;
1221
1222 if (!con || !con->eh_data || !bps || !count)
1223 return -EINVAL;
1224
1225 mutex_lock(&con->recovery_lock);
1226 data = con->eh_data;
1227 if (!data || data->count == 0) {
1228 *bps = NULL;
1229 goto out;
1230 }
1231
1232 *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
1233 if (!*bps) {
1234 ret = -ENOMEM;
1235 goto out;
1236 }
1237
1238 for (; i < data->count; i++) {
1239 (*bps)[i] = (struct ras_badpage){
1240 .bp = data->bps[i].bp,
1241 .size = AMDGPU_GPU_PAGE_SIZE,
1242 .flags = 0,
1243 };
1244
1245 if (data->last_reserved <= i)
1246 (*bps)[i].flags = 1;
1247 else if (data->bps[i].bo == NULL)
1248 (*bps)[i].flags = 2;
1249 }
1250
1251 *count = data->count;
1252out:
1253 mutex_unlock(&con->recovery_lock);
1254 return ret;
1255}
1256
1092static void amdgpu_ras_do_recovery(struct work_struct *work) 1257static void amdgpu_ras_do_recovery(struct work_struct *work)
1093{ 1258{
1094 struct amdgpu_ras *ras = 1259 struct amdgpu_ras *ras =
@@ -1340,6 +1505,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
1340} 1505}
1341/* recovery end */ 1506/* recovery end */
1342 1507
1508/* return 0 if ras will reset gpu and repost.*/
1509int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
1510 unsigned int block)
1511{
1512 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1513
1514 if (!ras)
1515 return -EINVAL;
1516
1517 ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
1518 return 0;
1519}
1520
1343/* 1521/*
1344 * check hardware's ras ability which will be saved in hw_supported. 1522 * check hardware's ras ability which will be saved in hw_supported.
1345 * if hardware does not support ras, we can skip some ras initializtion and 1523 * if hardware does not support ras, we can skip some ras initializtion and
@@ -1415,8 +1593,10 @@ recovery_out:
1415 return -EINVAL; 1593 return -EINVAL;
1416} 1594}
1417 1595
1418/* do some init work after IP late init as dependence */ 1596/* do some init work after IP late init as dependence.
1419void amdgpu_ras_post_init(struct amdgpu_device *adev) 1597 * and it runs in resume/gpu reset/booting up cases.
1598 */
1599void amdgpu_ras_resume(struct amdgpu_device *adev)
1420{ 1600{
1421 struct amdgpu_ras *con = amdgpu_ras_get_context(adev); 1601 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1422 struct ras_manager *obj, *tmp; 1602 struct ras_manager *obj, *tmp;
@@ -1444,6 +1624,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
1444 } 1624 }
1445 } 1625 }
1446 } 1626 }
1627
1628 if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
1629 con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
1630 /* setup ras obj state as disabled.
1631 * for init_by_vbios case.
1632 * if we want to enable ras, just enable it in a normal way.
1633 * If we want to disable it, we need to set up the ras obj as enabled,
1634 * then issue another TA disable cmd.
1635 * See feature_enable_on_boot
1636 */
1637 amdgpu_ras_disable_all_features(adev, 1);
1638 amdgpu_ras_reset_gpu(adev, 0);
1639 }
1640}
1641
1642void amdgpu_ras_suspend(struct amdgpu_device *adev)
1643{
1644 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1645
1646 if (!con)
1647 return;
1648
1649 amdgpu_ras_disable_all_features(adev, 0);
1650 /* Make sure all ras objects are disabled. */
1651 if (con->features)
1652 amdgpu_ras_disable_all_features(adev, 1);
1447} 1653}
1448 1654
1449/* do some fini work before IP fini as dependence */ 1655/* do some fini work before IP fini as dependence */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index eaef5edefc34..c6b34fbd695f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -93,6 +93,7 @@ struct amdgpu_ras {
93 struct dentry *ent; 93 struct dentry *ent;
94 /* sysfs */ 94 /* sysfs */
95 struct device_attribute features_attr; 95 struct device_attribute features_attr;
96 struct bin_attribute badpages_attr;
96 /* block array */ 97 /* block array */
97 struct ras_manager *objs; 98 struct ras_manager *objs;
98 99
@@ -175,6 +176,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
175 return ras && (ras->supported & (1 << block)); 176 return ras && (ras->supported & (1 << block));
176} 177}
177 178
179int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
180 unsigned int block);
181
182void amdgpu_ras_resume(struct amdgpu_device *adev);
183void amdgpu_ras_suspend(struct amdgpu_device *adev);
184
178int amdgpu_ras_query_error_count(struct amdgpu_device *adev, 185int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
179 bool is_ce); 186 bool is_ce);
180 187
@@ -187,13 +194,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
187static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, 194static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
188 bool is_baco) 195 bool is_baco)
189{ 196{
190 /* remove me when gpu reset works on vega20 A1. */
191#if 0
192 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 197 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
193 198
194 if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) 199 if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
195 schedule_work(&ras->recovery_work); 200 schedule_work(&ras->recovery_work);
196#endif
197 return 0; 201 return 0;
198} 202}
199 203
@@ -255,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
255 259
256/* called in ip_init and ip_fini */ 260/* called in ip_init and ip_fini */
257int amdgpu_ras_init(struct amdgpu_device *adev); 261int amdgpu_ras_init(struct amdgpu_device *adev);
258void amdgpu_ras_post_init(struct amdgpu_device *adev);
259int amdgpu_ras_fini(struct amdgpu_device *adev); 262int amdgpu_ras_fini(struct amdgpu_device *adev);
260int amdgpu_ras_pre_fini(struct amdgpu_device *adev); 263int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
261 264
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index d7fae2676269..cdddce938bf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -114,6 +114,7 @@ struct amdgpu_ring_funcs {
114 uint32_t align_mask; 114 uint32_t align_mask;
115 u32 nop; 115 u32 nop;
116 bool support_64bit_ptrs; 116 bool support_64bit_ptrs;
117 bool no_user_fence;
117 unsigned vmhub; 118 unsigned vmhub;
118 unsigned extra_dw; 119 unsigned extra_dw;
119 120
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0c52d1f9fe0f..7138dc1dd1f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
43#include <linux/pagemap.h> 43#include <linux/pagemap.h>
44#include <linux/debugfs.h> 44#include <linux/debugfs.h>
45#include <linux/iommu.h> 45#include <linux/iommu.h>
46#include <linux/hmm.h>
46#include "amdgpu.h" 47#include "amdgpu.h"
47#include "amdgpu_object.h" 48#include "amdgpu_object.h"
48#include "amdgpu_trace.h" 49#include "amdgpu_trace.h"
@@ -703,143 +704,191 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
703/* 704/*
704 * TTM backend functions. 705 * TTM backend functions.
705 */ 706 */
706struct amdgpu_ttm_gup_task_list {
707 struct list_head list;
708 struct task_struct *task;
709};
710
711struct amdgpu_ttm_tt { 707struct amdgpu_ttm_tt {
712 struct ttm_dma_tt ttm; 708 struct ttm_dma_tt ttm;
713 u64 offset; 709 u64 offset;
714 uint64_t userptr; 710 uint64_t userptr;
715 struct task_struct *usertask; 711 struct task_struct *usertask;
716 uint32_t userflags; 712 uint32_t userflags;
717 spinlock_t guptasklock; 713#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
718 struct list_head guptasks; 714 struct hmm_range *ranges;
719 atomic_t mmu_invalidations; 715 int nr_ranges;
720 uint32_t last_set_pages; 716#endif
721}; 717};
722 718
723/** 719/**
724 * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR 720 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
725 * pointer to memory 721 * memory and start HMM tracking CPU page table update
726 * 722 *
727 * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). 723 * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
728 * This provides a wrapper around the get_user_pages() call to provide 724 * once afterwards to stop HMM tracking
729 * device accessible pages that back user memory.
730 */ 725 */
726#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
727
728/* Support Userptr pages cross max 16 vmas */
729#define MAX_NR_VMAS (16)
730
731int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) 731int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
732{ 732{
733 struct amdgpu_ttm_tt *gtt = (void *)ttm; 733 struct amdgpu_ttm_tt *gtt = (void *)ttm;
734 struct mm_struct *mm = gtt->usertask->mm; 734 struct mm_struct *mm = gtt->usertask->mm;
735 unsigned int flags = 0; 735 unsigned long start = gtt->userptr;
736 unsigned pinned = 0; 736 unsigned long end = start + ttm->num_pages * PAGE_SIZE;
737 int r; 737 struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
738 struct hmm_range *ranges;
739 unsigned long nr_pages, i;
740 uint64_t *pfns, f;
741 int r = 0;
738 742
739 if (!mm) /* Happens during process shutdown */ 743 if (!mm) /* Happens during process shutdown */
740 return -ESRCH; 744 return -ESRCH;
741 745
742 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
743 flags |= FOLL_WRITE;
744
745 down_read(&mm->mmap_sem); 746 down_read(&mm->mmap_sem);
746 747
747 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { 748 /* user pages may cross multiple VMAs */
748 /* 749 gtt->nr_ranges = 0;
749 * check that we only use anonymous memory to prevent problems 750 do {
750 * with writeback 751 unsigned long vm_start;
751 */
752 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
753 struct vm_area_struct *vma;
754 752
755 vma = find_vma(mm, gtt->userptr); 753 if (gtt->nr_ranges >= MAX_NR_VMAS) {
756 if (!vma || vma->vm_file || vma->vm_end < end) { 754 DRM_ERROR("Too many VMAs in userptr range\n");
757 up_read(&mm->mmap_sem); 755 r = -EFAULT;
758 return -EPERM; 756 goto out;
759 } 757 }
758
759 vm_start = vma ? vma->vm_end : start;
760 vma = find_vma(mm, vm_start);
761 if (unlikely(!vma || vm_start < vma->vm_start)) {
762 r = -EFAULT;
763 goto out;
764 }
765 vmas[gtt->nr_ranges++] = vma;
766 } while (end > vma->vm_end);
767
768 DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
769 start, gtt->nr_ranges, ttm->num_pages);
770
771 if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
772 vmas[0]->vm_file)) {
773 r = -EPERM;
774 goto out;
760 } 775 }
761 776
762 /* loop enough times using contiguous pages of memory */ 777 ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
763 do { 778 if (unlikely(!ranges)) {
764 unsigned num_pages = ttm->num_pages - pinned; 779 r = -ENOMEM;
765 uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; 780 goto out;
766 struct page **p = pages + pinned; 781 }
767 struct amdgpu_ttm_gup_task_list guptask;
768 782
769 guptask.task = current; 783 pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
770 spin_lock(&gtt->guptasklock); 784 if (unlikely(!pfns)) {
771 list_add(&guptask.list, &gtt->guptasks); 785 r = -ENOMEM;
772 spin_unlock(&gtt->guptasklock); 786 goto out_free_ranges;
787 }
773 788
774 if (mm == current->mm) 789 for (i = 0; i < gtt->nr_ranges; i++)
775 r = get_user_pages(userptr, num_pages, flags, p, NULL); 790 amdgpu_hmm_init_range(&ranges[i]);
776 else
777 r = get_user_pages_remote(gtt->usertask,
778 mm, userptr, num_pages,
779 flags, p, NULL, NULL);
780 791
781 spin_lock(&gtt->guptasklock); 792 f = ranges[0].flags[HMM_PFN_VALID];
782 list_del(&guptask.list); 793 f |= amdgpu_ttm_tt_is_readonly(ttm) ?
783 spin_unlock(&gtt->guptasklock); 794 0 : ranges[0].flags[HMM_PFN_WRITE];
795 memset64(pfns, f, ttm->num_pages);
784 796
785 if (r < 0) 797 for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
786 goto release_pages; 798 ranges[i].vma = vmas[i];
799 ranges[i].start = max(start, vmas[i]->vm_start);
800 ranges[i].end = min(end, vmas[i]->vm_end);
801 ranges[i].pfns = pfns + nr_pages;
802 nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
787 803
788 pinned += r; 804 r = hmm_vma_fault(&ranges[i], true);
805 if (unlikely(r))
806 break;
807 }
808 if (unlikely(r)) {
809 while (i--)
810 hmm_vma_range_done(&ranges[i]);
789 811
790 } while (pinned < ttm->num_pages); 812 goto out_free_pfns;
813 }
791 814
792 up_read(&mm->mmap_sem); 815 up_read(&mm->mmap_sem);
816
817 for (i = 0; i < ttm->num_pages; i++) {
818 pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
819 if (!pages[i]) {
820 pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
821 i, pfns[i]);
822 goto out_invalid_pfn;
823 }
824 }
825 gtt->ranges = ranges;
826
793 return 0; 827 return 0;
794 828
795release_pages: 829out_free_pfns:
796 release_pages(pages, pinned); 830 kvfree(pfns);
831out_free_ranges:
832 kvfree(ranges);
833out:
797 up_read(&mm->mmap_sem); 834 up_read(&mm->mmap_sem);
835
798 return r; 836 return r;
837
838out_invalid_pfn:
839 for (i = 0; i < gtt->nr_ranges; i++)
840 hmm_vma_range_done(&ranges[i]);
841 kvfree(pfns);
842 kvfree(ranges);
843 return -ENOMEM;
799} 844}
800 845
801/** 846/**
802 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. 847 * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
848 * Check if the pages backing this ttm range have been invalidated
803 * 849 *
804 * Called by amdgpu_cs_list_validate(). This creates the page list 850 * Returns: true if pages are still valid
805 * that backs user memory and will ultimately be mapped into the device
806 * address space.
807 */ 851 */
808void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) 852bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
809{ 853{
810 struct amdgpu_ttm_tt *gtt = (void *)ttm; 854 struct amdgpu_ttm_tt *gtt = (void *)ttm;
811 unsigned i; 855 bool r = false;
856 int i;
812 857
813 gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations); 858 if (!gtt || !gtt->userptr)
814 for (i = 0; i < ttm->num_pages; ++i) { 859 return false;
815 if (ttm->pages[i])
816 put_page(ttm->pages[i]);
817 860
818 ttm->pages[i] = pages ? pages[i] : NULL; 861 DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
862 gtt->userptr, gtt->nr_ranges, ttm->num_pages);
863
864 WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
865 "No user pages to check\n");
866
867 if (gtt->ranges) {
868 for (i = 0; i < gtt->nr_ranges; i++)
869 r |= hmm_vma_range_done(&gtt->ranges[i]);
870 kvfree(gtt->ranges[0].pfns);
871 kvfree(gtt->ranges);
872 gtt->ranges = NULL;
819 } 873 }
874
875 return r;
820} 876}
877#endif
821 878
822/** 879/**
823 * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty 880 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
824 * 881 *
825 * Called while unpinning userptr pages 882 * Called by amdgpu_cs_list_validate(). This creates the page list
883 * that backs user memory and will ultimately be mapped into the device
884 * address space.
826 */ 885 */
827void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) 886void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
828{ 887{
829 struct amdgpu_ttm_tt *gtt = (void *)ttm; 888 unsigned long i;
830 unsigned i;
831
832 for (i = 0; i < ttm->num_pages; ++i) {
833 struct page *page = ttm->pages[i];
834 889
835 if (!page) 890 for (i = 0; i < ttm->num_pages; ++i)
836 continue; 891 ttm->pages[i] = pages ? pages[i] : NULL;
837
838 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
839 set_page_dirty(page);
840
841 mark_page_accessed(page);
842 }
843} 892}
844 893
845/** 894/**
@@ -901,10 +950,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
901 /* unmap the pages mapped to the device */ 950 /* unmap the pages mapped to the device */
902 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 951 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
903 952
904 /* mark the pages as dirty */
905 amdgpu_ttm_tt_mark_user_pages(ttm);
906
907 sg_free_table(ttm->sg); 953 sg_free_table(ttm->sg);
954
955#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
956 if (gtt->ranges &&
957 ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
958 gtt->ranges[0].pfns[0]))
959 WARN_ONCE(1, "Missing get_user_page_done\n");
960#endif
908} 961}
909 962
910int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, 963int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -1254,11 +1307,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
1254 gtt->usertask = current->group_leader; 1307 gtt->usertask = current->group_leader;
1255 get_task_struct(gtt->usertask); 1308 get_task_struct(gtt->usertask);
1256 1309
1257 spin_lock_init(&gtt->guptasklock);
1258 INIT_LIST_HEAD(&gtt->guptasks);
1259 atomic_set(&gtt->mmu_invalidations, 0);
1260 gtt->last_set_pages = 0;
1261
1262 return 0; 1310 return 0;
1263} 1311}
1264 1312
@@ -1287,7 +1335,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1287 unsigned long end) 1335 unsigned long end)
1288{ 1336{
1289 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1337 struct amdgpu_ttm_tt *gtt = (void *)ttm;
1290 struct amdgpu_ttm_gup_task_list *entry;
1291 unsigned long size; 1338 unsigned long size;
1292 1339
1293 if (gtt == NULL || !gtt->userptr) 1340 if (gtt == NULL || !gtt->userptr)
@@ -1300,48 +1347,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1300 if (gtt->userptr > end || gtt->userptr + size <= start) 1347 if (gtt->userptr > end || gtt->userptr + size <= start)
1301 return false; 1348 return false;
1302 1349
1303 /* Search the lists of tasks that hold this mapping and see
1304 * if current is one of them. If it is return false.
1305 */
1306 spin_lock(&gtt->guptasklock);
1307 list_for_each_entry(entry, &gtt->guptasks, list) {
1308 if (entry->task == current) {
1309 spin_unlock(&gtt->guptasklock);
1310 return false;
1311 }
1312 }
1313 spin_unlock(&gtt->guptasklock);
1314
1315 atomic_inc(&gtt->mmu_invalidations);
1316
1317 return true; 1350 return true;
1318} 1351}
1319 1352
1320/** 1353/**
1321 * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? 1354 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
1322 */
1323bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1324 int *last_invalidated)
1325{
1326 struct amdgpu_ttm_tt *gtt = (void *)ttm;
1327 int prev_invalidated = *last_invalidated;
1328
1329 *last_invalidated = atomic_read(&gtt->mmu_invalidations);
1330 return prev_invalidated != *last_invalidated;
1331}
1332
1333/**
1334 * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
1335 * been invalidated since the last time they've been set?
1336 */ 1355 */
1337bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) 1356bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
1338{ 1357{
1339 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1358 struct amdgpu_ttm_tt *gtt = (void *)ttm;
1340 1359
1341 if (gtt == NULL || !gtt->userptr) 1360 if (gtt == NULL || !gtt->userptr)
1342 return false; 1361 return false;
1343 1362
1344 return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; 1363 return true;
1345} 1364}
1346 1365
1347/** 1366/**
@@ -1753,44 +1772,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1753 1772
1754 /* Initialize various on-chip memory pools */ 1773 /* Initialize various on-chip memory pools */
1755 r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, 1774 r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
1756 adev->gds.mem.total_size); 1775 adev->gds.gds_size);
1757 if (r) { 1776 if (r) {
1758 DRM_ERROR("Failed initializing GDS heap.\n"); 1777 DRM_ERROR("Failed initializing GDS heap.\n");
1759 return r; 1778 return r;
1760 } 1779 }
1761 1780
1762 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
1763 4, AMDGPU_GEM_DOMAIN_GDS,
1764 &adev->gds.gds_gfx_bo, NULL, NULL);
1765 if (r)
1766 return r;
1767
1768 r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, 1781 r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
1769 adev->gds.gws.total_size); 1782 adev->gds.gws_size);
1770 if (r) { 1783 if (r) {
1771 DRM_ERROR("Failed initializing gws heap.\n"); 1784 DRM_ERROR("Failed initializing gws heap.\n");
1772 return r; 1785 return r;
1773 } 1786 }
1774 1787
1775 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
1776 1, AMDGPU_GEM_DOMAIN_GWS,
1777 &adev->gds.gws_gfx_bo, NULL, NULL);
1778 if (r)
1779 return r;
1780
1781 r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, 1788 r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
1782 adev->gds.oa.total_size); 1789 adev->gds.oa_size);
1783 if (r) { 1790 if (r) {
1784 DRM_ERROR("Failed initializing oa heap.\n"); 1791 DRM_ERROR("Failed initializing oa heap.\n");
1785 return r; 1792 return r;
1786 } 1793 }
1787 1794
1788 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
1789 1, AMDGPU_GEM_DOMAIN_OA,
1790 &adev->gds.oa_gfx_bo, NULL, NULL);
1791 if (r)
1792 return r;
1793
1794 /* Register debugfs entries for amdgpu_ttm */ 1795 /* Register debugfs entries for amdgpu_ttm */
1795 r = amdgpu_ttm_debugfs_init(adev); 1796 r = amdgpu_ttm_debugfs_init(adev);
1796 if (r) { 1797 if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..c2b7669004ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
101int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); 101int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
102int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); 102int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
103 103
104#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
104int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); 105int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
106bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
107#else
108static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
109{
110 return -EPERM;
111}
112static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
113{
114 return false;
115}
116#endif
117
105void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); 118void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
106void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
107int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, 119int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
108 uint32_t flags); 120 uint32_t flags);
109bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); 121bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
@@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
112 unsigned long end); 124 unsigned long end);
113bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, 125bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
114 int *last_invalidated); 126 int *last_invalidated);
115bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); 127bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
116bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); 128bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
117uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); 129uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
118uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, 130uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 7b33867036e7..33c1eb76c076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
313 return AMDGPU_FW_LOAD_DIRECT; 313 return AMDGPU_FW_LOAD_DIRECT;
314} 314}
315 315
/* sysfs "fw_version" attribute group: one read-only (0444) file per
 * firmware block, each printing the cached version as 0x%08x.
 * FW_VERSION_ATTR(name, mode, field) expands to a show_<name>() handler
 * reading adev-><field> plus the matching DEVICE_ATTR definition.
 */
316#define FW_VERSION_ATTR(name, mode, field) \
317static ssize_t show_##name(struct device *dev, \
318 struct device_attribute *attr, \
319 char *buf) \
320{ \
321 struct drm_device *ddev = dev_get_drvdata(dev); \
322 struct amdgpu_device *adev = ddev->dev_private; \
323 \
324 return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \
325} \
326static DEVICE_ATTR(name, mode, show_##name, NULL)
327
328FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
329FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
330FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
331FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
332FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
333FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
334FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
335FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
336FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
337FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
338FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
339FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
340FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
341FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
/* NOTE(review): ta_ras and ta_xgmi both expose the same psp.ta_fw_version
 * field — confirm there is no separate per-TA version to report.
 */
342FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
343FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
344FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
345FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
346FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
347FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
348FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
349
/* NULL-terminated list of all firmware-version attributes above. */
350static struct attribute *fw_attrs[] = {
351 &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
352 &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
353 &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
354 &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
355 &dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
356 &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
357 &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
358 &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
359 &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
360 &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
361 &dev_attr_dmcu_fw_version.attr, NULL
362};
363
364static const struct attribute_group fw_attr_group = {
365 .name = "fw_version",
366 .attrs = fw_attrs
367};
368
/* Create the fw_version sysfs group under the device kobject.
 * Returns the sysfs_create_group() result (0 on success, -errno).
 */
369int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
370{
371 return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
372}
373
/* Tear down the fw_version sysfs group created by the init above. */
374void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
375{
376 sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
377}
378
316static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, 379static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
317 struct amdgpu_firmware_info *ucode, 380 struct amdgpu_firmware_info *ucode,
318 uint64_t mc_addr, void *kptr) 381 uint64_t mc_addr, void *kptr)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 7ac25a1c7853..ec4c2ea1f05a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
291 291
292int amdgpu_ucode_init_bo(struct amdgpu_device *adev); 292int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
293int amdgpu_ucode_create_bo(struct amdgpu_device *adev); 293int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
294int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
294void amdgpu_ucode_free_bo(struct amdgpu_device *adev); 295void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
296void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
295 297
296enum amdgpu_firmware_load_type 298enum amdgpu_firmware_load_type
297amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); 299amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index ecf6f96df2ad..118451f5e3aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -212,132 +212,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
212 return 0; 212 return 0;
213} 213}
214 214
/* amdgpu_vcn_pause_dpg_mode - move VCN dynamic power-gating (DPG) pause
 * state to @new_state for the non-JPEG (fw_based) and JPEG engines.
 * For each engine that changes state: program UVD_DPG_PAUSE, wait for the
 * hardware ack, and after a successful pause restore the encode/decode
 * (resp. JPEG) ring registers from the cached amdgpu_ring state.
 * SOC15_WAIT_ON_RREG reports completion through ret_code, which gates the
 * restore sequence.  Always returns 0; adev->vcn.pause_state is updated
 * per engine.
 * NOTE(review): this function is removed by this diff — callers now go
 * through the adev->vcn.pause_dpg_mode callback instead.
 */
215static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
216 struct dpg_pause_state *new_state)
217{
218 int ret_code;
219 uint32_t reg_data = 0;
220 uint32_t reg_data2 = 0;
221 struct amdgpu_ring *ring;
222
223 /* pause/unpause if state is changed */
224 if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
225 DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
226 adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
227 new_state->fw_based, new_state->jpeg);
228
229 reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
230 (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
231
232 if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
233 ret_code = 0;
234
235 if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
236 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
237 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
238 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
239
240 if (!ret_code) {
241 /* pause DPG non-jpeg */
242 reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
243 WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
244 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
245 UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
246 UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
247
248 /* Restore */
249 ring = &adev->vcn.ring_enc[0];
250 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
251 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
252 WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
253 WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
254 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
255
256 ring = &adev->vcn.ring_enc[1];
257 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
258 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
259 WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
260 WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
261 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
262
263 ring = &adev->vcn.ring_dec;
264 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
265 RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
266 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
267 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
268 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
269 }
270 } else {
271 /* unpause dpg non-jpeg, no need to wait */
272 reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
273 WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
274 }
275 adev->vcn.pause_state.fw_based = new_state->fw_based;
276 }
277
278 /* pause/unpause if state is changed */
279 if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
280 DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
281 adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
282 new_state->fw_based, new_state->jpeg);
283
284 reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
285 (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
286
287 if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
288 ret_code = 0;
289
290 if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
291 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
292 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
293 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
294
295 if (!ret_code) {
296 /* Make sure JPRG Snoop is disabled before sending the pause */
297 reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
298 reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
299 WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
300
301 /* pause DPG jpeg */
302 reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
303 WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
304 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
305 UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
306 UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
307
308 /* Restore */
309 ring = &adev->vcn.ring_jpeg;
310 WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
311 WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
312 UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
313 UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
314 WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
315 lower_32_bits(ring->gpu_addr));
316 WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
317 upper_32_bits(ring->gpu_addr));
318 WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
319 WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
320 WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
321 UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
322
323 ring = &adev->vcn.ring_dec;
324 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
325 RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
326 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
327 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
328 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
329 }
330 } else {
331 /* unpause dpg jpeg, no need to wait */
332 reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
333 WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
334 }
335 adev->vcn.pause_state.jpeg = new_state->jpeg;
336 }
337
338 return 0;
339}
340
341static void amdgpu_vcn_idle_work_handler(struct work_struct *work) 215static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
342{ 216{
343 struct amdgpu_device *adev = 217 struct amdgpu_device *adev =
@@ -362,7 +236,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
362 else 236 else
363 new_state.jpeg = VCN_DPG_STATE__UNPAUSE; 237 new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
364 238
365 amdgpu_vcn_pause_dpg_mode(adev, &new_state); 239 adev->vcn.pause_dpg_mode(adev, &new_state);
366 } 240 }
367 241
368 fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); 242 fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
@@ -417,7 +291,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
417 else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) 291 else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
418 new_state.jpeg = VCN_DPG_STATE__PAUSE; 292 new_state.jpeg = VCN_DPG_STATE__PAUSE;
419 293
420 amdgpu_vcn_pause_dpg_mode(adev, &new_state); 294 adev->vcn.pause_dpg_mode(adev, &new_state);
421 } 295 }
422} 296}
423 297
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index a0ad19af9080..a1ee19251aae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -45,6 +45,27 @@
45#define VCN_ENC_CMD_REG_WRITE 0x0000000b 45#define VCN_ENC_CMD_REG_WRITE 0x0000000b
46#define VCN_ENC_CMD_REG_WAIT 0x0000000c 46#define VCN_ENC_CMD_REG_WAIT 0x0000000c
47 47
48#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
49 ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
50 WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
51 UVD_DPG_LMA_CTL__MASK_EN_MASK | \
52 ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
53 << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
54 (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
55 RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \
56 })
57
58#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
59 do { \
60 WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
61 WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
62 WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
63 UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
64 ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
65 << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
66 (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
67 } while (0)
68
48enum engine_status_constants { 69enum engine_status_constants {
49 UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, 70 UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
50 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, 71 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
@@ -81,6 +102,8 @@ struct amdgpu_vcn {
81 unsigned num_enc_rings; 102 unsigned num_enc_rings;
82 enum amd_powergating_state cur_state; 103 enum amd_powergating_state cur_state;
83 struct dpg_pause_state pause_state; 104 struct dpg_pause_state pause_state;
105 int (*pause_dpg_mode)(struct amdgpu_device *adev,
106 struct dpg_pause_state *new_state);
84}; 107};
85 108
86int amdgpu_vcn_sw_init(struct amdgpu_device *adev); 109int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7d484fad3909..1f0bd4d16475 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -426,3 +426,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
426 return clk; 426 return clk;
427} 427}
428 428
429void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
430{
431 struct amdgpu_virt *virt = &adev->virt;
432
433 if (virt->ops && virt->ops->init_reg_access_mode)
434 virt->ops->init_reg_access_mode(adev);
435}
436
437bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
438{
439 bool ret = false;
440 struct amdgpu_virt *virt = &adev->virt;
441
442 if (amdgpu_sriov_vf(adev)
443 && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
444 ret = true;
445
446 return ret;
447}
448
449bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
450{
451 bool ret = false;
452 struct amdgpu_virt *virt = &adev->virt;
453
454 if (amdgpu_sriov_vf(adev)
455 && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
456 && !(amdgpu_sriov_runtime(adev)))
457 ret = true;
458
459 return ret;
460}
461
462bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
463{
464 bool ret = false;
465 struct amdgpu_virt *virt = &adev->virt;
466
467 if (amdgpu_sriov_vf(adev)
468 && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
469 ret = true;
470
471 return ret;
472}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 584947b7ccf3..dca25deee75c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {
48 uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; 48 uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
49}; 49};
50 50
51/* According to the fw feature, some new reg access modes are supported */
52#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* direct mmio access */
53#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */
54#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
55#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */
56
51/** 57/**
52 * struct amdgpu_virt_ops - amdgpu device virt operations 58 * struct amdgpu_virt_ops - amdgpu device virt operations
53 */ 59 */
@@ -59,6 +65,7 @@ struct amdgpu_virt_ops {
59 void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); 65 void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
60 int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); 66 int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
61 int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); 67 int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
68 void (*init_reg_access_mode)(struct amdgpu_device *adev);
62}; 69};
63 70
64/* 71/*
@@ -258,6 +265,7 @@ struct amdgpu_virt {
258 uint32_t gim_feature; 265 uint32_t gim_feature;
259 /* protect DPM events to GIM */ 266 /* protect DPM events to GIM */
260 struct mutex dpm_mutex; 267 struct mutex dpm_mutex;
268 uint32_t reg_access_mode;
261}; 269};
262 270
263#define amdgpu_sriov_enabled(adev) \ 271#define amdgpu_sriov_enabled(adev) \
@@ -307,4 +315,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
307uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); 315uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
308uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); 316uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
309 317
318void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
319bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
320bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
321bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
322
310#endif 323#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a48c84c51775..d11eba09eadd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
40 return &hive->device_list; 40 return &hive->device_list;
41} 41}
42 42
43/**
44 * DOC: AMDGPU XGMI Support
45 *
46 * XGMI is a high speed interconnect that joins multiple GPU cards
47 * into a homogeneous memory space that is organized by a collective
48 * hive ID and individual node IDs, both of which are 64-bit numbers.
49 *
50 * The file xgmi_device_id contains the unique per GPU device ID and
51 * is stored in the /sys/class/drm/card${cardno}/device/ directory.
52 *
53 * Inside the device directory a sub-directory 'xgmi_hive_info' is
54 * created which contains the hive ID and the list of nodes.
55 *
56 * The hive ID is stored in:
57 * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
58 *
59 * The node information is stored in numbered directories:
60 * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
61 *
62 * Each device has its own xgmi_hive_info directory with a mirror
63 * set of node sub-directories.
64 *
65 * The XGMI memory space is built by contiguously adding the power of
66 * two padded VRAM space from each node to each other.
67 *
68 */
69
70
43static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev, 71static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
44 struct device_attribute *attr, char *buf) 72 struct device_attribute *attr, char *buf)
45{ 73{
@@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
238 /* Each psp needs to set the latest topology */ 266 /* Each psp needs to set the latest topology */
239 ret = psp_xgmi_set_topology_info(&adev->psp, 267 ret = psp_xgmi_set_topology_info(&adev->psp,
240 hive->number_devices, 268 hive->number_devices,
241 &hive->topology_info); 269 &adev->psp.xgmi_context.top_info);
242 if (ret) 270 if (ret)
243 dev_err(adev->dev, 271 dev_err(adev->dev,
244 "XGMI: Set topology failure on device %llx, hive %llx, ret %d", 272 "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
248 return ret; 276 return ret;
249} 277}
250 278
279
280int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
281 struct amdgpu_device *peer_adev)
282{
283 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
284 int i;
285
286 for (i = 0 ; i < top->num_nodes; ++i)
287 if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
288 return top->nodes[i].num_hops;
289 return -EINVAL;
290}
291
251int amdgpu_xgmi_add_device(struct amdgpu_device *adev) 292int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
252{ 293{
253 struct psp_xgmi_topology_info *hive_topology; 294 struct psp_xgmi_topology_info *top_info;
254 struct amdgpu_hive_info *hive; 295 struct amdgpu_hive_info *hive;
255 struct amdgpu_xgmi *entry; 296 struct amdgpu_xgmi *entry;
256 struct amdgpu_device *tmp_adev = NULL; 297 struct amdgpu_device *tmp_adev = NULL;
@@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
283 goto exit; 324 goto exit;
284 } 325 }
285 326
286 hive_topology = &hive->topology_info; 327 top_info = &adev->psp.xgmi_context.top_info;
287 328
288 list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); 329 list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
289 list_for_each_entry(entry, &hive->device_list, head) 330 list_for_each_entry(entry, &hive->device_list, head)
290 hive_topology->nodes[count++].node_id = entry->node_id; 331 top_info->nodes[count++].node_id = entry->node_id;
332 top_info->num_nodes = count;
291 hive->number_devices = count; 333 hive->number_devices = count;
292 334
293 /* Each psp need to get the latest topology */
294 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { 335 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
295 ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); 336 /* update node list for other device in the hive */
337 if (tmp_adev != adev) {
338 top_info = &tmp_adev->psp.xgmi_context.top_info;
339 top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
340 top_info->num_nodes = count;
341 }
342 ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
343 if (ret)
344 goto exit;
345 }
346
347 /* get latest topology info for each device from psp */
348 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
349 ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
350 &tmp_adev->psp.xgmi_context.top_info);
296 if (ret) { 351 if (ret) {
297 dev_err(tmp_adev->dev, 352 dev_err(tmp_adev->dev,
298 "XGMI: Get topology failure on device %llx, hive %llx, ret %d", 353 "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
299 tmp_adev->gmc.xgmi.node_id, 354 tmp_adev->gmc.xgmi.node_id,
300 tmp_adev->gmc.xgmi.hive_id, ret); 355 tmp_adev->gmc.xgmi.hive_id, ret);
301 /* To do : continue with some node failed or disable the whole hive */ 356 /* To do : continue with some node failed or disable the whole hive */
302 break; 357 goto exit;
303 } 358 }
304 } 359 }
305 360
306 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
307 ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
308 if (ret)
309 break;
310 }
311
312 if (!ret) 361 if (!ret)
313 ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); 362 ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
314 363
364
365 mutex_unlock(&hive->hive_lock);
366exit:
315 if (!ret) 367 if (!ret)
316 dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", 368 dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
317 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); 369 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
@@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
320 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, 372 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
321 ret); 373 ret);
322 374
323
324 mutex_unlock(&hive->hive_lock);
325exit:
326 return ret; 375 return ret;
327} 376}
328 377
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 3e9c91e9a4bf..fbcee31788c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -27,7 +27,6 @@
27struct amdgpu_hive_info { 27struct amdgpu_hive_info {
28 uint64_t hive_id; 28 uint64_t hive_id;
29 struct list_head device_list; 29 struct list_head device_list;
30 struct psp_xgmi_topology_info topology_info;
31 int number_devices; 30 int number_devices;
32 struct mutex hive_lock, reset_lock; 31 struct mutex hive_lock, reset_lock;
33 struct kobject *kobj; 32 struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
41int amdgpu_xgmi_add_device(struct amdgpu_device *adev); 40int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
42void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); 41void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
43int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); 42int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
43int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
44 struct amdgpu_device *peer_adev);
44 45
45static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, 46static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
46 struct amdgpu_device *bo_adev) 47 struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07c1f239e9c3..3a4f20766a39 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1804,6 +1804,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev)
1804 return false; 1804 return false;
1805} 1805}
1806 1806
1807static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
1808{
1809 uint64_t nak_r, nak_g;
1810
1811 /* Get the number of NAKs received and generated */
1812 nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
1813 nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
1814
1815 /* Add the total number of NAKs, i.e the number of replays */
1816 return (nak_r + nak_g);
1817}
1818
1807static const struct amdgpu_asic_funcs cik_asic_funcs = 1819static const struct amdgpu_asic_funcs cik_asic_funcs =
1808{ 1820{
1809 .read_disabled_bios = &cik_read_disabled_bios, 1821 .read_disabled_bios = &cik_read_disabled_bios,
@@ -1821,6 +1833,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
1821 .init_doorbell_index = &legacy_doorbell_index_init, 1833 .init_doorbell_index = &legacy_doorbell_index_init,
1822 .get_pcie_usage = &cik_get_pcie_usage, 1834 .get_pcie_usage = &cik_get_pcie_usage,
1823 .need_reset_on_init = &cik_need_reset_on_init, 1835 .need_reset_on_init = &cik_need_reset_on_init,
1836 .get_pcie_replay_count = &cik_get_pcie_replay_count,
1824}; 1837};
1825 1838
1826static int cik_common_early_init(void *handle) 1839static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index d5ebe566809b..8c09bf994acd 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
105 *flags |= AMD_CG_SUPPORT_DF_MGCG; 105 *flags |= AMD_CG_SUPPORT_DF_MGCG;
106} 106}
107 107
108/* hold counter assignment per gpu struct */
109struct df_v3_6_event_mask {
110 struct amdgpu_device gpu;
111 uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS];
112};
113
114/* get assigned df perfmon ctr as int */
115static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
116 uint64_t config,
117 int *counter)
118{
119 struct df_v3_6_event_mask *mask;
120 int i;
121
122 mask = container_of(adev, struct df_v3_6_event_mask, gpu);
123
124 for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
125 if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) {
126 *counter = i;
127 return;
128 }
129 }
130}
131
132/* get address based on counter assignment */
133static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
134 uint64_t config,
135 int is_ctrl,
136 uint32_t *lo_base_addr,
137 uint32_t *hi_base_addr)
138{
139
140 int target_cntr = -1;
141
142 df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
143
144 if (target_cntr < 0)
145 return;
146
147 switch (target_cntr) {
148
149 case 0:
150 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
151 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
152 break;
153 case 1:
154 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
155 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
156 break;
157 case 2:
158 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
159 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
160 break;
161 case 3:
162 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
163 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
164 break;
165
166 }
167
168}
169
170/* get read counter address */
171static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
172 uint64_t config,
173 uint32_t *lo_base_addr,
174 uint32_t *hi_base_addr)
175{
176 df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
177}
178
179/* get control counter settings i.e. address and values to set */
180static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
181 uint64_t config,
182 uint32_t *lo_base_addr,
183 uint32_t *hi_base_addr,
184 uint32_t *lo_val,
185 uint32_t *hi_val)
186{
187
188 uint32_t eventsel, instance, unitmask;
189 uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0;
190
191 df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
192
193 if (lo_val == NULL || hi_val == NULL)
194 return;
195
196 if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
197 DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x",
198 *lo_base_addr, *hi_base_addr);
199 return;
200 }
201
202 eventsel = GET_EVENT(config);
203 instance = GET_INSTANCE(config);
204 unitmask = GET_UNITMASK(config);
205
206 es_5_0 = eventsel & 0x3FUL;
207 es_13_6 = instance;
208 es_13_0 = (es_13_6 << 6) + es_5_0;
209 es_13_12 = (es_13_0 & 0x03000UL) >> 12;
210 es_11_8 = (es_13_0 & 0x0F00UL) >> 8;
211 es_7_0 = es_13_0 & 0x0FFUL;
212 *lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8);
213 *hi_val = (es_11_8 | ((es_13_12)<<(29)));
214}
215
216/* assign df performance counters for read */
217static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
218 uint64_t config,
219 int *is_assigned)
220{
221
222 struct df_v3_6_event_mask *mask;
223 int i, target_cntr;
224
225 target_cntr = -1;
226
227 *is_assigned = 0;
228
229 df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
230
231 if (target_cntr >= 0) {
232 *is_assigned = 1;
233 return 0;
234 }
235
236 mask = container_of(adev, struct df_v3_6_event_mask, gpu);
237
238 for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
239 if (mask->config_assign_mask[i] == 0ULL) {
240 mask->config_assign_mask[i] = config & 0x0FFFFFFUL;
241 return 0;
242 }
243 }
244
245 return -ENOSPC;
246}
247
248/* release performance counter */
249static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
250 uint64_t config)
251{
252
253 struct df_v3_6_event_mask *mask;
254 int target_cntr;
255
256 target_cntr = -1;
257
258 df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
259
260 mask = container_of(adev, struct df_v3_6_event_mask, gpu);
261
262 if (target_cntr >= 0)
263 mask->config_assign_mask[target_cntr] = 0ULL;
264
265}
266
267/*
268 * get xgmi link counters via programmable data fabric (df) counters (max 4)
269 * using cake tx event.
270 *
271 * @adev -> amdgpu device
272 * @instance-> currently cake has 2 links to poll on vega20
273 * @count -> counters to pass
274 *
275 */
276
277static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev,
278 int instance,
279 uint64_t *count)
280{
281 uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
282 uint64_t config;
283
284 config = GET_INSTANCE_CONFIG(instance);
285
286 df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
287 &hi_base_addr);
288
289 if ((lo_base_addr == 0) || (hi_base_addr == 0))
290 return;
291
292 lo_val = RREG32_PCIE(lo_base_addr);
293 hi_val = RREG32_PCIE(hi_base_addr);
294
295 *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
296}
297
298/*
299 * reset xgmi link counters
300 *
301 * @adev -> amdgpu device
302 * @instance-> currently cake has 2 links to poll on vega20
303 *
304 */
305static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev,
306 int instance)
307{
308 uint32_t lo_base_addr, hi_base_addr;
309 uint64_t config;
310
311 config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16);
312
313 df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
314 &hi_base_addr);
315
316 if ((lo_base_addr == 0) || (hi_base_addr == 0))
317 return;
318
319 WREG32_PCIE(lo_base_addr, 0UL);
320 WREG32_PCIE(hi_base_addr, 0UL);
321}
322
323/*
324 * add xgmi link counters
325 *
326 * @adev -> amdgpu device
327 * @instance-> currently cake has 2 links to poll on vega20
328 *
329 */
330
331static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev,
332 int instance)
333{
334 uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
335 uint64_t config;
336 int ret, is_assigned;
337
338 if (instance < 0 || instance > 1)
339 return -EINVAL;
340
341 config = GET_INSTANCE_CONFIG(instance);
342
343 ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
344
345 if (ret || is_assigned)
346 return ret;
347
348 df_v3_6_pmc_get_ctrl_settings(adev,
349 config,
350 &lo_base_addr,
351 &hi_base_addr,
352 &lo_val,
353 &hi_val);
354
355 WREG32_PCIE(lo_base_addr, lo_val);
356 WREG32_PCIE(hi_base_addr, hi_val);
357
358 return ret;
359}
360
361
362/*
363 * start xgmi link counters
364 *
365 * @adev -> amdgpu device
366 * @instance-> currently cake has 2 links to poll on vega20
367 * @is_enable -> either resume or assign event via df perfmon
368 *
369 */
370
371static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev,
372 int instance,
373 int is_enable)
374{
375 uint32_t lo_base_addr, hi_base_addr, lo_val;
376 uint64_t config;
377 int ret;
378
379 if (instance < 0 || instance > 1)
380 return -EINVAL;
381
382 if (is_enable) {
383
384 ret = df_v3_6_add_xgmi_link_cntr(adev, instance);
385
386 if (ret)
387 return ret;
388
389 } else {
390
391 config = GET_INSTANCE_CONFIG(instance);
392
393 df_v3_6_pmc_get_ctrl_settings(adev,
394 config,
395 &lo_base_addr,
396 &hi_base_addr,
397 NULL,
398 NULL);
399
400 if (lo_base_addr == 0)
401 return -EINVAL;
402
403 lo_val = RREG32_PCIE(lo_base_addr);
404
405 WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
406
407 ret = 0;
408 }
409
410 return ret;
411
412}
413
414/*
415 * stop xgmi link counters
416 *
417 * @adev -> amdgpu device
418 * @instance-> currently cake has 2 links to poll on vega20
419 * @is_disable -> either pause or unassign event via df perfmon
420 *
421 */
422
423static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev,
424 int instance,
425 int is_disable)
426{
427
428 uint32_t lo_base_addr, hi_base_addr, lo_val;
429 uint64_t config;
430
431 config = GET_INSTANCE_CONFIG(instance);
432
433 if (is_disable) {
434 df_v3_6_reset_xgmi_link_cntr(adev, instance);
435 df_v3_6_pmc_release_cntr(adev, config);
436 } else {
437
438 df_v3_6_pmc_get_ctrl_settings(adev,
439 config,
440 &lo_base_addr,
441 &hi_base_addr,
442 NULL,
443 NULL);
444
445 if ((lo_base_addr == 0) || (hi_base_addr == 0))
446 return -EINVAL;
447
448 lo_val = RREG32_PCIE(lo_base_addr);
449
450 WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
451 }
452
453 return 0;
454}
455
456static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
457 int is_enable)
458{
459 int xgmi_tx_link, ret = 0;
460
461 switch (adev->asic_type) {
462 case CHIP_VEGA20:
463 xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
464 : (IS_DF_XGMI_1_TX(config) ? 1 : -1);
465
466 if (xgmi_tx_link >= 0)
467 ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link,
468 is_enable);
469
470 if (ret)
471 return ret;
472
473 ret = 0;
474 break;
475 default:
476 break;
477 }
478
479 return ret;
480}
481
482static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
483 int is_disable)
484{
485 int xgmi_tx_link, ret = 0;
486
487 switch (adev->asic_type) {
488 case CHIP_VEGA20:
489 xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
490 : (IS_DF_XGMI_1_TX(config) ? 1 : -1);
491
492 if (xgmi_tx_link >= 0) {
493 ret = df_v3_6_stop_xgmi_link_cntr(adev,
494 xgmi_tx_link,
495 is_disable);
496 if (ret)
497 return ret;
498 }
499
500 ret = 0;
501 break;
502 default:
503 break;
504 }
505
506 return ret;
507}
508
509static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
510 uint64_t config,
511 uint64_t *count)
512{
513
514 int xgmi_tx_link;
515
516 switch (adev->asic_type) {
517 case CHIP_VEGA20:
518 xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
519 : (IS_DF_XGMI_1_TX(config) ? 1 : -1);
520
521 if (xgmi_tx_link >= 0) {
522 df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link);
523 df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count);
524 }
525
526 break;
527 default:
528 break;
529 }
530
531}
532
108const struct amdgpu_df_funcs df_v3_6_funcs = { 533const struct amdgpu_df_funcs df_v3_6_funcs = {
109 .init = df_v3_6_init, 534 .init = df_v3_6_init,
110 .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, 535 .enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
@@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
113 .update_medium_grain_clock_gating = 538 .update_medium_grain_clock_gating =
114 df_v3_6_update_medium_grain_clock_gating, 539 df_v3_6_update_medium_grain_clock_gating,
115 .get_clockgating_state = df_v3_6_get_clockgating_state, 540 .get_clockgating_state = df_v3_6_get_clockgating_state,
541 .pmc_start = df_v3_6_pmc_start,
542 .pmc_stop = df_v3_6_pmc_stop,
543 .pmc_get_count = df_v3_6_pmc_get_count
116}; 544};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
index e79c58e5efcb..fcffd807764d 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -35,6 +35,23 @@ enum DF_V3_6_MGCG {
35 DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 35 DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
36}; 36};
37 37
38/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
39#define AMDGPU_DF_MAX_COUNTERS 4
40
41/* get flags from df perfmon config */
42#define GET_EVENT(x) (x & 0xFFUL)
43#define GET_INSTANCE(x) ((x >> 8) & 0xFFUL)
44#define GET_UNITMASK(x) ((x >> 16) & 0xFFUL)
45#define GET_INSTANCE_CONFIG(x) (0ULL | (0x07ULL) \
46 | ((0x046ULL + x) << 8) \
47 | (0x02 << 16))
48
49/* df event conf macros */
50#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \
51 && GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2)
52#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \
53 && GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2)
54
38extern const struct amdgpu_df_funcs df_v3_6_funcs; 55extern const struct amdgpu_df_funcs df_v3_6_funcs;
39 56
40#endif 57#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a59e0fdf5a97..4cd1731d62fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4493,12 +4493,8 @@ static int gfx_v7_0_sw_init(void *handle)
4493 4493
4494static int gfx_v7_0_sw_fini(void *handle) 4494static int gfx_v7_0_sw_fini(void *handle)
4495{ 4495{
4496 int i;
4497 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4496 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4498 4497 int i;
4499 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
4500 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
4501 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
4502 4498
4503 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4499 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4504 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 4500 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -5070,30 +5066,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
5070static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) 5066static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
5071{ 5067{
5072 /* init asic gds info */ 5068 /* init asic gds info */
5073 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 5069 adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
5074 adev->gds.gws.total_size = 64; 5070 adev->gds.gws_size = 64;
5075 adev->gds.oa.total_size = 16; 5071 adev->gds.oa_size = 16;
5076 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); 5072 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
5077
5078 if (adev->gds.mem.total_size == 64 * 1024) {
5079 adev->gds.mem.gfx_partition_size = 4096;
5080 adev->gds.mem.cs_partition_size = 4096;
5081
5082 adev->gds.gws.gfx_partition_size = 4;
5083 adev->gds.gws.cs_partition_size = 4;
5084
5085 adev->gds.oa.gfx_partition_size = 4;
5086 adev->gds.oa.cs_partition_size = 1;
5087 } else {
5088 adev->gds.mem.gfx_partition_size = 1024;
5089 adev->gds.mem.cs_partition_size = 1024;
5090
5091 adev->gds.gws.gfx_partition_size = 16;
5092 adev->gds.gws.cs_partition_size = 16;
5093
5094 adev->gds.oa.gfx_partition_size = 4;
5095 adev->gds.oa.cs_partition_size = 4;
5096 }
5097} 5073}
5098 5074
5099 5075
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 02955e6e9dd9..25400b708722 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2057,12 +2057,8 @@ static int gfx_v8_0_sw_init(void *handle)
2057 2057
2058static int gfx_v8_0_sw_fini(void *handle) 2058static int gfx_v8_0_sw_fini(void *handle)
2059{ 2059{
2060 int i;
2061 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2060 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2062 2061 int i;
2063 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2064 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2065 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2066 2062
2067 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2063 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2068 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2064 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -7010,30 +7006,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7010static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 7006static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7011{ 7007{
7012 /* init asci gds info */ 7008 /* init asci gds info */
7013 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 7009 adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7014 adev->gds.gws.total_size = 64; 7010 adev->gds.gws_size = 64;
7015 adev->gds.oa.total_size = 16; 7011 adev->gds.oa_size = 16;
7016 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); 7012 adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7017
7018 if (adev->gds.mem.total_size == 64 * 1024) {
7019 adev->gds.mem.gfx_partition_size = 4096;
7020 adev->gds.mem.cs_partition_size = 4096;
7021
7022 adev->gds.gws.gfx_partition_size = 4;
7023 adev->gds.gws.cs_partition_size = 4;
7024
7025 adev->gds.oa.gfx_partition_size = 4;
7026 adev->gds.oa.cs_partition_size = 1;
7027 } else {
7028 adev->gds.mem.gfx_partition_size = 1024;
7029 adev->gds.mem.cs_partition_size = 1024;
7030
7031 adev->gds.gws.gfx_partition_size = 16;
7032 adev->gds.gws.cs_partition_size = 16;
7033
7034 adev->gds.oa.gfx_partition_size = 4;
7035 adev->gds.oa.cs_partition_size = 4;
7036 }
7037} 7013}
7038 7014
7039static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7015static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ba67d1023264..c763733619fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -34,6 +34,7 @@
34#include "vega10_enum.h" 34#include "vega10_enum.h"
35#include "hdp/hdp_4_0_offset.h" 35#include "hdp/hdp_4_0_offset.h"
36 36
37#include "soc15.h"
37#include "soc15_common.h" 38#include "soc15_common.h"
38#include "clearstate_gfx9.h" 39#include "clearstate_gfx9.h"
39#include "v9_structs.h" 40#include "v9_structs.h"
@@ -307,12 +308,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
307{ 308{
308 switch (adev->asic_type) { 309 switch (adev->asic_type) {
309 case CHIP_VEGA10: 310 case CHIP_VEGA10:
310 soc15_program_register_sequence(adev, 311 if (!amdgpu_virt_support_skip_setting(adev)) {
311 golden_settings_gc_9_0, 312 soc15_program_register_sequence(adev,
312 ARRAY_SIZE(golden_settings_gc_9_0)); 313 golden_settings_gc_9_0,
313 soc15_program_register_sequence(adev, 314 ARRAY_SIZE(golden_settings_gc_9_0));
314 golden_settings_gc_9_0_vg10, 315 soc15_program_register_sequence(adev,
315 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 316 golden_settings_gc_9_0_vg10,
317 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
318 }
316 break; 319 break;
317 case CHIP_VEGA12: 320 case CHIP_VEGA12:
318 soc15_program_register_sequence(adev, 321 soc15_program_register_sequence(adev,
@@ -1458,8 +1461,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1458 1461
1459 /* GDS reserve memory: 64 bytes alignment */ 1462 /* GDS reserve memory: 64 bytes alignment */
1460 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 1463 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1461 adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; 1464 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1462 adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
1463 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 1465 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1464 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 1466 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1465 1467
@@ -1567,7 +1569,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1567 1569
1568 gfx_v9_0_write_data_to_reg(ring, 0, false, 1570 gfx_v9_0_write_data_to_reg(ring, 0, false,
1569 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 1571 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1570 (adev->gds.mem.total_size + 1572 (adev->gds.gds_size +
1571 adev->gfx.ngg.gds_reserve_size)); 1573 adev->gfx.ngg.gds_reserve_size));
1572 1574
1573 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 1575 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
@@ -1781,10 +1783,6 @@ static int gfx_v9_0_sw_fini(void *handle)
1781 kfree(ras_if); 1783 kfree(ras_if);
1782 } 1784 }
1783 1785
1784 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1785 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1786 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
1787
1788 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1786 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1789 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1787 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1790 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1788 for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -1834,7 +1832,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
1834 else 1832 else
1835 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1833 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1836 1834
1837 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); 1835 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1838} 1836}
1839 1837
1840static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1838static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
@@ -1902,8 +1900,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1902 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1900 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1903 soc15_grbm_select(adev, 0, 0, 0, i); 1901 soc15_grbm_select(adev, 0, 0, 0, i);
1904 /* CP and shaders */ 1902 /* CP and shaders */
1905 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 1903 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1906 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 1904 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1907 } 1905 }
1908 soc15_grbm_select(adev, 0, 0, 0, 0); 1906 soc15_grbm_select(adev, 0, 0, 0, 0);
1909 mutex_unlock(&adev->srbm_mutex); 1907 mutex_unlock(&adev->srbm_mutex);
@@ -1914,7 +1912,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1914 u32 tmp; 1912 u32 tmp;
1915 int i; 1913 int i;
1916 1914
1917 WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1915 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1918 1916
1919 gfx_v9_0_tiling_mode_table_init(adev); 1917 gfx_v9_0_tiling_mode_table_init(adev);
1920 1918
@@ -1957,7 +1955,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1957 */ 1955 */
1958 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1956 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1959 1957
1960 WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, 1958 WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
1961 (adev->gfx.config.sc_prim_fifo_size_frontend << 1959 (adev->gfx.config.sc_prim_fifo_size_frontend <<
1962 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 1960 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1963 (adev->gfx.config.sc_prim_fifo_size_backend << 1961 (adev->gfx.config.sc_prim_fifo_size_backend <<
@@ -2024,11 +2022,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2024static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2022static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2025{ 2023{
2026 /* csib */ 2024 /* csib */
2027 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2025 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2028 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2026 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2029 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2027 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2030 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2028 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2031 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2029 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2032 adev->gfx.rlc.clear_state_size); 2030 adev->gfx.rlc.clear_state_size);
2033} 2031}
2034 2032
@@ -2498,7 +2496,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2498 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2496 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2499 adev->gfx.gfx_ring[i].sched.ready = false; 2497 adev->gfx.gfx_ring[i].sched.ready = false;
2500 } 2498 }
2501 WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); 2499 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2502 udelay(50); 2500 udelay(50);
2503} 2501}
2504 2502
@@ -2696,9 +2694,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2696 int i; 2694 int i;
2697 2695
2698 if (enable) { 2696 if (enable) {
2699 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); 2697 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2700 } else { 2698 } else {
2701 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 2699 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2702 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2700 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2703 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2701 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2704 adev->gfx.compute_ring[i].sched.ready = false; 2702 adev->gfx.compute_ring[i].sched.ready = false;
@@ -2759,9 +2757,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2759 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2757 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2760 tmp &= 0xffffff00; 2758 tmp &= 0xffffff00;
2761 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2759 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2762 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2760 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2763 tmp |= 0x80; 2761 tmp |= 0x80;
2764 WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2762 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2765} 2763}
2766 2764
2767static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2765static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
@@ -2979,67 +2977,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2979 /* disable wptr polling */ 2977 /* disable wptr polling */
2980 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2978 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2981 2979
2982 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2980 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2983 mqd->cp_hqd_eop_base_addr_lo); 2981 mqd->cp_hqd_eop_base_addr_lo);
2984 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2982 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2985 mqd->cp_hqd_eop_base_addr_hi); 2983 mqd->cp_hqd_eop_base_addr_hi);
2986 2984
2987 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2985 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2988 WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, 2986 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2989 mqd->cp_hqd_eop_control); 2987 mqd->cp_hqd_eop_control);
2990 2988
2991 /* enable doorbell? */ 2989 /* enable doorbell? */
2992 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 2990 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2993 mqd->cp_hqd_pq_doorbell_control); 2991 mqd->cp_hqd_pq_doorbell_control);
2994 2992
2995 /* disable the queue if it's active */ 2993 /* disable the queue if it's active */
2996 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 2994 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2997 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 2995 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2998 for (j = 0; j < adev->usec_timeout; j++) { 2996 for (j = 0; j < adev->usec_timeout; j++) {
2999 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 2997 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3000 break; 2998 break;
3001 udelay(1); 2999 udelay(1);
3002 } 3000 }
3003 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3001 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3004 mqd->cp_hqd_dequeue_request); 3002 mqd->cp_hqd_dequeue_request);
3005 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 3003 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3006 mqd->cp_hqd_pq_rptr); 3004 mqd->cp_hqd_pq_rptr);
3007 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3005 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3008 mqd->cp_hqd_pq_wptr_lo); 3006 mqd->cp_hqd_pq_wptr_lo);
3009 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3007 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3010 mqd->cp_hqd_pq_wptr_hi); 3008 mqd->cp_hqd_pq_wptr_hi);
3011 } 3009 }
3012 3010
3013 /* set the pointer to the MQD */ 3011 /* set the pointer to the MQD */
3014 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, 3012 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3015 mqd->cp_mqd_base_addr_lo); 3013 mqd->cp_mqd_base_addr_lo);
3016 WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3014 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3017 mqd->cp_mqd_base_addr_hi); 3015 mqd->cp_mqd_base_addr_hi);
3018 3016
3019 /* set MQD vmid to 0 */ 3017 /* set MQD vmid to 0 */
3020 WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, 3018 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3021 mqd->cp_mqd_control); 3019 mqd->cp_mqd_control);
3022 3020
3023 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3021 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3024 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, 3022 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3025 mqd->cp_hqd_pq_base_lo); 3023 mqd->cp_hqd_pq_base_lo);
3026 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, 3024 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3027 mqd->cp_hqd_pq_base_hi); 3025 mqd->cp_hqd_pq_base_hi);
3028 3026
3029 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3027 /* set up the HQD, this is similar to CP_RB0_CNTL */
3030 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, 3028 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3031 mqd->cp_hqd_pq_control); 3029 mqd->cp_hqd_pq_control);
3032 3030
3033 /* set the wb address whether it's enabled or not */ 3031 /* set the wb address whether it's enabled or not */
3034 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3032 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3035 mqd->cp_hqd_pq_rptr_report_addr_lo); 3033 mqd->cp_hqd_pq_rptr_report_addr_lo);
3036 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3034 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3037 mqd->cp_hqd_pq_rptr_report_addr_hi); 3035 mqd->cp_hqd_pq_rptr_report_addr_hi);
3038 3036
3039 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3037 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3040 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3038 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3041 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3039 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3042 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3040 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3043 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3041 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3044 3042
3045 /* enable the doorbell if requested */ 3043 /* enable the doorbell if requested */
@@ -3054,19 +3052,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3054 mqd->cp_hqd_pq_doorbell_control); 3052 mqd->cp_hqd_pq_doorbell_control);
3055 3053
3056 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3054 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3057 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3055 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3058 mqd->cp_hqd_pq_wptr_lo); 3056 mqd->cp_hqd_pq_wptr_lo);
3059 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3057 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3060 mqd->cp_hqd_pq_wptr_hi); 3058 mqd->cp_hqd_pq_wptr_hi);
3061 3059
3062 /* set the vmid for the queue */ 3060 /* set the vmid for the queue */
3063 WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3061 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3064 3062
3065 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3063 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3066 mqd->cp_hqd_persistent_state); 3064 mqd->cp_hqd_persistent_state);
3067 3065
3068 /* activate the queue */ 3066 /* activate the queue */
3069 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 3067 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3070 mqd->cp_hqd_active); 3068 mqd->cp_hqd_active);
3071 3069
3072 if (ring->use_doorbell) 3070 if (ring->use_doorbell)
@@ -3083,7 +3081,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3083 /* disable the queue if it's active */ 3081 /* disable the queue if it's active */
3084 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3082 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3085 3083
3086 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3084 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3087 3085
3088 for (j = 0; j < adev->usec_timeout; j++) { 3086 for (j = 0; j < adev->usec_timeout; j++) {
3089 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3087 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
@@ -3095,21 +3093,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3095 DRM_DEBUG("KIQ dequeue request failed.\n"); 3093 DRM_DEBUG("KIQ dequeue request failed.\n");
3096 3094
3097 /* Manual disable if dequeue request times out */ 3095 /* Manual disable if dequeue request times out */
3098 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0); 3096 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3099 } 3097 }
3100 3098
3101 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3099 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3102 0); 3100 0);
3103 } 3101 }
3104 3102
3105 WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3103 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3106 WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3104 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3107 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3105 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3108 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3106 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3109 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3107 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3110 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3108 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3111 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3109 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3112 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3110 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3113 3111
3114 return 0; 3112 return 0;
3115} 3113}
@@ -3529,6 +3527,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3529 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3527 (1 << (oa_size + oa_base)) - (1 << oa_base));
3530} 3528}
3531 3529
3530static const u32 vgpr_init_compute_shader[] =
3531{
3532 0xb07c0000, 0xbe8000ff,
3533 0x000000f8, 0xbf110800,
3534 0x7e000280, 0x7e020280,
3535 0x7e040280, 0x7e060280,
3536 0x7e080280, 0x7e0a0280,
3537 0x7e0c0280, 0x7e0e0280,
3538 0x80808800, 0xbe803200,
3539 0xbf84fff5, 0xbf9c0000,
3540 0xd28c0001, 0x0001007f,
3541 0xd28d0001, 0x0002027e,
3542 0x10020288, 0xb8810904,
3543 0xb7814000, 0xd1196a01,
3544 0x00000301, 0xbe800087,
3545 0xbefc00c1, 0xd89c4000,
3546 0x00020201, 0xd89cc080,
3547 0x00040401, 0x320202ff,
3548 0x00000800, 0x80808100,
3549 0xbf84fff8, 0x7e020280,
3550 0xbf810000, 0x00000000,
3551};
3552
3553static const u32 sgpr_init_compute_shader[] =
3554{
3555 0xb07c0000, 0xbe8000ff,
3556 0x0000005f, 0xbee50080,
3557 0xbe812c65, 0xbe822c65,
3558 0xbe832c65, 0xbe842c65,
3559 0xbe852c65, 0xb77c0005,
3560 0x80808500, 0xbf84fff8,
3561 0xbe800080, 0xbf810000,
3562};
3563
3564static const struct soc15_reg_entry vgpr_init_regs[] = {
3565 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3566 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3567 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3568 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3569 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3570 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3571 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3572 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3573 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
3574 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
3575};
3576
3577static const struct soc15_reg_entry sgpr_init_regs[] = {
3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3582 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3583 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3585 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3586 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3587 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3588};
3589
3590static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3591 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
3592 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
3593 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
3594 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
3595 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
3596 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
3597 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
3598 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
3599 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
3600 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
3601 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
3602 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
3603 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
3604 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
3605 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
3606 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
3607 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
3608 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
3609 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
3610 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
3611 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
3612 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
3613 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
3614 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
3615 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
3616 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
3617 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
3618 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
3619 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
3620 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
3621 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
3622};
3623
3624static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3625{
3626 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3627 struct amdgpu_ib ib;
3628 struct dma_fence *f = NULL;
3629 int r, i, j;
3630 unsigned total_size, vgpr_offset, sgpr_offset;
3631 u64 gpu_addr;
3632
3633 /* only support when RAS is enabled */
3634 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3635 return 0;
3636
3637 /* bail if the compute ring is not ready */
3638 if (!ring->sched.ready)
3639 return 0;
3640
3641 total_size =
3642 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3643 total_size +=
3644 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3645 total_size = ALIGN(total_size, 256);
3646 vgpr_offset = total_size;
3647 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3648 sgpr_offset = total_size;
3649 total_size += sizeof(sgpr_init_compute_shader);
3650
3651 /* allocate an indirect buffer to put the commands in */
3652 memset(&ib, 0, sizeof(ib));
3653 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3654 if (r) {
3655 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3656 return r;
3657 }
3658
3659 /* load the compute shaders */
3660 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3661 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3662
3663 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3664 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3665
3666 /* init the ib length to 0 */
3667 ib.length_dw = 0;
3668
3669 /* VGPR */
3670 /* write the register state for the compute dispatch */
3671 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3672 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3673 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3674 - PACKET3_SET_SH_REG_START;
3675 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3676 }
3677 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3678 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3679 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3680 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3681 - PACKET3_SET_SH_REG_START;
3682 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3683 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3684
3685 /* write dispatch packet */
3686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3687 ib.ptr[ib.length_dw++] = 128; /* x */
3688 ib.ptr[ib.length_dw++] = 1; /* y */
3689 ib.ptr[ib.length_dw++] = 1; /* z */
3690 ib.ptr[ib.length_dw++] =
3691 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3692
3693 /* write CS partial flush packet */
3694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3695 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3696
3697 /* SGPR */
3698 /* write the register state for the compute dispatch */
3699 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3701 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3702 - PACKET3_SET_SH_REG_START;
3703 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3704 }
3705 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3706 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3708 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3709 - PACKET3_SET_SH_REG_START;
3710 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3711 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3712
3713 /* write dispatch packet */
3714 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3715 ib.ptr[ib.length_dw++] = 128; /* x */
3716 ib.ptr[ib.length_dw++] = 1; /* y */
3717 ib.ptr[ib.length_dw++] = 1; /* z */
3718 ib.ptr[ib.length_dw++] =
3719 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3720
3721 /* write CS partial flush packet */
3722 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3723 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3724
3725 /* shedule the ib on the ring */
3726 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3727 if (r) {
3728 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3729 goto fail;
3730 }
3731
3732 /* wait for the GPU to finish processing the IB */
3733 r = dma_fence_wait(f, false);
3734 if (r) {
3735 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3736 goto fail;
3737 }
3738
3739 /* read back registers to clear the counters */
3740 mutex_lock(&adev->grbm_idx_mutex);
3741 for (j = 0; j < 16; j++) {
3742 gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
3743 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3744 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3745 gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
3746 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3747 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3748 gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
3749 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3750 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3751 gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
3752 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3753 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3754 }
3755 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3756 mutex_unlock(&adev->grbm_idx_mutex);
3757
3758fail:
3759 amdgpu_ib_free(adev, &ib, NULL);
3760 dma_fence_put(f);
3761
3762 return r;
3763}
3764
3532static int gfx_v9_0_early_init(void *handle) 3765static int gfx_v9_0_early_init(void *handle)
3533{ 3766{
3534 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3767 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3570,8 +3803,31 @@ static int gfx_v9_0_ecc_late_init(void *handle)
3570 return 0; 3803 return 0;
3571 } 3804 }
3572 3805
3573 if (*ras_if) 3806 /* requires IBs so do in late init after IB pool is initialized */
3807 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3808 if (r)
3809 return r;
3810
3811 /* handle resume path. */
3812 if (*ras_if) {
3813 /* resend ras TA enable cmd during resume.
3814 * prepare to handle failure.
3815 */
3816 ih_info.head = **ras_if;
3817 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3818 if (r) {
3819 if (r == -EAGAIN) {
3820 /* request a gpu reset. will run again. */
3821 amdgpu_ras_request_reset_on_boot(adev,
3822 AMDGPU_RAS_BLOCK__GFX);
3823 return 0;
3824 }
3825 /* fail to enable ras, cleanup all. */
3826 goto irq;
3827 }
3828 /* enable successfully. continue. */
3574 goto resume; 3829 goto resume;
3830 }
3575 3831
3576 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 3832 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3577 if (!*ras_if) 3833 if (!*ras_if)
@@ -3580,8 +3836,14 @@ static int gfx_v9_0_ecc_late_init(void *handle)
3580 **ras_if = ras_block; 3836 **ras_if = ras_block;
3581 3837
3582 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3838 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3583 if (r) 3839 if (r) {
3840 if (r == -EAGAIN) {
3841 amdgpu_ras_request_reset_on_boot(adev,
3842 AMDGPU_RAS_BLOCK__GFX);
3843 r = 0;
3844 }
3584 goto feature; 3845 goto feature;
3846 }
3585 3847
3586 ih_info.head = **ras_if; 3848 ih_info.head = **ras_if;
3587 fs_info.head = **ras_if; 3849 fs_info.head = **ras_if;
@@ -3614,7 +3876,7 @@ interrupt:
3614feature: 3876feature:
3615 kfree(*ras_if); 3877 kfree(*ras_if);
3616 *ras_if = NULL; 3878 *ras_if = NULL;
3617 return -EINVAL; 3879 return r;
3618} 3880}
3619 3881
3620static int gfx_v9_0_late_init(void *handle) 3882static int gfx_v9_0_late_init(void *handle)
@@ -4319,8 +4581,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4319 mutex_lock(&adev->srbm_mutex); 4581 mutex_lock(&adev->srbm_mutex);
4320 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4582 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4321 4583
4322 WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4584 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4323 WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4585 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4324 4586
4325 soc15_grbm_select(adev, 0, 0, 0, 0); 4587 soc15_grbm_select(adev, 0, 0, 0, 0);
4326 mutex_unlock(&adev->srbm_mutex); 4588 mutex_unlock(&adev->srbm_mutex);
@@ -5056,13 +5318,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5056 case CHIP_VEGA10: 5318 case CHIP_VEGA10:
5057 case CHIP_VEGA12: 5319 case CHIP_VEGA12:
5058 case CHIP_VEGA20: 5320 case CHIP_VEGA20:
5059 adev->gds.mem.total_size = 0x10000; 5321 adev->gds.gds_size = 0x10000;
5060 break; 5322 break;
5061 case CHIP_RAVEN: 5323 case CHIP_RAVEN:
5062 adev->gds.mem.total_size = 0x1000; 5324 adev->gds.gds_size = 0x1000;
5063 break; 5325 break;
5064 default: 5326 default:
5065 adev->gds.mem.total_size = 0x10000; 5327 adev->gds.gds_size = 0x10000;
5066 break; 5328 break;
5067 } 5329 }
5068 5330
@@ -5086,28 +5348,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5086 break; 5348 break;
5087 } 5349 }
5088 5350
5089 adev->gds.gws.total_size = 64; 5351 adev->gds.gws_size = 64;
5090 adev->gds.oa.total_size = 16; 5352 adev->gds.oa_size = 16;
5091
5092 if (adev->gds.mem.total_size == 64 * 1024) {
5093 adev->gds.mem.gfx_partition_size = 4096;
5094 adev->gds.mem.cs_partition_size = 4096;
5095
5096 adev->gds.gws.gfx_partition_size = 4;
5097 adev->gds.gws.cs_partition_size = 4;
5098
5099 adev->gds.oa.gfx_partition_size = 4;
5100 adev->gds.oa.cs_partition_size = 1;
5101 } else {
5102 adev->gds.mem.gfx_partition_size = 1024;
5103 adev->gds.mem.cs_partition_size = 1024;
5104
5105 adev->gds.gws.gfx_partition_size = 16;
5106 adev->gds.gws.cs_partition_size = 16;
5107
5108 adev->gds.oa.gfx_partition_size = 4;
5109 adev->gds.oa.cs_partition_size = 4;
5110 }
5111} 5353}
5112 5354
5113static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 5355static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 7bb5359d0bbd..0dc8926111e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
71 uint64_t value; 71 uint64_t value;
72 72
73 /* Program the AGP BAR */ 73 /* Program the AGP BAR */
74 WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); 74 WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
75 WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); 75 WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
76 WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); 76 WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
77 77
78 /* Program the system aperture low logical page number. */ 78 /* Program the system aperture low logical page number. */
79 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 79 WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
80 min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); 80 min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
81 81
82 if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) 82 if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
@@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
86 * workaround that increase system aperture high address (add 1) 86 * workaround that increase system aperture high address (add 1)
87 * to get rid of the VM fault and hardware hang. 87 * to get rid of the VM fault and hardware hang.
88 */ 88 */
89 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 89 WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
90 max((adev->gmc.fb_end >> 18) + 0x1, 90 max((adev->gmc.fb_end >> 18) + 0x1,
91 adev->gmc.agp_end >> 18)); 91 adev->gmc.agp_end >> 18));
92 else 92 else
93 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 93 WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
94 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); 94 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
95 95
96 /* Set default page address. */ 96 /* Set default page address. */
@@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
129 MTYPE, MTYPE_UC);/* XXX for emulation. */ 129 MTYPE, MTYPE_UC);/* XXX for emulation. */
130 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); 130 tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
131 131
132 WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); 132 WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
133} 133}
134 134
135static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) 135static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
@@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
267 * VF copy registers so vbios post doesn't program them, for 267 * VF copy registers so vbios post doesn't program them, for
268 * SRIOV driver need to program them 268 * SRIOV driver need to program them
269 */ 269 */
270 WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 270 WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,
271 adev->gmc.vram_start >> 24); 271 adev->gmc.vram_start >> 24);
272 WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 272 WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,
273 adev->gmc.vram_end >> 24); 273 adev->gmc.vram_end >> 24);
274 } 274 }
275 275
@@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
303 MC_VM_MX_L1_TLB_CNTL, 303 MC_VM_MX_L1_TLB_CNTL,
304 ENABLE_ADVANCED_DRIVER_MODEL, 304 ENABLE_ADVANCED_DRIVER_MODEL,
305 0); 305 0);
306 WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); 306 WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
307 307
308 /* Setup L2 cache */ 308 /* Setup L2 cache */
309 WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); 309 WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 8a3b5e6fc6c9..8bf2ba310fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -289,7 +289,7 @@ out:
289 * 289 *
290 * @adev: amdgpu_device pointer 290 * @adev: amdgpu_device pointer
291 * 291 *
292 * Load the GDDR MC ucode into the hw (CIK). 292 * Load the GDDR MC ucode into the hw (VI).
293 * Returns 0 on success, error on failure. 293 * Returns 0 on success, error on failure.
294 */ 294 */
295static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev) 295static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
@@ -443,7 +443,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
443 * @adev: amdgpu_device pointer 443 * @adev: amdgpu_device pointer
444 * 444 *
445 * Set the location of vram, gart, and AGP in the GPU's 445 * Set the location of vram, gart, and AGP in the GPU's
446 * physical address space (CIK). 446 * physical address space (VI).
447 */ 447 */
448static void gmc_v8_0_mc_program(struct amdgpu_device *adev) 448static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
449{ 449{
@@ -515,7 +515,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
515 * @adev: amdgpu_device pointer 515 * @adev: amdgpu_device pointer
516 * 516 *
517 * Look up the amount of vram, vram width, and decide how to place 517 * Look up the amount of vram, vram width, and decide how to place
518 * vram and gart within the GPU's physical address space (CIK). 518 * vram and gart within the GPU's physical address space (VI).
519 * Returns 0 for success. 519 * Returns 0 for success.
520 */ 520 */
521static int gmc_v8_0_mc_init(struct amdgpu_device *adev) 521static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
@@ -630,7 +630,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
630 * @adev: amdgpu_device pointer 630 * @adev: amdgpu_device pointer
631 * @vmid: vm instance to flush 631 * @vmid: vm instance to flush
632 * 632 *
633 * Flush the TLB for the requested page table (CIK). 633 * Flush the TLB for the requested page table (VI).
634 */ 634 */
635static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, 635static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
636 uint32_t vmid, uint32_t flush_type) 636 uint32_t vmid, uint32_t flush_type)
@@ -800,7 +800,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
800 * This sets up the TLBs, programs the page tables for VMID0, 800 * This sets up the TLBs, programs the page tables for VMID0,
801 * sets up the hw for VMIDs 1-15 which are allocated on 801 * sets up the hw for VMIDs 1-15 which are allocated on
802 * demand, and sets up the global locations for the LDS, GDS, 802 * demand, and sets up the global locations for the LDS, GDS,
803 * and GPUVM for FSA64 clients (CIK). 803 * and GPUVM for FSA64 clients (VI).
804 * Returns 0 for success, errors for failure. 804 * Returns 0 for success, errors for failure.
805 */ 805 */
806static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) 806static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
@@ -948,7 +948,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
948 * 948 *
949 * @adev: amdgpu_device pointer 949 * @adev: amdgpu_device pointer
950 * 950 *
951 * This disables all VM page table (CIK). 951 * This disables all VM page table (VI).
952 */ 952 */
953static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) 953static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
954{ 954{
@@ -978,7 +978,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
978 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value 978 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
979 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value 979 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
980 * 980 *
981 * Print human readable fault information (CIK). 981 * Print human readable fault information (VI).
982 */ 982 */
983static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, 983static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
984 u32 addr, u32 mc_client, unsigned pasid) 984 u32 addr, u32 mc_client, unsigned pasid)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3b7370d914a5..602593bab7a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -687,8 +687,25 @@ static int gmc_v9_0_ecc_late_init(void *handle)
687 return 0; 687 return 0;
688 } 688 }
689 /* handle resume path. */ 689 /* handle resume path. */
690 if (*ras_if) 690 if (*ras_if) {
691 /* resend ras TA enable cmd during resume.
692 * prepare to handle failure.
693 */
694 ih_info.head = **ras_if;
695 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
696 if (r) {
697 if (r == -EAGAIN) {
698 /* request a gpu reset. will run again. */
699 amdgpu_ras_request_reset_on_boot(adev,
700 AMDGPU_RAS_BLOCK__UMC);
701 return 0;
702 }
703 /* fail to enable ras, cleanup all. */
704 goto irq;
705 }
706 /* enable successfully. continue. */
691 goto resume; 707 goto resume;
708 }
692 709
693 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 710 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
694 if (!*ras_if) 711 if (!*ras_if)
@@ -697,8 +714,14 @@ static int gmc_v9_0_ecc_late_init(void *handle)
697 **ras_if = ras_block; 714 **ras_if = ras_block;
698 715
699 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 716 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
700 if (r) 717 if (r) {
718 if (r == -EAGAIN) {
719 amdgpu_ras_request_reset_on_boot(adev,
720 AMDGPU_RAS_BLOCK__UMC);
721 r = 0;
722 }
701 goto feature; 723 goto feature;
724 }
702 725
703 ih_info.head = **ras_if; 726 ih_info.head = **ras_if;
704 fs_info.head = **ras_if; 727 fs_info.head = **ras_if;
@@ -731,7 +754,7 @@ interrupt:
731feature: 754feature:
732 kfree(*ras_if); 755 kfree(*ras_if);
733 *ras_if = NULL; 756 *ras_if = NULL;
734 return -EINVAL; 757 return r;
735} 758}
736 759
737 760
@@ -1100,6 +1123,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
1100 1123
1101 switch (adev->asic_type) { 1124 switch (adev->asic_type) {
1102 case CHIP_VEGA10: 1125 case CHIP_VEGA10:
1126 if (amdgpu_virt_support_skip_setting(adev))
1127 break;
1128 /* fall through */
1103 case CHIP_VEGA20: 1129 case CHIP_VEGA20:
1104 soc15_program_register_sequence(adev, 1130 soc15_program_register_sequence(adev,
1105 golden_settings_mmhub_1_0_0, 1131 golden_settings_mmhub_1_0_0,
@@ -1164,6 +1190,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
1164 tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); 1190 tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
1165 WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); 1191 WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
1166 1192
1193 WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
1194 WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
1195
1167 /* After HDP is initialized, flush HDP.*/ 1196 /* After HDP is initialized, flush HDP.*/
1168 adev->nbio_funcs->hdp_flush(adev, NULL); 1197 adev->nbio_funcs->hdp_flush(adev, NULL);
1169 1198
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 41a9a5779623..05d1d448c8f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
111 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 111 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
112 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); 112 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
113 113
114 if (amdgpu_virt_support_skip_setting(adev))
115 return;
116
114 /* Set default page address. */ 117 /* Set default page address. */
115 value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + 118 value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
116 adev->vm_manager.vram_base_offset; 119 adev->vm_manager.vram_base_offset;
@@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
156{ 159{
157 uint32_t tmp; 160 uint32_t tmp;
158 161
162 if (amdgpu_virt_support_skip_setting(adev))
163 return;
164
159 /* Setup L2 cache */ 165 /* Setup L2 cache */
160 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); 166 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
161 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); 167 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
@@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
202 208
203static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) 209static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
204{ 210{
211 if (amdgpu_virt_support_skip_setting(adev))
212 return;
213
205 WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 214 WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
206 0XFFFFFFFF); 215 0XFFFFFFFF);
207 WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, 216 WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
@@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
338 0); 347 0);
339 WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); 348 WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
340 349
341 /* Setup L2 cache */ 350 if (!amdgpu_virt_support_skip_setting(adev)) {
342 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); 351 /* Setup L2 cache */
343 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); 352 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
344 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); 353 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
345 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); 354 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
355 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
356 }
346} 357}
347 358
348/** 359/**
@@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
354void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) 365void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
355{ 366{
356 u32 tmp; 367 u32 tmp;
368
369 if (amdgpu_virt_support_skip_setting(adev))
370 return;
371
357 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); 372 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
358 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 373 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
359 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 374 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 2471e7cf75ea..31030f86be86 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -26,6 +26,7 @@
26#include "nbio/nbio_6_1_sh_mask.h" 26#include "nbio/nbio_6_1_sh_mask.h"
27#include "gc/gc_9_0_offset.h" 27#include "gc/gc_9_0_offset.h"
28#include "gc/gc_9_0_sh_mask.h" 28#include "gc/gc_9_0_sh_mask.h"
29#include "mp/mp_9_0_offset.h"
29#include "soc15.h" 30#include "soc15.h"
30#include "vega10_ih.h" 31#include "vega10_ih.h"
31#include "soc15_common.h" 32#include "soc15_common.h"
@@ -343,7 +344,7 @@ flr_done:
343 344
344 /* Trigger recovery for world switch failure if no TDR */ 345 /* Trigger recovery for world switch failure if no TDR */
345 if (amdgpu_device_should_recover_gpu(adev) 346 if (amdgpu_device_should_recover_gpu(adev)
346 && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT) 347 && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
347 amdgpu_device_gpu_recover(adev, NULL); 348 amdgpu_device_gpu_recover(adev, NULL);
348} 349}
349 350
@@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
448 amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); 449 amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
449} 450}
450 451
452static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
453{
454 uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
455 uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
456
457 adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
458
459 if (rlc_fw_ver >= 0x5d)
460 adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
461
462 if (sos_fw_ver >= 0x80455)
463 adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
464
465 if (sos_fw_ver >= 0x8045b)
466 adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
467}
468
451const struct amdgpu_virt_ops xgpu_ai_virt_ops = { 469const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
452 .req_full_gpu = xgpu_ai_request_full_gpu_access, 470 .req_full_gpu = xgpu_ai_request_full_gpu_access,
453 .rel_full_gpu = xgpu_ai_release_full_gpu_access, 471 .rel_full_gpu = xgpu_ai_release_full_gpu_access,
@@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
456 .trans_msg = xgpu_ai_mailbox_trans_msg, 474 .trans_msg = xgpu_ai_mailbox_trans_msg,
457 .get_pp_clk = xgpu_ai_get_pp_clk, 475 .get_pp_clk = xgpu_ai_get_pp_clk,
458 .force_dpm_level = xgpu_ai_force_dpm_level, 476 .force_dpm_level = xgpu_ai_force_dpm_level,
477 .init_reg_access_mode = xgpu_ai_init_reg_access_mode,
459}; 478};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 1cdb98ad2db3..73419fa38159 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -29,9 +29,18 @@
29#include "nbio/nbio_7_0_sh_mask.h" 29#include "nbio/nbio_7_0_sh_mask.h"
30#include "nbio/nbio_7_0_smn.h" 30#include "nbio/nbio_7_0_smn.h"
31#include "vega10_enum.h" 31#include "vega10_enum.h"
32#include <uapi/linux/kfd_ioctl.h>
32 33
33#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c 34#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
34 35
36static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
37{
38 WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
39 adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
40 WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
41 adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
42}
43
35static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) 44static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
36{ 45{
37 u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); 46 u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
55 struct amdgpu_ring *ring) 64 struct amdgpu_ring *ring)
56{ 65{
57 if (!ring || !ring->funcs->emit_wreg) 66 if (!ring || !ring->funcs->emit_wreg)
58 WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); 67 WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
59 else 68 else
60 amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( 69 amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
61 NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
62} 70}
63 71
64static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) 72static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
@@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
283 .ih_control = nbio_v7_0_ih_control, 291 .ih_control = nbio_v7_0_ih_control,
284 .init_registers = nbio_v7_0_init_registers, 292 .init_registers = nbio_v7_0_init_registers,
285 .detect_hw_virt = nbio_v7_0_detect_hw_virt, 293 .detect_hw_virt = nbio_v7_0_detect_hw_virt,
294 .remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
286}; 295};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index c69d51598cfe..bfaaa327ae3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -27,9 +27,18 @@
27#include "nbio/nbio_7_4_offset.h" 27#include "nbio/nbio_7_4_offset.h"
28#include "nbio/nbio_7_4_sh_mask.h" 28#include "nbio/nbio_7_4_sh_mask.h"
29#include "nbio/nbio_7_4_0_smn.h" 29#include "nbio/nbio_7_4_0_smn.h"
30#include <uapi/linux/kfd_ioctl.h>
30 31
31#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c 32#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
32 33
34static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
35{
36 WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
37 adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
38 WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
39 adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
40}
41
33static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) 42static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
34{ 43{
35 u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); 44 u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
53 struct amdgpu_ring *ring) 62 struct amdgpu_ring *ring)
54{ 63{
55 if (!ring || !ring->funcs->emit_wreg) 64 if (!ring || !ring->funcs->emit_wreg)
56 WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); 65 WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
57 else 66 else
58 amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( 67 amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
59 NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
60} 68}
61 69
62static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) 70static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
@@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
262 .ih_control = nbio_v7_4_ih_control, 270 .ih_control = nbio_v7_4_ih_control,
263 .init_registers = nbio_v7_4_init_registers, 271 .init_registers = nbio_v7_4_init_registers,
264 .detect_hw_virt = nbio_v7_4_detect_hw_virt, 272 .detect_hw_virt = nbio_v7_4_detect_hw_virt,
273 .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
265}; 274};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 2f79765b4bdb..7f8edc66ddff 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -94,6 +94,7 @@ enum psp_gfx_cmd_id
94 GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ 94 GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
95 GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ 95 GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
96 GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ 96 GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
97 GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
97}; 98};
98 99
99 100
@@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw
217 enum psp_gfx_fw_type fw_type; /* FW type */ 218 enum psp_gfx_fw_type fw_type; /* FW type */
218}; 219};
219 220
221/* Command to setup register program */
222struct psp_gfx_cmd_reg_prog {
223 uint32_t reg_value;
224 uint32_t reg_id;
225};
226
220/* All GFX ring buffer commands. */ 227/* All GFX ring buffer commands. */
221union psp_gfx_commands 228union psp_gfx_commands
222{ 229{
@@ -226,6 +233,7 @@ union psp_gfx_commands
226 struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; 233 struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
227 struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; 234 struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
228 struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; 235 struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
236 struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog;
229}; 237};
230 238
231 239
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 143f0fae69d5..3f5827764df0 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
50 50
51static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; 51static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
52 52
53static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
54static int psp_v3_1_ring_stop(struct psp_context *psp,
55 enum psp_ring_type ring_type);
56
53static int psp_v3_1_init_microcode(struct psp_context *psp) 57static int psp_v3_1_init_microcode(struct psp_context *psp)
54{ 58{
55 struct amdgpu_device *adev = psp->adev; 59 struct amdgpu_device *adev = psp->adev;
@@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
296 300
297 psp_v3_1_reroute_ih(psp); 301 psp_v3_1_reroute_ih(psp);
298 302
299 /* Write low address of the ring to C2PMSG_69 */ 303 if (psp_v3_1_support_vmr_ring(psp)) {
300 psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); 304 ret = psp_v3_1_ring_stop(psp, ring_type);
301 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); 305 if (ret) {
302 /* Write high address of the ring to C2PMSG_70 */ 306 DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
303 psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); 307 return ret;
304 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); 308 }
305 /* Write size of ring to C2PMSG_71 */ 309
306 psp_ring_reg = ring->ring_size; 310 /* Write low address of the ring to C2PMSG_102 */
307 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); 311 psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
308 /* Write the ring initialization command to C2PMSG_64 */ 312 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
309 psp_ring_reg = ring_type; 313 /* Write high address of the ring to C2PMSG_103 */
310 psp_ring_reg = psp_ring_reg << 16; 314 psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
311 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); 315 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
312 316 /* No size initialization for sriov */
313 /* there might be handshake issue with hardware which needs delay */ 317 /* Write the ring initialization command to C2PMSG_101 */
314 mdelay(20); 318 psp_ring_reg = ring_type;
315 319 psp_ring_reg = psp_ring_reg << 16;
316 /* Wait for response flag (bit 31) in C2PMSG_64 */ 320 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
317 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), 321
318 0x80000000, 0x8000FFFF, false); 322 /* there might be hardware handshake issue which needs delay */
323 mdelay(20);
324
325 /* Wait for response flag (bit 31) in C2PMSG_101 */
326 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
327 mmMP0_SMN_C2PMSG_101), 0x80000000,
328 0x8000FFFF, false);
329 } else {
330
331 /* Write low address of the ring to C2PMSG_69 */
332 psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
333 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
334 /* Write high address of the ring to C2PMSG_70 */
335 psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
336 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
337 /* Write size of ring to C2PMSG_71 */
338 psp_ring_reg = ring->ring_size;
339 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
340 /* Write the ring initialization command to C2PMSG_64 */
341 psp_ring_reg = ring_type;
342 psp_ring_reg = psp_ring_reg << 16;
343 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
344
345 /* there might be hardware handshake issue which needs delay */
346 mdelay(20);
347
348 /* Wait for response flag (bit 31) in C2PMSG_64 */
349 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
350 mmMP0_SMN_C2PMSG_64), 0x80000000,
351 0x8000FFFF, false);
319 352
353 }
320 return ret; 354 return ret;
321} 355}
322 356
@@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
327 unsigned int psp_ring_reg = 0; 361 unsigned int psp_ring_reg = 0;
328 struct amdgpu_device *adev = psp->adev; 362 struct amdgpu_device *adev = psp->adev;
329 363
330 /* Write the ring destroy command to C2PMSG_64 */ 364 if (psp_v3_1_support_vmr_ring(psp)) {
331 psp_ring_reg = 3 << 16; 365 /* Write the Destroy GPCOM ring command to C2PMSG_101 */
332 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); 366 psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING;
333 367 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
334 /* there might be handshake issue with hardware which needs delay */ 368
335 mdelay(20); 369 /* there might be handshake issue which needs delay */
336 370 mdelay(20);
337 /* Wait for response flag (bit 31) in C2PMSG_64 */ 371
338 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), 372 /* Wait for response flag (bit 31) in C2PMSG_101 */
339 0x80000000, 0x80000000, false); 373 ret = psp_wait_for(psp,
374 SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
375 0x80000000, 0x80000000, false);
376 } else {
377 /* Write the ring destroy command to C2PMSG_64 */
378 psp_ring_reg = 3 << 16;
379 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
380
381 /* there might be handshake issue which needs delay */
382 mdelay(20);
383
384 /* Wait for response flag (bit 31) in C2PMSG_64 */
385 ret = psp_wait_for(psp,
386 SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
387 0x80000000, 0x80000000, false);
388 }
340 389
341 return ret; 390 return ret;
342} 391}
@@ -375,7 +424,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
375 uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; 424 uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
376 425
377 /* KM (GPCOM) prepare write pointer */ 426 /* KM (GPCOM) prepare write pointer */
378 psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); 427 if (psp_v3_1_support_vmr_ring(psp))
428 psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
429 else
430 psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
379 431
380 /* Update KM RB frame pointer to new frame */ 432 /* Update KM RB frame pointer to new frame */
381 /* write_frame ptr increments by size of rb_frame in bytes */ 433 /* write_frame ptr increments by size of rb_frame in bytes */
@@ -404,7 +456,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
404 456
405 /* Update the write Pointer in DWORDs */ 457 /* Update the write Pointer in DWORDs */
406 psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; 458 psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
407 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); 459 if (psp_v3_1_support_vmr_ring(psp)) {
460 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
461 /* send interrupt to PSP for SRIOV ring write pointer update */
462 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
463 GFX_CTRL_CMD_ID_CONSUME_CMD);
464 } else
465 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
408 466
409 return 0; 467 return 0;
410} 468}
@@ -574,6 +632,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
574 return 0; 632 return 0;
575} 633}
576 634
635static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
636{
637 if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
638 return true;
639
640 return false;
641}
642
577static const struct psp_funcs psp_v3_1_funcs = { 643static const struct psp_funcs psp_v3_1_funcs = {
578 .init_microcode = psp_v3_1_init_microcode, 644 .init_microcode = psp_v3_1_init_microcode,
579 .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, 645 .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
@@ -586,6 +652,7 @@ static const struct psp_funcs psp_v3_1_funcs = {
586 .compare_sram_data = psp_v3_1_compare_sram_data, 652 .compare_sram_data = psp_v3_1_compare_sram_data,
587 .smu_reload_quirk = psp_v3_1_smu_reload_quirk, 653 .smu_reload_quirk = psp_v3_1_smu_reload_quirk,
588 .mode1_reset = psp_v3_1_mode1_reset, 654 .mode1_reset = psp_v3_1_mode1_reset,
655 .support_vmr_ring = psp_v3_1_support_vmr_ring,
589}; 656};
590 657
591void psp_v3_1_set_psp_funcs(struct psp_context *psp) 658void psp_v3_1_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 9c88ce513d78..7a259c5b6c62 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -210,12 +210,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
210{ 210{
211 switch (adev->asic_type) { 211 switch (adev->asic_type) {
212 case CHIP_VEGA10: 212 case CHIP_VEGA10:
213 soc15_program_register_sequence(adev, 213 if (!amdgpu_virt_support_skip_setting(adev)) {
214 golden_settings_sdma_4, 214 soc15_program_register_sequence(adev,
215 ARRAY_SIZE(golden_settings_sdma_4)); 215 golden_settings_sdma_4,
216 soc15_program_register_sequence(adev, 216 ARRAY_SIZE(golden_settings_sdma_4));
217 golden_settings_sdma_vg10, 217 soc15_program_register_sequence(adev,
218 ARRAY_SIZE(golden_settings_sdma_vg10)); 218 golden_settings_sdma_vg10,
219 ARRAY_SIZE(golden_settings_sdma_vg10));
220 }
219 break; 221 break;
220 case CHIP_VEGA12: 222 case CHIP_VEGA12:
221 soc15_program_register_sequence(adev, 223 soc15_program_register_sequence(adev,
@@ -1521,8 +1523,25 @@ static int sdma_v4_0_late_init(void *handle)
1521 } 1523 }
1522 1524
1523 /* handle resume path. */ 1525 /* handle resume path. */
1524 if (*ras_if) 1526 if (*ras_if) {
1527 /* resend ras TA enable cmd during resume.
1528 * prepare to handle failure.
1529 */
1530 ih_info.head = **ras_if;
1531 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
1532 if (r) {
1533 if (r == -EAGAIN) {
1534 /* request a gpu reset. will run again. */
1535 amdgpu_ras_request_reset_on_boot(adev,
1536 AMDGPU_RAS_BLOCK__SDMA);
1537 return 0;
1538 }
1539 /* fail to enable ras, cleanup all. */
1540 goto irq;
1541 }
1542 /* enable successfully. continue. */
1525 goto resume; 1543 goto resume;
1544 }
1526 1545
1527 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 1546 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
1528 if (!*ras_if) 1547 if (!*ras_if)
@@ -1531,8 +1550,14 @@ static int sdma_v4_0_late_init(void *handle)
1531 **ras_if = ras_block; 1550 **ras_if = ras_block;
1532 1551
1533 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 1552 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
1534 if (r) 1553 if (r) {
1554 if (r == -EAGAIN) {
1555 amdgpu_ras_request_reset_on_boot(adev,
1556 AMDGPU_RAS_BLOCK__SDMA);
1557 r = 0;
1558 }
1535 goto feature; 1559 goto feature;
1560 }
1536 1561
1537 ih_info.head = **ras_if; 1562 ih_info.head = **ras_if;
1538 fs_info.head = **ras_if; 1563 fs_info.head = **ras_if;
@@ -1571,7 +1596,7 @@ interrupt:
1571feature: 1596feature:
1572 kfree(*ras_if); 1597 kfree(*ras_if);
1573 *ras_if = NULL; 1598 *ras_if = NULL;
1574 return -EINVAL; 1599 return r;
1575} 1600}
1576 1601
1577static int sdma_v4_0_sw_init(void *handle) 1602static int sdma_v4_0_sw_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 9d8df68893b9..4ff930a47e10 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1375,6 +1375,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
1375 *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); 1375 *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
1376} 1376}
1377 1377
1378static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
1379{
1380 uint64_t nak_r, nak_g;
1381
1382 /* Get the number of NAKs received and generated */
1383 nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
1384 nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
1385
1386 /* Add the total number of NAKs, i.e the number of replays */
1387 return (nak_r + nak_g);
1388}
1389
1378static const struct amdgpu_asic_funcs si_asic_funcs = 1390static const struct amdgpu_asic_funcs si_asic_funcs =
1379{ 1391{
1380 .read_disabled_bios = &si_read_disabled_bios, 1392 .read_disabled_bios = &si_read_disabled_bios,
@@ -1393,6 +1405,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
1393 .need_full_reset = &si_need_full_reset, 1405 .need_full_reset = &si_need_full_reset,
1394 .get_pcie_usage = &si_get_pcie_usage, 1406 .get_pcie_usage = &si_get_pcie_usage,
1395 .need_reset_on_init = &si_need_reset_on_init, 1407 .need_reset_on_init = &si_need_reset_on_init,
1408 .get_pcie_replay_count = &si_get_pcie_replay_count,
1396}; 1409};
1397 1410
1398static uint32_t si_get_rev_id(struct amdgpu_device *adev) 1411static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index b7e594c2bfb4..d9fdd95fd6e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -44,6 +44,7 @@
44#include "smuio/smuio_9_0_offset.h" 44#include "smuio/smuio_9_0_offset.h"
45#include "smuio/smuio_9_0_sh_mask.h" 45#include "smuio/smuio_9_0_sh_mask.h"
46#include "nbio/nbio_7_0_default.h" 46#include "nbio/nbio_7_0_default.h"
47#include "nbio/nbio_7_0_offset.h"
47#include "nbio/nbio_7_0_sh_mask.h" 48#include "nbio/nbio_7_0_sh_mask.h"
48#include "nbio/nbio_7_0_smn.h" 49#include "nbio/nbio_7_0_smn.h"
49#include "mp/mp_9_0_offset.h" 50#include "mp/mp_9_0_offset.h"
@@ -64,6 +65,9 @@
64#include "dce_virtual.h" 65#include "dce_virtual.h"
65#include "mxgpu_ai.h" 66#include "mxgpu_ai.h"
66#include "amdgpu_smu.h" 67#include "amdgpu_smu.h"
68#include "amdgpu_ras.h"
69#include "amdgpu_xgmi.h"
70#include <uapi/linux/kfd_ioctl.h>
67 71
68#define mmMP0_MISC_CGTT_CTRL0 0x01b9 72#define mmMP0_MISC_CGTT_CTRL0 0x01b9
69#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 73#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
@@ -230,7 +234,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
230 grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); 234 grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
231 grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); 235 grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
232 236
233 WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); 237 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
234} 238}
235 239
236static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) 240static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
@@ -385,7 +389,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
385 tmp &= ~(entry->and_mask); 389 tmp &= ~(entry->and_mask);
386 tmp |= entry->or_mask; 390 tmp |= entry->or_mask;
387 } 391 }
388 WREG32(reg, tmp); 392
393 if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) ||
394 reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) ||
395 reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) ||
396 reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
397 WREG32_RLC(reg, tmp);
398 else
399 WREG32(reg, tmp);
400
389 } 401 }
390 402
391} 403}
@@ -475,6 +487,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
475 soc15_asic_get_baco_capability(adev, &baco_reset); 487 soc15_asic_get_baco_capability(adev, &baco_reset);
476 else 488 else
477 baco_reset = false; 489 baco_reset = false;
490 if (baco_reset) {
491 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
492 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
493
494 if (hive || (ras && ras->supported))
495 baco_reset = false;
496 }
478 break; 497 break;
479 default: 498 default:
480 baco_reset = false; 499 baco_reset = false;
@@ -606,12 +625,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
606 case CHIP_VEGA20: 625 case CHIP_VEGA20:
607 amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); 626 amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
608 amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); 627 amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
609 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); 628
610 if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { 629 /* For Vega10 SR-IOV, PSP need to be initialized before IH */
611 if (adev->asic_type == CHIP_VEGA20) 630 if (amdgpu_sriov_vf(adev)) {
612 amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); 631 if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
613 else 632 if (adev->asic_type == CHIP_VEGA20)
614 amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); 633 amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
634 else
635 amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
636 }
637 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
638 } else {
639 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
640 if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
641 if (adev->asic_type == CHIP_VEGA20)
642 amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
643 else
644 amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
645 }
615 } 646 }
616 amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); 647 amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
617 amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); 648 amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
@@ -733,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
733 /* Just return false for soc15 GPUs. Reset does not seem to 764 /* Just return false for soc15 GPUs. Reset does not seem to
734 * be necessary. 765 * be necessary.
735 */ 766 */
736 return false; 767 if (!amdgpu_passthrough(adev))
768 return false;
737 769
738 if (adev->flags & AMD_IS_APU) 770 if (adev->flags & AMD_IS_APU)
739 return false; 771 return false;
@@ -748,6 +780,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
748 return false; 780 return false;
749} 781}
750 782
783static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev)
784{
785 uint64_t nak_r, nak_g;
786
787 /* Get the number of NAKs received and generated */
788 nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK);
789 nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED);
790
791 /* Add the total number of NAKs, i.e the number of replays */
792 return (nak_r + nak_g);
793}
794
751static const struct amdgpu_asic_funcs soc15_asic_funcs = 795static const struct amdgpu_asic_funcs soc15_asic_funcs =
752{ 796{
753 .read_disabled_bios = &soc15_read_disabled_bios, 797 .read_disabled_bios = &soc15_read_disabled_bios,
@@ -765,6 +809,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
765 .init_doorbell_index = &vega10_doorbell_index_init, 809 .init_doorbell_index = &vega10_doorbell_index_init,
766 .get_pcie_usage = &soc15_get_pcie_usage, 810 .get_pcie_usage = &soc15_get_pcie_usage,
767 .need_reset_on_init = &soc15_need_reset_on_init, 811 .need_reset_on_init = &soc15_need_reset_on_init,
812 .get_pcie_replay_count = &soc15_get_pcie_replay_count,
768}; 813};
769 814
770static const struct amdgpu_asic_funcs vega20_asic_funcs = 815static const struct amdgpu_asic_funcs vega20_asic_funcs =
@@ -784,12 +829,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
784 .init_doorbell_index = &vega20_doorbell_index_init, 829 .init_doorbell_index = &vega20_doorbell_index_init,
785 .get_pcie_usage = &soc15_get_pcie_usage, 830 .get_pcie_usage = &soc15_get_pcie_usage,
786 .need_reset_on_init = &soc15_need_reset_on_init, 831 .need_reset_on_init = &soc15_need_reset_on_init,
832 .get_pcie_replay_count = &soc15_get_pcie_replay_count,
787}; 833};
788 834
789static int soc15_common_early_init(void *handle) 835static int soc15_common_early_init(void *handle)
790{ 836{
837#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
791 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 838 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
792 839
840 adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
841 adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
793 adev->smc_rreg = NULL; 842 adev->smc_rreg = NULL;
794 adev->smc_wreg = NULL; 843 adev->smc_wreg = NULL;
795 adev->pcie_rreg = &soc15_pcie_rreg; 844 adev->pcie_rreg = &soc15_pcie_rreg;
@@ -998,11 +1047,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
998 int i; 1047 int i;
999 struct amdgpu_ring *ring; 1048 struct amdgpu_ring *ring;
1000 1049
1001 for (i = 0; i < adev->sdma.num_instances; i++) { 1050 /* Two reasons to skip
1002 ring = &adev->sdma.instance[i].ring; 1051 * 1, Host driver already programmed them
1003 adev->nbio_funcs->sdma_doorbell_range(adev, i, 1052 * 2, To avoid registers program violations in SR-IOV
1004 ring->use_doorbell, ring->doorbell_index, 1053 */
1005 adev->doorbell_index.sdma_doorbell_range); 1054 if (!amdgpu_virt_support_skip_setting(adev)) {
1055 for (i = 0; i < adev->sdma.num_instances; i++) {
1056 ring = &adev->sdma.instance[i].ring;
1057 adev->nbio_funcs->sdma_doorbell_range(adev, i,
1058 ring->use_doorbell, ring->doorbell_index,
1059 adev->doorbell_index.sdma_doorbell_range);
1060 }
1006 } 1061 }
1007 1062
1008 adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, 1063 adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
@@ -1019,6 +1074,12 @@ static int soc15_common_hw_init(void *handle)
1019 soc15_program_aspm(adev); 1074 soc15_program_aspm(adev);
1020 /* setup nbio registers */ 1075 /* setup nbio registers */
1021 adev->nbio_funcs->init_registers(adev); 1076 adev->nbio_funcs->init_registers(adev);
1077 /* remap HDP registers to a hole in mmio space,
1078 * for the purpose of expose those registers
1079 * to process space
1080 */
1081 if (adev->nbio_funcs->remap_hdp_registers)
1082 adev->nbio_funcs->remap_hdp_registers(adev);
1022 /* enable the doorbell aperture */ 1083 /* enable the doorbell aperture */
1023 soc15_enable_doorbell_aperture(adev, true); 1084 soc15_enable_doorbell_aperture(adev, true);
1024 /* HW doorbell routing policy: doorbell writing not 1085 /* HW doorbell routing policy: doorbell writing not
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index a66c8bfbbaa6..06f39f5bbf76 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -42,8 +42,18 @@ struct soc15_reg_golden {
42 u32 or_mask; 42 u32 or_mask;
43}; 43};
44 44
45struct soc15_reg_entry {
46 uint32_t hwip;
47 uint32_t inst;
48 uint32_t seg;
49 uint32_t reg_offset;
50 uint32_t reg_value;
51};
52
45#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg 53#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
46 54
55#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
56
47#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ 57#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
48 { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } 58 { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
49 59
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 49c262540940..47f74dab365d 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,26 +69,60 @@
69 } \ 69 } \
70 } while (0) 70 } while (0)
71 71
72#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ 72#define WREG32_RLC(reg, value) \
73 ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ 73 do { \
74 WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ 74 if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
75 UVD_DPG_LMA_CTL__MASK_EN_MASK | \ 75 uint32_t i = 0; \
76 ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ 76 uint32_t retries = 50000; \
77 << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ 77 uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
78 (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ 78 uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \
79 RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); }) 79 uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \
80 WREG32(r0, value); \
81 WREG32(r1, (reg | 0x80000000)); \
82 WREG32(spare_int, 0x1); \
83 for (i = 0; i < retries; i++) { \
84 u32 tmp = RREG32(r1); \
85 if (!(tmp & 0x80000000)) \
86 break; \
87 udelay(10); \
88 } \
89 if (i >= retries) \
90 pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \
91 } else { \
92 WREG32(reg, value); \
93 } \
94 } while (0)
80 95
81#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ 96#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
82 do { \ 97 do { \
83 WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ 98 uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
84 WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ 99 if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
85 WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ 100 uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
86 UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ 101 uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
87 ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ 102 uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
88 << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ 103 uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \
89 (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ 104 if (target_reg == grbm_cntl) \
105 WREG32(r2, value); \
106 else if (target_reg == grbm_idx) \
107 WREG32(r3, value); \
108 WREG32(target_reg, value); \
109 } else { \
110 WREG32(target_reg, value); \
111 } \
90 } while (0) 112 } while (0)
91 113
92#endif 114#define WREG32_SOC15_RLC(ip, inst, reg, value) \
115 do { \
116 uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
117 WREG32_RLC(target_reg, value); \
118 } while (0)
119
120#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
121 WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
122 (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
123 & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
93 124
125#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
126 WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
94 127
128#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c4fb58667fd4..bf3385280d3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
741 .type = AMDGPU_RING_TYPE_UVD, 741 .type = AMDGPU_RING_TYPE_UVD,
742 .align_mask = 0xf, 742 .align_mask = 0xf,
743 .support_64bit_ptrs = false, 743 .support_64bit_ptrs = false,
744 .no_user_fence = true,
744 .get_rptr = uvd_v4_2_ring_get_rptr, 745 .get_rptr = uvd_v4_2_ring_get_rptr,
745 .get_wptr = uvd_v4_2_ring_get_wptr, 746 .get_wptr = uvd_v4_2_ring_get_wptr,
746 .set_wptr = uvd_v4_2_ring_set_wptr, 747 .set_wptr = uvd_v4_2_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 52bd8a654734..3210a7bd9a6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -849,6 +849,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
849 .type = AMDGPU_RING_TYPE_UVD, 849 .type = AMDGPU_RING_TYPE_UVD,
850 .align_mask = 0xf, 850 .align_mask = 0xf,
851 .support_64bit_ptrs = false, 851 .support_64bit_ptrs = false,
852 .no_user_fence = true,
852 .get_rptr = uvd_v5_0_ring_get_rptr, 853 .get_rptr = uvd_v5_0_ring_get_rptr,
853 .get_wptr = uvd_v5_0_ring_get_wptr, 854 .get_wptr = uvd_v5_0_ring_get_wptr,
854 .set_wptr = uvd_v5_0_ring_set_wptr, 855 .set_wptr = uvd_v5_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index c9edddf9f88a..c61a314c56cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
1502 .type = AMDGPU_RING_TYPE_UVD, 1502 .type = AMDGPU_RING_TYPE_UVD,
1503 .align_mask = 0xf, 1503 .align_mask = 0xf,
1504 .support_64bit_ptrs = false, 1504 .support_64bit_ptrs = false,
1505 .no_user_fence = true,
1505 .get_rptr = uvd_v6_0_ring_get_rptr, 1506 .get_rptr = uvd_v6_0_ring_get_rptr,
1506 .get_wptr = uvd_v6_0_ring_get_wptr, 1507 .get_wptr = uvd_v6_0_ring_get_wptr,
1507 .set_wptr = uvd_v6_0_ring_set_wptr, 1508 .set_wptr = uvd_v6_0_ring_set_wptr,
@@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
1527 .type = AMDGPU_RING_TYPE_UVD, 1528 .type = AMDGPU_RING_TYPE_UVD,
1528 .align_mask = 0xf, 1529 .align_mask = 0xf,
1529 .support_64bit_ptrs = false, 1530 .support_64bit_ptrs = false,
1531 .no_user_fence = true,
1530 .get_rptr = uvd_v6_0_ring_get_rptr, 1532 .get_rptr = uvd_v6_0_ring_get_rptr,
1531 .get_wptr = uvd_v6_0_ring_get_wptr, 1533 .get_wptr = uvd_v6_0_ring_get_wptr,
1532 .set_wptr = uvd_v6_0_ring_set_wptr, 1534 .set_wptr = uvd_v6_0_ring_set_wptr,
@@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
1555 .align_mask = 0x3f, 1557 .align_mask = 0x3f,
1556 .nop = HEVC_ENC_CMD_NO_OP, 1558 .nop = HEVC_ENC_CMD_NO_OP,
1557 .support_64bit_ptrs = false, 1559 .support_64bit_ptrs = false,
1560 .no_user_fence = true,
1558 .get_rptr = uvd_v6_0_enc_ring_get_rptr, 1561 .get_rptr = uvd_v6_0_enc_ring_get_rptr,
1559 .get_wptr = uvd_v6_0_enc_ring_get_wptr, 1562 .get_wptr = uvd_v6_0_enc_ring_get_wptr,
1560 .set_wptr = uvd_v6_0_enc_ring_set_wptr, 1563 .set_wptr = uvd_v6_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 2191d3d0a219..cdb96d4cb424 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
1759 .type = AMDGPU_RING_TYPE_UVD, 1759 .type = AMDGPU_RING_TYPE_UVD,
1760 .align_mask = 0xf, 1760 .align_mask = 0xf,
1761 .support_64bit_ptrs = false, 1761 .support_64bit_ptrs = false,
1762 .no_user_fence = true,
1762 .vmhub = AMDGPU_MMHUB, 1763 .vmhub = AMDGPU_MMHUB,
1763 .get_rptr = uvd_v7_0_ring_get_rptr, 1764 .get_rptr = uvd_v7_0_ring_get_rptr,
1764 .get_wptr = uvd_v7_0_ring_get_wptr, 1765 .get_wptr = uvd_v7_0_ring_get_wptr,
@@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
1791 .align_mask = 0x3f, 1792 .align_mask = 0x3f,
1792 .nop = HEVC_ENC_CMD_NO_OP, 1793 .nop = HEVC_ENC_CMD_NO_OP,
1793 .support_64bit_ptrs = false, 1794 .support_64bit_ptrs = false,
1795 .no_user_fence = true,
1794 .vmhub = AMDGPU_MMHUB, 1796 .vmhub = AMDGPU_MMHUB,
1795 .get_rptr = uvd_v7_0_enc_ring_get_rptr, 1797 .get_rptr = uvd_v7_0_enc_ring_get_rptr,
1796 .get_wptr = uvd_v7_0_enc_ring_get_wptr, 1798 .get_wptr = uvd_v7_0_enc_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 40363ca6c5f1..ab0cb8325796 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
605 .align_mask = 0xf, 605 .align_mask = 0xf,
606 .nop = VCE_CMD_NO_OP, 606 .nop = VCE_CMD_NO_OP,
607 .support_64bit_ptrs = false, 607 .support_64bit_ptrs = false,
608 .no_user_fence = true,
608 .get_rptr = vce_v2_0_ring_get_rptr, 609 .get_rptr = vce_v2_0_ring_get_rptr,
609 .get_wptr = vce_v2_0_ring_get_wptr, 610 .get_wptr = vce_v2_0_ring_get_wptr,
610 .set_wptr = vce_v2_0_ring_set_wptr, 611 .set_wptr = vce_v2_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6ec65cf11112..36902ec16dcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
894 .align_mask = 0xf, 894 .align_mask = 0xf,
895 .nop = VCE_CMD_NO_OP, 895 .nop = VCE_CMD_NO_OP,
896 .support_64bit_ptrs = false, 896 .support_64bit_ptrs = false,
897 .no_user_fence = true,
897 .get_rptr = vce_v3_0_ring_get_rptr, 898 .get_rptr = vce_v3_0_ring_get_rptr,
898 .get_wptr = vce_v3_0_ring_get_wptr, 899 .get_wptr = vce_v3_0_ring_get_wptr,
899 .set_wptr = vce_v3_0_ring_set_wptr, 900 .set_wptr = vce_v3_0_ring_set_wptr,
@@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
917 .align_mask = 0xf, 918 .align_mask = 0xf,
918 .nop = VCE_CMD_NO_OP, 919 .nop = VCE_CMD_NO_OP,
919 .support_64bit_ptrs = false, 920 .support_64bit_ptrs = false,
921 .no_user_fence = true,
920 .get_rptr = vce_v3_0_ring_get_rptr, 922 .get_rptr = vce_v3_0_ring_get_rptr,
921 .get_wptr = vce_v3_0_ring_get_wptr, 923 .get_wptr = vce_v3_0_ring_get_wptr,
922 .set_wptr = vce_v3_0_ring_set_wptr, 924 .set_wptr = vce_v3_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index c0ec27991c22..e267b073f525 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1069 .align_mask = 0x3f, 1069 .align_mask = 0x3f,
1070 .nop = VCE_CMD_NO_OP, 1070 .nop = VCE_CMD_NO_OP,
1071 .support_64bit_ptrs = false, 1071 .support_64bit_ptrs = false,
1072 .no_user_fence = true,
1072 .vmhub = AMDGPU_MMHUB, 1073 .vmhub = AMDGPU_MMHUB,
1073 .get_rptr = vce_v4_0_ring_get_rptr, 1074 .get_rptr = vce_v4_0_ring_get_rptr,
1074 .get_wptr = vce_v4_0_ring_get_wptr, 1075 .get_wptr = vce_v4_0_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 3dbc51f9d3b9..bb47f5b24be5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
49static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); 49static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
50static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); 50static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
51static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); 51static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
52static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
53 struct dpg_pause_state *new_state);
52 54
53/** 55/**
54 * vcn_v1_0_early_init - set function pointers 56 * vcn_v1_0_early_init - set function pointers
@@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle)
140 if (r) 142 if (r)
141 return r; 143 return r;
142 144
143 return r; 145 adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
146
147 return 0;
144} 148}
145 149
146/** 150/**
@@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
1204 return r; 1208 return r;
1205} 1209}
1206 1210
1211static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
1212 struct dpg_pause_state *new_state)
1213{
1214 int ret_code;
1215 uint32_t reg_data = 0;
1216 uint32_t reg_data2 = 0;
1217 struct amdgpu_ring *ring;
1218
1219 /* pause/unpause if state is changed */
1220 if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
1221 DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
1222 adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
1223 new_state->fw_based, new_state->jpeg);
1224
1225 reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
1226 (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1227
1228 if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
1229 ret_code = 0;
1230
1231 if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
1232 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
1233 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
1234 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
1235
1236 if (!ret_code) {
1237 /* pause DPG non-jpeg */
1238 reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1239 WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
1240 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
1241 UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
1242 UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
1243
1244 /* Restore */
1245 ring = &adev->vcn.ring_enc[0];
1246 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
1247 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1248 WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
1249 WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1250 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1251
1252 ring = &adev->vcn.ring_enc[1];
1253 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1254 WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1255 WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
1256 WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1257 WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1258
1259 ring = &adev->vcn.ring_dec;
1260 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
1261 RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
1262 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
1263 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
1264 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
1265 }
1266 } else {
1267 /* unpause dpg non-jpeg, no need to wait */
1268 reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1269 WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
1270 }
1271 adev->vcn.pause_state.fw_based = new_state->fw_based;
1272 }
1273
1274 /* pause/unpause if state is changed */
1275 if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
1276 DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
1277 adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
1278 new_state->fw_based, new_state->jpeg);
1279
1280 reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
1281 (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
1282
1283 if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
1284 ret_code = 0;
1285
1286 if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
1287 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
1288 UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
1289 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
1290
1291 if (!ret_code) {
1292 /* Make sure JPRG Snoop is disabled before sending the pause */
1293 reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
1294 reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
1295 WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
1296
1297 /* pause DPG jpeg */
1298 reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
1299 WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
1300 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
1301 UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
1302 UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
1303
1304 /* Restore */
1305 ring = &adev->vcn.ring_jpeg;
1306 WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
1307 WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
1308 UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
1309 UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
1310 WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
1311 lower_32_bits(ring->gpu_addr));
1312 WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
1313 upper_32_bits(ring->gpu_addr));
1314 WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
1315 WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
1316 WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
1317 UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
1318
1319 ring = &adev->vcn.ring_dec;
1320 WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
1321 RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
1322 SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
1323 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
1324 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
1325 }
1326 } else {
1327 /* unpause dpg jpeg, no need to wait */
1328 reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
1329 WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
1330 }
1331 adev->vcn.pause_state.jpeg = new_state->jpeg;
1332 }
1333
1334 return 0;
1335}
1336
1207static bool vcn_v1_0_is_idle(void *handle) 1337static bool vcn_v1_0_is_idle(void *handle)
1208{ 1338{
1209 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1339 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
2054 .type = AMDGPU_RING_TYPE_VCN_DEC, 2184 .type = AMDGPU_RING_TYPE_VCN_DEC,
2055 .align_mask = 0xf, 2185 .align_mask = 0xf,
2056 .support_64bit_ptrs = false, 2186 .support_64bit_ptrs = false,
2187 .no_user_fence = true,
2057 .vmhub = AMDGPU_MMHUB, 2188 .vmhub = AMDGPU_MMHUB,
2058 .get_rptr = vcn_v1_0_dec_ring_get_rptr, 2189 .get_rptr = vcn_v1_0_dec_ring_get_rptr,
2059 .get_wptr = vcn_v1_0_dec_ring_get_wptr, 2190 .get_wptr = vcn_v1_0_dec_ring_get_wptr,
@@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
2087 .align_mask = 0x3f, 2218 .align_mask = 0x3f,
2088 .nop = VCN_ENC_CMD_NO_OP, 2219 .nop = VCN_ENC_CMD_NO_OP,
2089 .support_64bit_ptrs = false, 2220 .support_64bit_ptrs = false,
2221 .no_user_fence = true,
2090 .vmhub = AMDGPU_MMHUB, 2222 .vmhub = AMDGPU_MMHUB,
2091 .get_rptr = vcn_v1_0_enc_ring_get_rptr, 2223 .get_rptr = vcn_v1_0_enc_ring_get_rptr,
2092 .get_wptr = vcn_v1_0_enc_ring_get_wptr, 2224 .get_wptr = vcn_v1_0_enc_ring_get_wptr,
@@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
2118 .align_mask = 0xf, 2250 .align_mask = 0xf,
2119 .nop = PACKET0(0x81ff, 0), 2251 .nop = PACKET0(0x81ff, 0),
2120 .support_64bit_ptrs = false, 2252 .support_64bit_ptrs = false,
2253 .no_user_fence = true,
2121 .vmhub = AMDGPU_MMHUB, 2254 .vmhub = AMDGPU_MMHUB,
2122 .extra_dw = 64, 2255 .extra_dw = 64,
2123 .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, 2256 .get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 8d89ab7f0ae8..5f54acc70fec 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
48 48
49 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); 49 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
50 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); 50 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
51 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); 51 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
52 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
53 DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
54 return;
55 }
56 } else {
57 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
58 }
52 adev->irq.ih.enabled = true; 59 adev->irq.ih.enabled = true;
53 60
54 if (adev->irq.ih1.ring_size) { 61 if (adev->irq.ih1.ring_size) {
55 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); 62 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
56 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, 63 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
57 RB_ENABLE, 1); 64 RB_ENABLE, 1);
58 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); 65 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
66 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
67 ih_rb_cntl)) {
68 DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
69 return;
70 }
71 } else {
72 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
73 }
59 adev->irq.ih1.enabled = true; 74 adev->irq.ih1.enabled = true;
60 } 75 }
61 76
@@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
63 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); 78 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
64 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, 79 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
65 RB_ENABLE, 1); 80 RB_ENABLE, 1);
66 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); 81 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
82 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
83 ih_rb_cntl)) {
84 DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
85 return;
86 }
87 } else {
88 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
89 }
67 adev->irq.ih2.enabled = true; 90 adev->irq.ih2.enabled = true;
68 } 91 }
69} 92}
@@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
81 104
82 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); 105 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
83 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); 106 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
84 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); 107 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
108 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
109 DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
110 return;
111 }
112 } else {
113 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
114 }
115
85 /* set rptr, wptr to 0 */ 116 /* set rptr, wptr to 0 */
86 WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); 117 WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
87 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); 118 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
@@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
92 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); 123 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
93 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, 124 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
94 RB_ENABLE, 0); 125 RB_ENABLE, 0);
95 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); 126 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
127 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
128 ih_rb_cntl)) {
129 DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
130 return;
131 }
132 } else {
133 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
134 }
96 /* set rptr, wptr to 0 */ 135 /* set rptr, wptr to 0 */
97 WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); 136 WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
98 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); 137 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
104 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); 143 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
105 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, 144 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
106 RB_ENABLE, 0); 145 RB_ENABLE, 0);
107 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); 146 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
147 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
148 ih_rb_cntl)) {
149 DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
150 return;
151 }
152 } else {
153 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
154 }
155
108 /* set rptr, wptr to 0 */ 156 /* set rptr, wptr to 0 */
109 WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); 157 WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
110 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); 158 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
@@ -187,7 +235,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
187 ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); 235 ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
188 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, 236 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
189 !!adev->irq.msi_enabled); 237 !!adev->irq.msi_enabled);
190 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); 238
239 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
240 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
241 DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
242 return -ETIMEDOUT;
243 }
244 } else {
245 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
246 }
191 247
192 /* set the writeback address whether it's enabled or not */ 248 /* set the writeback address whether it's enabled or not */
193 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, 249 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
@@ -214,7 +270,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
214 WPTR_OVERFLOW_ENABLE, 0); 270 WPTR_OVERFLOW_ENABLE, 0);
215 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, 271 ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
216 RB_FULL_DRAIN_ENABLE, 1); 272 RB_FULL_DRAIN_ENABLE, 1);
217 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); 273 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
274 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
275 ih_rb_cntl)) {
276 DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
277 return -ETIMEDOUT;
278 }
279 } else {
280 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
281 }
218 282
219 /* set rptr, wptr to 0 */ 283 /* set rptr, wptr to 0 */
220 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); 284 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -232,7 +296,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
232 296
233 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); 297 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
234 ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); 298 ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
235 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); 299
300 if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
301 if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
302 ih_rb_cntl)) {
303 DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
304 return -ETIMEDOUT;
305 }
306 } else {
307 WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
308 }
236 309
237 /* set rptr, wptr to 0 */ 310 /* set rptr, wptr to 0 */
238 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); 311 WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 5e5b42a0744a..b8adf3808de2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -987,6 +987,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
987 *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); 987 *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
988} 988}
989 989
990static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
991{
992 uint64_t nak_r, nak_g;
993
994 /* Get the number of NAKs received and generated */
995 nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
996 nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
997
998 /* Add the total number of NAKs, i.e the number of replays */
999 return (nak_r + nak_g);
1000}
1001
990static bool vi_need_reset_on_init(struct amdgpu_device *adev) 1002static bool vi_need_reset_on_init(struct amdgpu_device *adev)
991{ 1003{
992 u32 clock_cntl, pc; 1004 u32 clock_cntl, pc;
@@ -1021,6 +1033,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
1021 .init_doorbell_index = &legacy_doorbell_index_init, 1033 .init_doorbell_index = &legacy_doorbell_index_init,
1022 .get_pcie_usage = &vi_get_pcie_usage, 1034 .get_pcie_usage = &vi_get_pcie_usage,
1023 .need_reset_on_init = &vi_need_reset_on_init, 1035 .need_reset_on_init = &vi_need_reset_on_init,
1036 .get_pcie_replay_count = &vi_get_pcie_replay_count,
1024}; 1037};
1025 1038
1026#define CZ_REV_BRISTOL(rev) \ 1039#define CZ_REV_BRISTOL(rev) \
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 3621efbd5759..e413d4a71fa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -21,7 +21,7 @@
21 */ 21 */
22 22
23static const uint32_t cwsr_trap_gfx8_hex[] = { 23static const uint32_t cwsr_trap_gfx8_hex[] = {
24 0xbf820001, 0xbf82012b, 24 0xbf820001, 0xbf820121,
25 0xb8f4f802, 0x89748674, 25 0xb8f4f802, 0x89748674,
26 0xb8f5f803, 0x8675ff75, 26 0xb8f5f803, 0x8675ff75,
27 0x00000400, 0xbf850017, 27 0x00000400, 0xbf850017,
@@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
36 0x8671ff71, 0x0000ffff, 36 0x8671ff71, 0x0000ffff,
37 0x8f728374, 0xb972e0c2, 37 0x8f728374, 0xb972e0c2,
38 0xbf800002, 0xb9740002, 38 0xbf800002, 0xb9740002,
39 0xbe801f70, 0xb8f5f803, 39 0xbe801f70, 0xbefa0080,
40 0x8675ff75, 0x00000100,
41 0xbf840006, 0xbefa0080,
42 0xb97a0203, 0x8671ff71,
43 0x0000ffff, 0x80f08870,
44 0x82f18071, 0xbefa0080,
45 0xb97a0283, 0xbef60068, 40 0xb97a0283, 0xbef60068,
46 0xbef70069, 0xb8fa1c07, 41 0xbef70069, 0xb8fa1c07,
47 0x8e7a9c7a, 0x87717a71, 42 0x8e7a9c7a, 0x87717a71,
@@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
279 274
280 275
281static const uint32_t cwsr_trap_gfx9_hex[] = { 276static const uint32_t cwsr_trap_gfx9_hex[] = {
282 0xbf820001, 0xbf82015d, 277 0xbf820001, 0xbf82015e,
283 0xb8f8f802, 0x89788678, 278 0xb8f8f802, 0x89788678,
284 0xb8f1f803, 0x866eff71, 279 0xb8fbf803, 0x866eff7b,
285 0x00000400, 0xbf850037, 280 0x00000400, 0xbf85003b,
286 0x866eff71, 0x00000800, 281 0x866eff7b, 0x00000800,
287 0xbf850003, 0x866eff71, 282 0xbf850003, 0x866eff7b,
288 0x00000100, 0xbf840008, 283 0x00000100, 0xbf84000c,
289 0x866eff78, 0x00002000, 284 0x866eff78, 0x00002000,
290 0xbf840001, 0xbf810000, 285 0xbf840005, 0xbf8e0010,
286 0xb8eef803, 0x866eff6e,
287 0x00000400, 0xbf84fffb,
291 0x8778ff78, 0x00002000, 288 0x8778ff78, 0x00002000,
292 0x80ec886c, 0x82ed806d, 289 0x80ec886c, 0x82ed806d,
293 0xb8eef807, 0x866fff6e, 290 0xb8eef807, 0x866fff6e,
@@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
295 0x8977ff77, 0xfc000000, 292 0x8977ff77, 0xfc000000,
296 0x87776f77, 0x896eff6e, 293 0x87776f77, 0x896eff6e,
297 0x001f8000, 0xb96ef807, 294 0x001f8000, 0xb96ef807,
298 0xb8f0f812, 0xb8f1f813, 295 0xb8faf812, 0xb8fbf813,
299 0x8ef08870, 0xc0071bb8, 296 0x8efa887a, 0xc0071bbd,
300 0x00000000, 0xbf8cc07f, 297 0x00000000, 0xbf8cc07f,
301 0xc0071c38, 0x00000008, 298 0xc0071ebd, 0x00000008,
302 0xbf8cc07f, 0x86ee6e6e, 299 0xbf8cc07f, 0x86ee6e6e,
303 0xbf840001, 0xbe801d6e, 300 0xbf840001, 0xbe801d6e,
304 0xb8f1f803, 0x8671ff71, 301 0xb8fbf803, 0x867bff7b,
305 0x000001ff, 0xbf850002, 302 0x000001ff, 0xbf850002,
306 0x806c846c, 0x826d806d, 303 0x806c846c, 0x826d806d,
307 0x866dff6d, 0x0000ffff, 304 0x866dff6d, 0x0000ffff,
@@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
311 0x8f6e8378, 0xb96ee0c2, 308 0x8f6e8378, 0xb96ee0c2,
312 0xbf800002, 0xb9780002, 309 0xbf800002, 0xb9780002,
313 0xbe801f6c, 0x866dff6d, 310 0xbe801f6c, 0x866dff6d,
314 0x0000ffff, 0xbef00080, 311 0x0000ffff, 0xbefa0080,
315 0xb9700283, 0xb8f02407, 312 0xb97a0283, 0xb8fa2407,
316 0x8e709c70, 0x876d706d, 313 0x8e7a9b7a, 0x876d7a6d,
317 0xb8f003c7, 0x8e709b70, 314 0xb8fa03c7, 0x8e7a9a7a,
318 0x876d706d, 0xb8f0f807, 315 0x876d7a6d, 0xb8faf807,
319 0x8670ff70, 0x00007fff, 316 0x867aff7a, 0x00007fff,
320 0xb970f807, 0xbeee007e, 317 0xb97af807, 0xbeee007e,
321 0xbeef007f, 0xbefe0180, 318 0xbeef007f, 0xbefe0180,
322 0xbf900004, 0x87708478, 319 0xbf900004, 0x877a8478,
323 0xb970f802, 0xbf8e0002, 320 0xb97af802, 0xbf8e0002,
324 0xbf88fffe, 0xb8f02a05, 321 0xbf88fffe, 0xb8fa2a05,
322 0x807a817a, 0x8e7a8a7a,
323 0xb8fb1605, 0x807b817b,
324 0x8e7b867b, 0x807a7b7a,
325 0x807a7e7a, 0x827b807f,
326 0x867bff7b, 0x0000ffff,
327 0xc04b1c3d, 0x00000050,
328 0xbf8cc07f, 0xc04b1d3d,
329 0x00000060, 0xbf8cc07f,
330 0xc0431e7d, 0x00000074,
331 0xbf8cc07f, 0xbef4007e,
332 0x8675ff7f, 0x0000ffff,
333 0x8775ff75, 0x00040000,
334 0xbef60080, 0xbef700ff,
335 0x00807fac, 0x867aff7f,
336 0x08000000, 0x8f7a837a,
337 0x87777a77, 0x867aff7f,
338 0x70000000, 0x8f7a817a,
339 0x87777a77, 0xbef1007c,
340 0xbef00080, 0xb8f02a05,
325 0x80708170, 0x8e708a70, 341 0x80708170, 0x8e708a70,
326 0xb8f11605, 0x80718171, 342 0xb8fa1605, 0x807a817a,
327 0x8e718671, 0x80707170, 343 0x8e7a867a, 0x80707a70,
328 0x80707e70, 0x8271807f, 344 0xbef60084, 0xbef600ff,
329 0x8671ff71, 0x0000ffff, 345 0x01000000, 0xbefe007c,
330 0xc0471cb8, 0x00000040, 346 0xbefc0070, 0xc0611c7a,
331 0xbf8cc07f, 0xc04b1d38,
332 0x00000048, 0xbf8cc07f,
333 0xc0431e78, 0x00000058,
334 0xbf8cc07f, 0xc0471eb8,
335 0x0000005c, 0xbf8cc07f,
336 0xbef4007e, 0x8675ff7f,
337 0x0000ffff, 0x8775ff75,
338 0x00040000, 0xbef60080,
339 0xbef700ff, 0x00807fac,
340 0x8670ff7f, 0x08000000,
341 0x8f708370, 0x87777077,
342 0x8670ff7f, 0x70000000,
343 0x8f708170, 0x87777077,
344 0xbefb007c, 0xbefa0080,
345 0xb8fa2a05, 0x807a817a,
346 0x8e7a8a7a, 0xb8f01605,
347 0x80708170, 0x8e708670,
348 0x807a707a, 0xbef60084,
349 0xbef600ff, 0x01000000,
350 0xbefe007c, 0xbefc007a,
351 0xc0611efa, 0x0000007c,
352 0xbf8cc07f, 0x807a847a,
353 0xbefc007e, 0xbefe007c,
354 0xbefc007a, 0xc0611b3a,
355 0x0000007c, 0xbf8cc07f, 347 0x0000007c, 0xbf8cc07f,
356 0x807a847a, 0xbefc007e, 348 0x80708470, 0xbefc007e,
357 0xbefe007c, 0xbefc007a, 349 0xbefe007c, 0xbefc0070,
358 0xc0611b7a, 0x0000007c, 350 0xc0611b3a, 0x0000007c,
359 0xbf8cc07f, 0x807a847a, 351 0xbf8cc07f, 0x80708470,
360 0xbefc007e, 0xbefe007c, 352 0xbefc007e, 0xbefe007c,
361 0xbefc007a, 0xc0611bba, 353 0xbefc0070, 0xc0611b7a,
362 0x0000007c, 0xbf8cc07f, 354 0x0000007c, 0xbf8cc07f,
363 0x807a847a, 0xbefc007e, 355 0x80708470, 0xbefc007e,
364 0xbefe007c, 0xbefc007a, 356 0xbefe007c, 0xbefc0070,
365 0xc0611bfa, 0x0000007c, 357 0xc0611bba, 0x0000007c,
366 0xbf8cc07f, 0x807a847a, 358 0xbf8cc07f, 0x80708470,
367 0xbefc007e, 0xbefe007c, 359 0xbefc007e, 0xbefe007c,
368 0xbefc007a, 0xc0611e3a, 360 0xbefc0070, 0xc0611bfa,
369 0x0000007c, 0xbf8cc07f, 361 0x0000007c, 0xbf8cc07f,
370 0x807a847a, 0xbefc007e, 362 0x80708470, 0xbefc007e,
371 0xb8f1f803, 0xbefe007c, 363 0xbefe007c, 0xbefc0070,
372 0xbefc007a, 0xc0611c7a, 364 0xc0611e3a, 0x0000007c,
373 0x0000007c, 0xbf8cc07f, 365 0xbf8cc07f, 0x80708470,
374 0x807a847a, 0xbefc007e, 366 0xbefc007e, 0xb8fbf803,
375 0xbefe007c, 0xbefc007a, 367 0xbefe007c, 0xbefc0070,
376 0xc0611a3a, 0x0000007c, 368 0xc0611efa, 0x0000007c,
377 0xbf8cc07f, 0x807a847a, 369 0xbf8cc07f, 0x80708470,
378 0xbefc007e, 0xbefe007c, 370 0xbefc007e, 0xbefe007c,
379 0xbefc007a, 0xc0611a7a, 371 0xbefc0070, 0xc0611a3a,
380 0x0000007c, 0xbf8cc07f,
381 0x807a847a, 0xbefc007e,
382 0xb8fbf801, 0xbefe007c,
383 0xbefc007a, 0xc0611efa,
384 0x0000007c, 0xbf8cc07f, 372 0x0000007c, 0xbf8cc07f,
385 0x807a847a, 0xbefc007e, 373 0x80708470, 0xbefc007e,
386 0x8670ff7f, 0x04000000, 374 0xbefe007c, 0xbefc0070,
387 0xbeef0080, 0x876f6f70, 375 0xc0611a7a, 0x0000007c,
388 0xb8fa2a05, 0x807a817a, 376 0xbf8cc07f, 0x80708470,
389 0x8e7a8a7a, 0xb8f11605, 377 0xbefc007e, 0xb8f1f801,
390 0x80718171, 0x8e718471, 378 0xbefe007c, 0xbefc0070,
391 0x8e768271, 0xbef600ff, 379 0xc0611c7a, 0x0000007c,
392 0x01000000, 0xbef20174, 380 0xbf8cc07f, 0x80708470,
393 0x80747a74, 0x82758075, 381 0xbefc007e, 0x867aff7f,
394 0xbefc0080, 0xbf800000, 382 0x04000000, 0xbeef0080,
395 0xbe802b00, 0xbe822b02, 383 0x876f6f7a, 0xb8f02a05,
396 0xbe842b04, 0xbe862b06, 384 0x80708170, 0x8e708a70,
397 0xbe882b08, 0xbe8a2b0a, 385 0xb8fb1605, 0x807b817b,
398 0xbe8c2b0c, 0xbe8e2b0e, 386 0x8e7b847b, 0x8e76827b,
399 0xc06b003a, 0x00000000,
400 0xbf8cc07f, 0xc06b013a,
401 0x00000010, 0xbf8cc07f,
402 0xc06b023a, 0x00000020,
403 0xbf8cc07f, 0xc06b033a,
404 0x00000030, 0xbf8cc07f,
405 0x8074c074, 0x82758075,
406 0x807c907c, 0xbf0a717c,
407 0xbf85ffe7, 0xbef40172,
408 0xbefa0080, 0xbefe00c1,
409 0xbeff00c1, 0xbee80080,
410 0xbee90080, 0xbef600ff,
411 0x01000000, 0xe0724000,
412 0x7a1d0000, 0xe0724100,
413 0x7a1d0100, 0xe0724200,
414 0x7a1d0200, 0xe0724300,
415 0x7a1d0300, 0xbefe00c1,
416 0xbeff00c1, 0xb8f14306,
417 0x8671c171, 0xbf84002c,
418 0xbf8a0000, 0x8670ff6f,
419 0x04000000, 0xbf840028,
420 0x8e718671, 0x8e718271,
421 0xbef60071, 0xb8fa2a05,
422 0x807a817a, 0x8e7a8a7a,
423 0xb8f01605, 0x80708170,
424 0x8e708670, 0x807a707a,
425 0x807aff7a, 0x00000080,
426 0xbef600ff, 0x01000000, 387 0xbef600ff, 0x01000000,
427 0xbefc0080, 0xd28c0002, 388 0xbef20174, 0x80747074,
428 0x000100c1, 0xd28d0003, 389 0x82758075, 0xbefc0080,
429 0x000204c1, 0xd1060002, 390 0xbf800000, 0xbe802b00,
430 0x00011103, 0x7e0602ff, 391 0xbe822b02, 0xbe842b04,
431 0x00000200, 0xbefc00ff, 392 0xbe862b06, 0xbe882b08,
432 0x00010000, 0xbe800077, 393 0xbe8a2b0a, 0xbe8c2b0c,
433 0x8677ff77, 0xff7fffff, 394 0xbe8e2b0e, 0xc06b003a,
434 0x8777ff77, 0x00058000, 395 0x00000000, 0xbf8cc07f,
435 0xd8ec0000, 0x00000002, 396 0xc06b013a, 0x00000010,
436 0xbf8cc07f, 0xe0765000, 397 0xbf8cc07f, 0xc06b023a,
437 0x7a1d0002, 0x68040702, 398 0x00000020, 0xbf8cc07f,
438 0xd0c9006a, 0x0000e302, 399 0xc06b033a, 0x00000030,
439 0xbf87fff7, 0xbef70000, 400 0xbf8cc07f, 0x8074c074,
440 0xbefa00ff, 0x00000400, 401 0x82758075, 0x807c907c,
402 0xbf0a7b7c, 0xbf85ffe7,
403 0xbef40172, 0xbef00080,
441 0xbefe00c1, 0xbeff00c1, 404 0xbefe00c1, 0xbeff00c1,
442 0xb8f12a05, 0x80718171, 405 0xbee80080, 0xbee90080,
443 0x8e718271, 0x8e768871,
444 0xbef600ff, 0x01000000, 406 0xbef600ff, 0x01000000,
445 0xbefc0084, 0xbf0a717c, 407 0xe0724000, 0x701d0000,
446 0xbf840015, 0xbf11017c, 408 0xe0724100, 0x701d0100,
447 0x8071ff71, 0x00001000, 409 0xe0724200, 0x701d0200,
448 0x7e000300, 0x7e020301, 410 0xe0724300, 0x701d0300,
449 0x7e040302, 0x7e060303,
450 0xe0724000, 0x7a1d0000,
451 0xe0724100, 0x7a1d0100,
452 0xe0724200, 0x7a1d0200,
453 0xe0724300, 0x7a1d0300,
454 0x807c847c, 0x807aff7a,
455 0x00000400, 0xbf0a717c,
456 0xbf85ffef, 0xbf9c0000,
457 0xbf8200dc, 0xbef4007e,
458 0x8675ff7f, 0x0000ffff,
459 0x8775ff75, 0x00040000,
460 0xbef60080, 0xbef700ff,
461 0x00807fac, 0x866eff7f,
462 0x08000000, 0x8f6e836e,
463 0x87776e77, 0x866eff7f,
464 0x70000000, 0x8f6e816e,
465 0x87776e77, 0x866eff7f,
466 0x04000000, 0xbf84001e,
467 0xbefe00c1, 0xbeff00c1, 411 0xbefe00c1, 0xbeff00c1,
468 0xb8ef4306, 0x866fc16f, 412 0xb8fb4306, 0x867bc17b,
469 0xbf840019, 0x8e6f866f, 413 0xbf84002c, 0xbf8a0000,
470 0x8e6f826f, 0xbef6006f, 414 0x867aff6f, 0x04000000,
471 0xb8f82a05, 0x80788178, 415 0xbf840028, 0x8e7b867b,
472 0x8e788a78, 0xb8ee1605, 416 0x8e7b827b, 0xbef6007b,
473 0x806e816e, 0x8e6e866e, 417 0xb8f02a05, 0x80708170,
474 0x80786e78, 0x8078ff78, 418 0x8e708a70, 0xb8fa1605,
419 0x807a817a, 0x8e7a867a,
420 0x80707a70, 0x8070ff70,
475 0x00000080, 0xbef600ff, 421 0x00000080, 0xbef600ff,
476 0x01000000, 0xbefc0080, 422 0x01000000, 0xbefc0080,
477 0xe0510000, 0x781d0000, 423 0xd28c0002, 0x000100c1,
478 0xe0510100, 0x781d0000, 424 0xd28d0003, 0x000204c1,
479 0x807cff7c, 0x00000200, 425 0xd1060002, 0x00011103,
480 0x8078ff78, 0x00000200, 426 0x7e0602ff, 0x00000200,
481 0xbf0a6f7c, 0xbf85fff6, 427 0xbefc00ff, 0x00010000,
482 0xbef80080, 0xbefe00c1, 428 0xbe800077, 0x8677ff77,
483 0xbeff00c1, 0xb8ef2a05, 429 0xff7fffff, 0x8777ff77,
484 0x806f816f, 0x8e6f826f, 430 0x00058000, 0xd8ec0000,
485 0x8e76886f, 0xbef600ff, 431 0x00000002, 0xbf8cc07f,
486 0x01000000, 0xbeee0078, 432 0xe0765000, 0x701d0002,
487 0x8078ff78, 0x00000400, 433 0x68040702, 0xd0c9006a,
488 0xbefc0084, 0xbf11087c, 434 0x0000f702, 0xbf87fff7,
489 0x806fff6f, 0x00008000, 435 0xbef70000, 0xbef000ff,
490 0xe0524000, 0x781d0000, 436 0x00000400, 0xbefe00c1,
491 0xe0524100, 0x781d0100, 437 0xbeff00c1, 0xb8fb2a05,
492 0xe0524200, 0x781d0200, 438 0x807b817b, 0x8e7b827b,
493 0xe0524300, 0x781d0300, 439 0x8e76887b, 0xbef600ff,
494 0xbf8c0f70, 0x7e000300, 440 0x01000000, 0xbefc0084,
441 0xbf0a7b7c, 0xbf840015,
442 0xbf11017c, 0x807bff7b,
443 0x00001000, 0x7e000300,
495 0x7e020301, 0x7e040302, 444 0x7e020301, 0x7e040302,
496 0x7e060303, 0x807c847c, 445 0x7e060303, 0xe0724000,
497 0x8078ff78, 0x00000400, 446 0x701d0000, 0xe0724100,
498 0xbf0a6f7c, 0xbf85ffee, 447 0x701d0100, 0xe0724200,
499 0xbf9c0000, 0xe0524000, 448 0x701d0200, 0xe0724300,
500 0x6e1d0000, 0xe0524100, 449 0x701d0300, 0x807c847c,
501 0x6e1d0100, 0xe0524200, 450 0x8070ff70, 0x00000400,
502 0x6e1d0200, 0xe0524300, 451 0xbf0a7b7c, 0xbf85ffef,
503 0x6e1d0300, 0xb8f82a05, 452 0xbf9c0000, 0xbf8200da,
453 0xbef4007e, 0x8675ff7f,
454 0x0000ffff, 0x8775ff75,
455 0x00040000, 0xbef60080,
456 0xbef700ff, 0x00807fac,
457 0x866eff7f, 0x08000000,
458 0x8f6e836e, 0x87776e77,
459 0x866eff7f, 0x70000000,
460 0x8f6e816e, 0x87776e77,
461 0x866eff7f, 0x04000000,
462 0xbf84001e, 0xbefe00c1,
463 0xbeff00c1, 0xb8ef4306,
464 0x866fc16f, 0xbf840019,
465 0x8e6f866f, 0x8e6f826f,
466 0xbef6006f, 0xb8f82a05,
504 0x80788178, 0x8e788a78, 467 0x80788178, 0x8e788a78,
505 0xb8ee1605, 0x806e816e, 468 0xb8ee1605, 0x806e816e,
506 0x8e6e866e, 0x80786e78, 469 0x8e6e866e, 0x80786e78,
507 0x80f8c078, 0xb8ef1605, 470 0x8078ff78, 0x00000080,
508 0x806f816f, 0x8e6f846f, 471 0xbef600ff, 0x01000000,
509 0x8e76826f, 0xbef600ff, 472 0xbefc0080, 0xe0510000,
510 0x01000000, 0xbefc006f, 473 0x781d0000, 0xe0510100,
511 0xc031003a, 0x00000078, 474 0x781d0000, 0x807cff7c,
512 0x80f8c078, 0xbf8cc07f, 475 0x00000200, 0x8078ff78,
513 0x80fc907c, 0xbf800000, 476 0x00000200, 0xbf0a6f7c,
514 0xbe802d00, 0xbe822d02, 477 0xbf85fff6, 0xbef80080,
515 0xbe842d04, 0xbe862d06, 478 0xbefe00c1, 0xbeff00c1,
516 0xbe882d08, 0xbe8a2d0a, 479 0xb8ef2a05, 0x806f816f,
517 0xbe8c2d0c, 0xbe8e2d0e, 480 0x8e6f826f, 0x8e76886f,
518 0xbf06807c, 0xbf84fff0, 481 0xbef600ff, 0x01000000,
482 0xbeee0078, 0x8078ff78,
483 0x00000400, 0xbefc0084,
484 0xbf11087c, 0x806fff6f,
485 0x00008000, 0xe0524000,
486 0x781d0000, 0xe0524100,
487 0x781d0100, 0xe0524200,
488 0x781d0200, 0xe0524300,
489 0x781d0300, 0xbf8c0f70,
490 0x7e000300, 0x7e020301,
491 0x7e040302, 0x7e060303,
492 0x807c847c, 0x8078ff78,
493 0x00000400, 0xbf0a6f7c,
494 0xbf85ffee, 0xbf9c0000,
495 0xe0524000, 0x6e1d0000,
496 0xe0524100, 0x6e1d0100,
497 0xe0524200, 0x6e1d0200,
498 0xe0524300, 0x6e1d0300,
519 0xb8f82a05, 0x80788178, 499 0xb8f82a05, 0x80788178,
520 0x8e788a78, 0xb8ee1605, 500 0x8e788a78, 0xb8ee1605,
521 0x806e816e, 0x8e6e866e, 501 0x806e816e, 0x8e6e866e,
522 0x80786e78, 0xbef60084, 502 0x80786e78, 0x80f8c078,
503 0xb8ef1605, 0x806f816f,
504 0x8e6f846f, 0x8e76826f,
523 0xbef600ff, 0x01000000, 505 0xbef600ff, 0x01000000,
524 0xc0211bfa, 0x00000078, 506 0xbefc006f, 0xc031003a,
525 0x80788478, 0xc0211b3a, 507 0x00000078, 0x80f8c078,
508 0xbf8cc07f, 0x80fc907c,
509 0xbf800000, 0xbe802d00,
510 0xbe822d02, 0xbe842d04,
511 0xbe862d06, 0xbe882d08,
512 0xbe8a2d0a, 0xbe8c2d0c,
513 0xbe8e2d0e, 0xbf06807c,
514 0xbf84fff0, 0xb8f82a05,
515 0x80788178, 0x8e788a78,
516 0xb8ee1605, 0x806e816e,
517 0x8e6e866e, 0x80786e78,
518 0xbef60084, 0xbef600ff,
519 0x01000000, 0xc0211bfa,
526 0x00000078, 0x80788478, 520 0x00000078, 0x80788478,
527 0xc0211b7a, 0x00000078, 521 0xc0211b3a, 0x00000078,
528 0x80788478, 0xc0211eba, 522 0x80788478, 0xc0211b7a,
529 0x00000078, 0x80788478, 523 0x00000078, 0x80788478,
530 0xc0211efa, 0x00000078, 524 0xc0211c3a, 0x00000078,
531 0x80788478, 0xc0211c3a, 525 0x80788478, 0xc0211c7a,
532 0x00000078, 0x80788478, 526 0x00000078, 0x80788478,
533 0xc0211c7a, 0x00000078, 527 0xc0211eba, 0x00000078,
534 0x80788478, 0xc0211a3a, 528 0x80788478, 0xc0211efa,
535 0x00000078, 0x80788478, 529 0x00000078, 0x80788478,
536 0xc0211a7a, 0x00000078, 530 0xc0211a3a, 0x00000078,
537 0x80788478, 0xc0211cfa, 531 0x80788478, 0xc0211a7a,
538 0x00000078, 0x80788478, 532 0x00000078, 0x80788478,
539 0xbf8cc07f, 0xbefc006f, 533 0xc0211cfa, 0x00000078,
540 0xbefe007a, 0xbeff007b, 534 0x80788478, 0xbf8cc07f,
541 0x866f71ff, 0x000003ff, 535 0xbefc006f, 0xbefe0070,
542 0xb96f4803, 0x866f71ff, 536 0xbeff0071, 0x866f7bff,
543 0xfffff800, 0x8f6f8b6f, 537 0x000003ff, 0xb96f4803,
544 0xb96fa2c3, 0xb973f801, 538 0x866f7bff, 0xfffff800,
545 0xb8ee2a05, 0x806e816e, 539 0x8f6f8b6f, 0xb96fa2c3,
546 0x8e6e8a6e, 0xb8ef1605, 540 0xb973f801, 0xb8ee2a05,
547 0x806f816f, 0x8e6f866f, 541 0x806e816e, 0x8e6e8a6e,
548 0x806e6f6e, 0x806e746e, 542 0xb8ef1605, 0x806f816f,
549 0x826f8075, 0x866fff6f, 543 0x8e6f866f, 0x806e6f6e,
550 0x0000ffff, 0xc0071cb7, 544 0x806e746e, 0x826f8075,
551 0x00000040, 0xc00b1d37, 545 0x866fff6f, 0x0000ffff,
552 0x00000048, 0xc0031e77, 546 0xc00b1c37, 0x00000050,
553 0x00000058, 0xc0071eb7, 547 0xc00b1d37, 0x00000060,
554 0x0000005c, 0xbf8cc07f, 548 0xc0031e77, 0x00000074,
555 0x866fff6d, 0xf0000000, 549 0xbf8cc07f, 0x866fff6d,
556 0x8f6f9c6f, 0x8e6f906f, 550 0xf8000000, 0x8f6f9b6f,
557 0xbeee0080, 0x876e6f6e, 551 0x8e6f906f, 0xbeee0080,
558 0x866fff6d, 0x08000000, 552 0x876e6f6e, 0x866fff6d,
559 0x8f6f9b6f, 0x8e6f8f6f, 553 0x04000000, 0x8f6f9a6f,
560 0x876e6f6e, 0x866fff70, 554 0x8e6f8f6f, 0x876e6f6e,
561 0x00800000, 0x8f6f976f, 555 0x866fff7a, 0x00800000,
562 0xb96ef807, 0x866dff6d, 556 0x8f6f976f, 0xb96ef807,
563 0x0000ffff, 0x86fe7e7e, 557 0x866dff6d, 0x0000ffff,
564 0x86ea6a6a, 0x8f6e8370, 558 0x86fe7e7e, 0x86ea6a6a,
565 0xb96ee0c2, 0xbf800002, 559 0x8f6e837a, 0xb96ee0c2,
566 0xb9700002, 0xbf8a0000, 560 0xbf800002, 0xb97a0002,
567 0x95806f6c, 0xbf810000, 561 0xbf8a0000, 0x95806f6c,
562 0xbf810000, 0x00000000,
568}; 563};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index abe1a5da29fb..a47f5b933120 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP
282 s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? 282 s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
283end 283end
284 284
285 //check whether there is mem_viol
286 s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
287 s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
288 s_cbranch_scc0 L_NO_PC_REWIND
289
290 //if so, need rewind PC assuming GDS operation gets NACKed
291 s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
292 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
293 s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
294 s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
295 s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
296
297L_NO_PC_REWIND:
298 s_mov_b32 s_save_tmp, 0 //clear saveCtx bit 285 s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
299 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit 286 s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
300 287
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index 0bb9c577b3a2..6bae2e022c6e 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
150var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG 150var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
151var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 151var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
152 152
153var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used 153var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
154var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME 154var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
155var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME 155var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
156var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME 156var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
157 157
158var s_save_spi_init_lo = exec_lo 158var s_save_spi_init_lo = exec_lo
159var s_save_spi_init_hi = exec_hi 159var s_save_spi_init_hi = exec_hi
@@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra
162var s_save_pc_hi = ttmp1 162var s_save_pc_hi = ttmp1
163var s_save_exec_lo = ttmp2 163var s_save_exec_lo = ttmp2
164var s_save_exec_hi = ttmp3 164var s_save_exec_hi = ttmp3
165var s_save_tmp = ttmp4 165var s_save_tmp = ttmp14
166var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine 166var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine
167var s_save_xnack_mask_lo = ttmp6 167var s_save_xnack_mask_lo = ttmp6
168var s_save_xnack_mask_hi = ttmp7 168var s_save_xnack_mask_hi = ttmp7
169var s_save_buf_rsrc0 = ttmp8 169var s_save_buf_rsrc0 = ttmp8
@@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9
171var s_save_buf_rsrc2 = ttmp10 171var s_save_buf_rsrc2 = ttmp10
172var s_save_buf_rsrc3 = ttmp11 172var s_save_buf_rsrc3 = ttmp11
173var s_save_status = ttmp12 173var s_save_status = ttmp12
174var s_save_mem_offset = ttmp14 174var s_save_mem_offset = ttmp4
175var s_save_alloc_size = s_save_trapsts //conflict 175var s_save_alloc_size = s_save_trapsts //conflict
176var s_save_m0 = ttmp15 176var s_save_m0 = ttmp5
177var s_save_ttmps_lo = s_save_tmp //no conflict 177var s_save_ttmps_lo = s_save_tmp //no conflict
178var s_save_ttmps_hi = s_save_trapsts //no conflict 178var s_save_ttmps_hi = s_save_trapsts //no conflict
179 179
@@ -207,10 +207,10 @@ var s_restore_mode = ttmp7
207 207
208var s_restore_pc_lo = ttmp0 208var s_restore_pc_lo = ttmp0
209var s_restore_pc_hi = ttmp1 209var s_restore_pc_hi = ttmp1
210var s_restore_exec_lo = ttmp14 210var s_restore_exec_lo = ttmp4
211var s_restore_exec_hi = ttmp15 211var s_restore_exec_hi = ttmp5
212var s_restore_status = ttmp4 212var s_restore_status = ttmp14
213var s_restore_trapsts = ttmp5 213var s_restore_trapsts = ttmp15
214var s_restore_xnack_mask_lo = xnack_mask_lo 214var s_restore_xnack_mask_lo = xnack_mask_lo
215var s_restore_xnack_mask_hi = xnack_mask_hi 215var s_restore_xnack_mask_hi = xnack_mask_hi
216var s_restore_buf_rsrc0 = ttmp8 216var s_restore_buf_rsrc0 = ttmp8
@@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)
266 266
267L_HALT_WAVE: 267L_HALT_WAVE:
268 // If STATUS.HALT is set then this fault must come from SQC instruction fetch. 268 // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
269 // We cannot prevent further faults so just terminate the wavefront. 269 // We cannot prevent further faults. Spin wait until context saved.
270 s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK 270 s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
271 s_cbranch_scc0 L_NOT_ALREADY_HALTED 271 s_cbranch_scc0 L_NOT_ALREADY_HALTED
272 s_endpgm 272
273L_WAIT_CTX_SAVE:
274 s_sleep 0x10
275 s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
276 s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
277 s_cbranch_scc0 L_WAIT_CTX_SAVE
278
273L_NOT_ALREADY_HALTED: 279L_NOT_ALREADY_HALTED:
274 s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK 280 s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
275 281
@@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:
293 // Read second-level TBA/TMA from first-level TMA and jump if available. 299 // Read second-level TBA/TMA from first-level TMA and jump if available.
294 // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) 300 // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
295 // ttmp12 holds SQ_WAVE_STATUS 301 // ttmp12 holds SQ_WAVE_STATUS
296 s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) 302 s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
297 s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) 303 s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
298 s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 304 s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
299 s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA 305 s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
300 s_waitcnt lgkmcnt(0) 306 s_waitcnt lgkmcnt(0)
301 s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA 307 s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
302 s_waitcnt lgkmcnt(0) 308 s_waitcnt lgkmcnt(0)
303 s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] 309 s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
304 s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set 310 s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
@@ -405,7 +411,7 @@ end
405 else 411 else
406 end 412 end
407 413
408 // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic 414 // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
409 // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 415 // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
410 get_vgpr_size_bytes(s_save_ttmps_lo) 416 get_vgpr_size_bytes(s_save_ttmps_lo)
411 get_sgpr_size_bytes(s_save_ttmps_hi) 417 get_sgpr_size_bytes(s_save_ttmps_hi)
@@ -413,13 +419,11 @@ end
413 s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo 419 s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
414 s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 420 s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
415 s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF 421 s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
416 s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 422 s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
417 ack_sqc_store_workaround()
418 s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
419 ack_sqc_store_workaround() 423 ack_sqc_store_workaround()
420 s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 424 s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
421 ack_sqc_store_workaround() 425 ack_sqc_store_workaround()
422 s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 426 s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
423 ack_sqc_store_workaround() 427 ack_sqc_store_workaround()
424 428
425 /* setup Resource Contants */ 429 /* setup Resource Contants */
@@ -1093,7 +1097,7 @@ end
1093 //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore 1097 //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
1094 s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode 1098 s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
1095 1099
1096 // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic 1100 // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
1097 // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 1101 // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
1098 get_vgpr_size_bytes(s_restore_ttmps_lo) 1102 get_vgpr_size_bytes(s_restore_ttmps_lo)
1099 get_sgpr_size_bytes(s_restore_ttmps_hi) 1103 get_sgpr_size_bytes(s_restore_ttmps_hi)
@@ -1101,10 +1105,9 @@ end
1101 s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 1105 s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
1102 s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 1106 s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
1103 s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF 1107 s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
1104 s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 1108 s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
1105 s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 1109 s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
1106 s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 1110 s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
1107 s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
1108 s_waitcnt lgkmcnt(0) 1111 s_waitcnt lgkmcnt(0)
1109 1112
1110 //reuse s_restore_m0 as a temp register 1113 //reuse s_restore_m0 as a temp register
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 083bd8114db1..ea82828fdc76 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
213 q_properties->type = KFD_QUEUE_TYPE_COMPUTE; 213 q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
214 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) 214 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
215 q_properties->type = KFD_QUEUE_TYPE_SDMA; 215 q_properties->type = KFD_QUEUE_TYPE_SDMA;
216 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
217 q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
216 else 218 else
217 return -ENOTSUPP; 219 return -ENOTSUPP;
218 220
@@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
522 struct kfd_process_device *pdd; 524 struct kfd_process_device *pdd;
523 525
524 dev = kfd_device_by_id(args->gpu_id); 526 dev = kfd_device_by_id(args->gpu_id);
525 if (dev == NULL) 527 if (!dev)
526 return -EINVAL; 528 return -EINVAL;
527 529
528 mutex_lock(&p->mutex); 530 mutex_lock(&p->mutex);
@@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1272 if (args->size != kfd_doorbell_process_slice(dev)) 1274 if (args->size != kfd_doorbell_process_slice(dev))
1273 return -EINVAL; 1275 return -EINVAL;
1274 offset = kfd_get_process_doorbells(dev, p); 1276 offset = kfd_get_process_doorbells(dev, p);
1277 } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1278 if (args->size != PAGE_SIZE)
1279 return -EINVAL;
1280 offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1281 if (!offset)
1282 return -ENOMEM;
1275 } 1283 }
1276 1284
1277 mutex_lock(&p->mutex); 1285 mutex_lock(&p->mutex);
@@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1301 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); 1309 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1302 args->mmap_offset = offset; 1310 args->mmap_offset = offset;
1303 1311
1312 /* MMIO is mapped through kfd device
1313 * Generate a kfd mmap offset
1314 */
1315 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1316 args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
1317 args->mmap_offset <<= PAGE_SHIFT;
1318 }
1319
1304 return 0; 1320 return 0;
1305 1321
1306err_free: 1322err_free:
@@ -1551,6 +1567,32 @@ copy_from_user_failed:
1551 return err; 1567 return err;
1552} 1568}
1553 1569
1570static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1571 struct kfd_process *p, void *data)
1572{
1573 int retval;
1574 struct kfd_ioctl_alloc_queue_gws_args *args = data;
1575 struct kfd_dev *dev;
1576
1577 if (!hws_gws_support)
1578 return -EINVAL;
1579
1580 dev = kfd_device_by_id(args->gpu_id);
1581 if (!dev) {
1582 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
1583 return -EINVAL;
1584 }
1585 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
1586 return -EINVAL;
1587
1588 mutex_lock(&p->mutex);
1589 retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1590 mutex_unlock(&p->mutex);
1591
1592 args->first_gws = 0;
1593 return retval;
1594}
1595
1554static int kfd_ioctl_get_dmabuf_info(struct file *filep, 1596static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1555 struct kfd_process *p, void *data) 1597 struct kfd_process *p, void *data)
1556{ 1598{
@@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1753 AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, 1795 AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1754 kfd_ioctl_import_dmabuf, 0), 1796 kfd_ioctl_import_dmabuf, 0),
1755 1797
1798 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1799 kfd_ioctl_alloc_queue_gws, 0),
1756}; 1800};
1757 1801
1758#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) 1802#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
@@ -1845,6 +1889,39 @@ err_i1:
1845 return retcode; 1889 return retcode;
1846} 1890}
1847 1891
1892static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
1893 struct vm_area_struct *vma)
1894{
1895 phys_addr_t address;
1896 int ret;
1897
1898 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1899 return -EINVAL;
1900
1901 address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1902
1903 vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
1904 VM_DONTDUMP | VM_PFNMAP;
1905
1906 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1907
1908 pr_debug("Process %d mapping mmio page\n"
1909 " target user address == 0x%08llX\n"
1910 " physical address == 0x%08llX\n"
1911 " vm_flags == 0x%04lX\n"
1912 " size == 0x%04lX\n",
1913 process->pasid, (unsigned long long) vma->vm_start,
1914 address, vma->vm_flags, PAGE_SIZE);
1915
1916 ret = io_remap_pfn_range(vma,
1917 vma->vm_start,
1918 address >> PAGE_SHIFT,
1919 PAGE_SIZE,
1920 vma->vm_page_prot);
1921 return ret;
1922}
1923
1924
1848static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) 1925static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1849{ 1926{
1850 struct kfd_process *process; 1927 struct kfd_process *process;
@@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1875 if (!dev) 1952 if (!dev)
1876 return -ENODEV; 1953 return -ENODEV;
1877 return kfd_reserved_mem_mmap(dev, process, vma); 1954 return kfd_reserved_mem_mmap(dev, process, vma);
1955 case KFD_MMAP_TYPE_MMIO:
1956 if (!dev)
1957 return -ENODEV;
1958 return kfd_mmio_mmap(dev, process, vma);
1878 } 1959 }
1879 1960
1880 return -EFAULT; 1961 return -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 2e7c44955f43..59f8ca4297db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
134#define polaris10_cache_info carrizo_cache_info 134#define polaris10_cache_info carrizo_cache_info
135#define polaris11_cache_info carrizo_cache_info 135#define polaris11_cache_info carrizo_cache_info
136#define polaris12_cache_info carrizo_cache_info 136#define polaris12_cache_info carrizo_cache_info
137#define vegam_cache_info carrizo_cache_info
137/* TODO - check & update Vega10 cache details */ 138/* TODO - check & update Vega10 cache details */
138#define vega10_cache_info carrizo_cache_info 139#define vega10_cache_info carrizo_cache_info
139#define raven_cache_info carrizo_cache_info 140#define raven_cache_info carrizo_cache_info
@@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
372 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) 373 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
373 props->weight = 20; 374 props->weight = 20;
374 else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) 375 else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
375 props->weight = 15; 376 props->weight = 15 * iolink->num_hops_xgmi;
376 else 377 else
377 props->weight = node_distance(id_from, id_to); 378 props->weight = node_distance(id_from, id_to);
378 379
@@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
652 pcache_info = polaris12_cache_info; 653 pcache_info = polaris12_cache_info;
653 num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); 654 num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
654 break; 655 break;
656 case CHIP_VEGAM:
657 pcache_info = vegam_cache_info;
658 num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
659 break;
655 case CHIP_VEGA10: 660 case CHIP_VEGA10:
656 case CHIP_VEGA12: 661 case CHIP_VEGA12:
657 case CHIP_VEGA20: 662 case CHIP_VEGA20:
@@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
1092 1097
1093static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, 1098static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
1094 struct kfd_dev *kdev, 1099 struct kfd_dev *kdev,
1100 struct kfd_dev *peer_kdev,
1095 struct crat_subtype_iolink *sub_type_hdr, 1101 struct crat_subtype_iolink *sub_type_hdr,
1096 uint32_t proximity_domain_from, 1102 uint32_t proximity_domain_from,
1097 uint32_t proximity_domain_to) 1103 uint32_t proximity_domain_to)
@@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
1110 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; 1116 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
1111 sub_type_hdr->proximity_domain_from = proximity_domain_from; 1117 sub_type_hdr->proximity_domain_from = proximity_domain_from;
1112 sub_type_hdr->proximity_domain_to = proximity_domain_to; 1118 sub_type_hdr->proximity_domain_to = proximity_domain_to;
1119 sub_type_hdr->num_hops_xgmi =
1120 amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
1113 return 0; 1121 return 0;
1114} 1122}
1115 1123
@@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
1287 (char *)sub_type_hdr + 1295 (char *)sub_type_hdr +
1288 sizeof(struct crat_subtype_iolink)); 1296 sizeof(struct crat_subtype_iolink));
1289 ret = kfd_fill_gpu_xgmi_link_to_gpu( 1297 ret = kfd_fill_gpu_xgmi_link_to_gpu(
1290 &avail_size, kdev, 1298 &avail_size, kdev, peer_dev->gpu,
1291 (struct crat_subtype_iolink *)sub_type_hdr, 1299 (struct crat_subtype_iolink *)sub_type_hdr,
1292 proximity_domain, nid); 1300 proximity_domain, nid);
1293 if (ret < 0) 1301 if (ret < 0)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 7c3f192fe25f..d54ceebd346b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -274,7 +274,8 @@ struct crat_subtype_iolink {
274 uint32_t minimum_bandwidth_mbs; 274 uint32_t minimum_bandwidth_mbs;
275 uint32_t maximum_bandwidth_mbs; 275 uint32_t maximum_bandwidth_mbs;
276 uint32_t recommended_transfer_size; 276 uint32_t recommended_transfer_size;
277 uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH]; 277 uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
278 uint8_t num_hops_xgmi;
278}; 279};
279 280
280/* 281/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 765b58a17dc7..9d1b026e29e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
54 .needs_iommu_device = true, 54 .needs_iommu_device = true,
55 .needs_pci_atomics = false, 55 .needs_pci_atomics = false,
56 .num_sdma_engines = 2, 56 .num_sdma_engines = 2,
57 .num_xgmi_sdma_engines = 0,
57 .num_sdma_queues_per_engine = 2, 58 .num_sdma_queues_per_engine = 2,
58}; 59};
59 60
@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
71 .needs_iommu_device = true, 72 .needs_iommu_device = true,
72 .needs_pci_atomics = false, 73 .needs_pci_atomics = false,
73 .num_sdma_engines = 2, 74 .num_sdma_engines = 2,
75 .num_xgmi_sdma_engines = 0,
74 .num_sdma_queues_per_engine = 2, 76 .num_sdma_queues_per_engine = 2,
75}; 77};
76 78
@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
87 .needs_iommu_device = true, 89 .needs_iommu_device = true,
88 .needs_pci_atomics = true, 90 .needs_pci_atomics = true,
89 .num_sdma_engines = 1, 91 .num_sdma_engines = 1,
92 .num_xgmi_sdma_engines = 0,
90 .num_sdma_queues_per_engine = 2, 93 .num_sdma_queues_per_engine = 2,
91}; 94};
92#endif 95#endif
@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
105 .needs_iommu_device = false, 108 .needs_iommu_device = false,
106 .needs_pci_atomics = false, 109 .needs_pci_atomics = false,
107 .num_sdma_engines = 2, 110 .num_sdma_engines = 2,
111 .num_xgmi_sdma_engines = 0,
108 .num_sdma_queues_per_engine = 2, 112 .num_sdma_queues_per_engine = 2,
109}; 113};
110 114
@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
121 .needs_iommu_device = false, 125 .needs_iommu_device = false,
122 .needs_pci_atomics = true, 126 .needs_pci_atomics = true,
123 .num_sdma_engines = 2, 127 .num_sdma_engines = 2,
128 .num_xgmi_sdma_engines = 0,
124 .num_sdma_queues_per_engine = 2, 129 .num_sdma_queues_per_engine = 2,
125}; 130};
126 131
@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
137 .needs_iommu_device = false, 142 .needs_iommu_device = false,
138 .needs_pci_atomics = true, 143 .needs_pci_atomics = true,
139 .num_sdma_engines = 2, 144 .num_sdma_engines = 2,
145 .num_xgmi_sdma_engines = 0,
140 .num_sdma_queues_per_engine = 2, 146 .num_sdma_queues_per_engine = 2,
141}; 147};
142 148
@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
153 .needs_iommu_device = false, 159 .needs_iommu_device = false,
154 .needs_pci_atomics = false, 160 .needs_pci_atomics = false,
155 .num_sdma_engines = 2, 161 .num_sdma_engines = 2,
162 .num_xgmi_sdma_engines = 0,
156 .num_sdma_queues_per_engine = 2, 163 .num_sdma_queues_per_engine = 2,
157}; 164};
158 165
@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
170 .needs_iommu_device = false, 177 .needs_iommu_device = false,
171 .needs_pci_atomics = true, 178 .needs_pci_atomics = true,
172 .num_sdma_engines = 2, 179 .num_sdma_engines = 2,
180 .num_xgmi_sdma_engines = 0,
173 .num_sdma_queues_per_engine = 2, 181 .num_sdma_queues_per_engine = 2,
174}; 182};
175 183
@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
186 .needs_iommu_device = false, 194 .needs_iommu_device = false,
187 .needs_pci_atomics = false, 195 .needs_pci_atomics = false,
188 .num_sdma_engines = 2, 196 .num_sdma_engines = 2,
197 .num_xgmi_sdma_engines = 0,
189 .num_sdma_queues_per_engine = 2, 198 .num_sdma_queues_per_engine = 2,
190}; 199};
191 200
@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
202 .needs_iommu_device = false, 211 .needs_iommu_device = false,
203 .needs_pci_atomics = true, 212 .needs_pci_atomics = true,
204 .num_sdma_engines = 2, 213 .num_sdma_engines = 2,
214 .num_xgmi_sdma_engines = 0,
205 .num_sdma_queues_per_engine = 2, 215 .num_sdma_queues_per_engine = 2,
206}; 216};
207 217
@@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
218 .needs_iommu_device = false, 228 .needs_iommu_device = false,
219 .needs_pci_atomics = true, 229 .needs_pci_atomics = true,
220 .num_sdma_engines = 2, 230 .num_sdma_engines = 2,
231 .num_xgmi_sdma_engines = 0,
232 .num_sdma_queues_per_engine = 2,
233};
234
235static const struct kfd_device_info vegam_device_info = {
236 .asic_family = CHIP_VEGAM,
237 .max_pasid_bits = 16,
238 .max_no_of_hqd = 24,
239 .doorbell_size = 4,
240 .ih_ring_entry_size = 4 * sizeof(uint32_t),
241 .event_interrupt_class = &event_interrupt_class_cik,
242 .num_of_watch_points = 4,
243 .mqd_size_aligned = MQD_SIZE_ALIGNED,
244 .supports_cwsr = true,
245 .needs_iommu_device = false,
246 .needs_pci_atomics = true,
247 .num_sdma_engines = 2,
248 .num_xgmi_sdma_engines = 0,
221 .num_sdma_queues_per_engine = 2, 249 .num_sdma_queues_per_engine = 2,
222}; 250};
223 251
@@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
234 .needs_iommu_device = false, 262 .needs_iommu_device = false,
235 .needs_pci_atomics = false, 263 .needs_pci_atomics = false,
236 .num_sdma_engines = 2, 264 .num_sdma_engines = 2,
265 .num_xgmi_sdma_engines = 0,
237 .num_sdma_queues_per_engine = 2, 266 .num_sdma_queues_per_engine = 2,
238}; 267};
239 268
@@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
250 .needs_iommu_device = false, 279 .needs_iommu_device = false,
251 .needs_pci_atomics = false, 280 .needs_pci_atomics = false,
252 .num_sdma_engines = 2, 281 .num_sdma_engines = 2,
282 .num_xgmi_sdma_engines = 0,
253 .num_sdma_queues_per_engine = 2, 283 .num_sdma_queues_per_engine = 2,
254}; 284};
255 285
@@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
266 .needs_iommu_device = false, 296 .needs_iommu_device = false,
267 .needs_pci_atomics = false, 297 .needs_pci_atomics = false,
268 .num_sdma_engines = 2, 298 .num_sdma_engines = 2,
299 .num_xgmi_sdma_engines = 0,
269 .num_sdma_queues_per_engine = 2, 300 .num_sdma_queues_per_engine = 2,
270}; 301};
271 302
@@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {
282 .needs_iommu_device = false, 313 .needs_iommu_device = false,
283 .needs_pci_atomics = false, 314 .needs_pci_atomics = false,
284 .num_sdma_engines = 2, 315 .num_sdma_engines = 2,
316 .num_xgmi_sdma_engines = 0,
285 .num_sdma_queues_per_engine = 8, 317 .num_sdma_queues_per_engine = 8,
286}; 318};
287 319
@@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {
373 { 0x6995, &polaris12_device_info }, /* Polaris12 */ 405 { 0x6995, &polaris12_device_info }, /* Polaris12 */
374 { 0x6997, &polaris12_device_info }, /* Polaris12 */ 406 { 0x6997, &polaris12_device_info }, /* Polaris12 */
375 { 0x699F, &polaris12_device_info }, /* Polaris12 */ 407 { 0x699F, &polaris12_device_info }, /* Polaris12 */
408 { 0x694C, &vegam_device_info }, /* VegaM */
409 { 0x694E, &vegam_device_info }, /* VegaM */
410 { 0x694F, &vegam_device_info }, /* VegaM */
376 { 0x6860, &vega10_device_info }, /* Vega10 */ 411 { 0x6860, &vega10_device_info }, /* Vega10 */
377 { 0x6861, &vega10_device_info }, /* Vega10 */ 412 { 0x6861, &vega10_device_info }, /* Vega10 */
378 { 0x6862, &vega10_device_info }, /* Vega10 */ 413 { 0x6862, &vega10_device_info }, /* Vega10 */
@@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
518 } else 553 } else
519 kfd->max_proc_per_quantum = hws_max_conc_proc; 554 kfd->max_proc_per_quantum = hws_max_conc_proc;
520 555
556 /* Allocate global GWS that is shared by all KFD processes */
557 if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
558 amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
559 dev_err(kfd_device, "Could not allocate %d gws\n",
560 amdgpu_amdkfd_get_num_gws(kfd->kgd));
561 goto out;
562 }
521 /* calculate max size of mqds needed for queues */ 563 /* calculate max size of mqds needed for queues */
522 size = max_num_of_queues_per_device * 564 size = max_num_of_queues_per_device *
523 kfd->device_info->mqd_size_aligned; 565 kfd->device_info->mqd_size_aligned;
@@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
541 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, 583 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
542 false)) { 584 false)) {
543 dev_err(kfd_device, "Could not allocate %d bytes\n", size); 585 dev_err(kfd_device, "Could not allocate %d bytes\n", size);
544 goto out; 586 goto alloc_gtt_mem_failure;
545 } 587 }
546 588
547 dev_info(kfd_device, "Allocated %d bytes on gart\n", size); 589 dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
@@ -611,6 +653,9 @@ kfd_doorbell_error:
611 kfd_gtt_sa_fini(kfd); 653 kfd_gtt_sa_fini(kfd);
612kfd_gtt_sa_init_error: 654kfd_gtt_sa_init_error:
613 amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); 655 amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
656alloc_gtt_mem_failure:
657 if (hws_gws_support)
658 amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
614 dev_err(kfd_device, 659 dev_err(kfd_device,
615 "device %x:%x NOT added due to errors\n", 660 "device %x:%x NOT added due to errors\n",
616 kfd->pdev->vendor, kfd->pdev->device); 661 kfd->pdev->vendor, kfd->pdev->device);
@@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
628 kfd_doorbell_fini(kfd); 673 kfd_doorbell_fini(kfd);
629 kfd_gtt_sa_fini(kfd); 674 kfd_gtt_sa_fini(kfd);
630 amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); 675 amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
676 if (hws_gws_support)
677 amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
631 } 678 }
632 679
633 kfree(kfd); 680 kfree(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ae381450601c..ece35c7a77b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
60 struct qcm_process_device *qpd); 60 struct qcm_process_device *qpd);
61 61
62static void deallocate_sdma_queue(struct device_queue_manager *dqm, 62static void deallocate_sdma_queue(struct device_queue_manager *dqm,
63 unsigned int sdma_queue_id); 63 struct queue *q);
64 64
65static void kfd_process_hw_exception(struct work_struct *work); 65static void kfd_process_hw_exception(struct work_struct *work);
66 66
67static inline 67static inline
68enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 68enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
69{ 69{
70 if (type == KFD_QUEUE_TYPE_SDMA) 70 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
71 return KFD_MQD_TYPE_SDMA; 71 return KFD_MQD_TYPE_SDMA;
72 return KFD_MQD_TYPE_CP; 72 return KFD_MQD_TYPE_CP;
73} 73}
@@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
107 return dqm->dev->device_info->num_sdma_engines; 107 return dqm->dev->device_info->num_sdma_engines;
108} 108}
109 109
110static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
111{
112 return dqm->dev->device_info->num_xgmi_sdma_engines;
113}
114
110unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 115unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
111{ 116{
112 return dqm->dev->device_info->num_sdma_engines 117 return dqm->dev->device_info->num_sdma_engines
113 * dqm->dev->device_info->num_sdma_queues_per_engine; 118 * dqm->dev->device_info->num_sdma_queues_per_engine;
114} 119}
115 120
121unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
122{
123 return dqm->dev->device_info->num_xgmi_sdma_engines
124 * dqm->dev->device_info->num_sdma_queues_per_engine;
125}
126
116void program_sh_mem_settings(struct device_queue_manager *dqm, 127void program_sh_mem_settings(struct device_queue_manager *dqm,
117 struct qcm_process_device *qpd) 128 struct qcm_process_device *qpd)
118{ 129{
@@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
133 * preserve the user mode ABI. 144 * preserve the user mode ABI.
134 */ 145 */
135 q->doorbell_id = q->properties.queue_id; 146 q->doorbell_id = q->properties.queue_id;
136 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 147 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
148 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
137 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 149 /* For SDMA queues on SOC15 with 8-byte doorbell, use static
138 * doorbell assignments based on the engine and queue id. 150 * doorbell assignments based on the engine and queue id.
139 * The doobell index distance between RLC (2*i) and (2*i+1) 151 * The doobell index distance between RLC (2*i) and (2*i+1)
@@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
174 struct kfd_dev *dev = qpd->dqm->dev; 186 struct kfd_dev *dev = qpd->dqm->dev;
175 187
176 if (!KFD_IS_SOC15(dev->device_info->asic_family) || 188 if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
177 q->properties.type == KFD_QUEUE_TYPE_SDMA) 189 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
190 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
178 return; 191 return;
179 192
180 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 193 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
@@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
289 302
290 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 303 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
291 retval = create_compute_queue_nocpsch(dqm, q, qpd); 304 retval = create_compute_queue_nocpsch(dqm, q, qpd);
292 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 305 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
306 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
293 retval = create_sdma_queue_nocpsch(dqm, q, qpd); 307 retval = create_sdma_queue_nocpsch(dqm, q, qpd);
294 else 308 else
295 retval = -EINVAL; 309 retval = -EINVAL;
@@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
307 321
308 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 322 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
309 dqm->sdma_queue_count++; 323 dqm->sdma_queue_count++;
324 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
325 dqm->xgmi_sdma_queue_count++;
310 326
311 /* 327 /*
312 * Unconditionally increment this counter, regardless of the queue's 328 * Unconditionally increment this counter, regardless of the queue's
@@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
368 struct mqd_manager *mqd_mgr; 384 struct mqd_manager *mqd_mgr;
369 int retval; 385 int retval;
370 386
371 mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); 387 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
372 if (!mqd_mgr)
373 return -ENOMEM;
374 388
375 retval = allocate_hqd(dqm, q); 389 retval = allocate_hqd(dqm, q);
376 if (retval) 390 if (retval)
@@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
425 int retval; 439 int retval;
426 struct mqd_manager *mqd_mgr; 440 struct mqd_manager *mqd_mgr;
427 441
428 mqd_mgr = dqm->ops.get_mqd_manager(dqm, 442 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
429 get_mqd_type_from_queue_type(q->properties.type)); 443 q->properties.type)];
430 if (!mqd_mgr)
431 return -ENOMEM;
432 444
433 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 445 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
434 deallocate_hqd(dqm, q); 446 deallocate_hqd(dqm, q);
435 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 447 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
436 dqm->sdma_queue_count--; 448 dqm->sdma_queue_count--;
437 deallocate_sdma_queue(dqm, q->sdma_id); 449 deallocate_sdma_queue(dqm, q);
450 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
451 dqm->xgmi_sdma_queue_count--;
452 deallocate_sdma_queue(dqm, q);
438 } else { 453 } else {
439 pr_debug("q->properties.type %d is invalid\n", 454 pr_debug("q->properties.type %d is invalid\n",
440 q->properties.type); 455 q->properties.type);
@@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
501 retval = -ENODEV; 516 retval = -ENODEV;
502 goto out_unlock; 517 goto out_unlock;
503 } 518 }
504 mqd_mgr = dqm->ops.get_mqd_manager(dqm, 519 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
505 get_mqd_type_from_queue_type(q->properties.type)); 520 q->properties.type)];
506 if (!mqd_mgr) {
507 retval = -ENOMEM;
508 goto out_unlock;
509 }
510 /* 521 /*
511 * Eviction state logic: we only mark active queues as evicted 522 * Eviction state logic: we only mark active queues as evicted
512 * to avoid the overhead of restoring inactive queues later 523 * to avoid the overhead of restoring inactive queues later
@@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
529 } 540 }
530 } else if (prev_active && 541 } else if (prev_active &&
531 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 542 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
532 q->properties.type == KFD_QUEUE_TYPE_SDMA)) { 543 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
544 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
533 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 545 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
534 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, 546 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
535 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 547 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
556 retval = map_queues_cpsch(dqm); 568 retval = map_queues_cpsch(dqm);
557 else if (q->properties.is_active && 569 else if (q->properties.is_active &&
558 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 570 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
559 q->properties.type == KFD_QUEUE_TYPE_SDMA)) { 571 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
572 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
560 if (WARN(q->process->mm != current->mm, 573 if (WARN(q->process->mm != current->mm,
561 "should only run in user thread")) 574 "should only run in user thread"))
562 retval = -EFAULT; 575 retval = -EFAULT;
@@ -571,27 +584,6 @@ out_unlock:
571 return retval; 584 return retval;
572} 585}
573 586
574static struct mqd_manager *get_mqd_manager(
575 struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
576{
577 struct mqd_manager *mqd_mgr;
578
579 if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
580 return NULL;
581
582 pr_debug("mqd type %d\n", type);
583
584 mqd_mgr = dqm->mqd_mgrs[type];
585 if (!mqd_mgr) {
586 mqd_mgr = mqd_manager_init(type, dqm->dev);
587 if (!mqd_mgr)
588 pr_err("mqd manager is NULL");
589 dqm->mqd_mgrs[type] = mqd_mgr;
590 }
591
592 return mqd_mgr;
593}
594
595static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 587static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
596 struct qcm_process_device *qpd) 588 struct qcm_process_device *qpd)
597{ 589{
@@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
612 list_for_each_entry(q, &qpd->queues_list, list) { 604 list_for_each_entry(q, &qpd->queues_list, list) {
613 if (!q->properties.is_active) 605 if (!q->properties.is_active)
614 continue; 606 continue;
615 mqd_mgr = dqm->ops.get_mqd_manager(dqm, 607 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
616 get_mqd_type_from_queue_type(q->properties.type)); 608 q->properties.type)];
617 if (!mqd_mgr) { /* should not be here */
618 pr_err("Cannot evict queue, mqd mgr is NULL\n");
619 retval = -ENOMEM;
620 goto out;
621 }
622 q->properties.is_evicted = true; 609 q->properties.is_evicted = true;
623 q->properties.is_active = false; 610 q->properties.is_active = false;
624 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 611 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
@@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
717 list_for_each_entry(q, &qpd->queues_list, list) { 704 list_for_each_entry(q, &qpd->queues_list, list) {
718 if (!q->properties.is_evicted) 705 if (!q->properties.is_evicted)
719 continue; 706 continue;
720 mqd_mgr = dqm->ops.get_mqd_manager(dqm, 707 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
721 get_mqd_type_from_queue_type(q->properties.type)); 708 q->properties.type)];
722 if (!mqd_mgr) { /* should not be here */
723 pr_err("Cannot restore queue, mqd mgr is NULL\n");
724 retval = -ENOMEM;
725 goto out;
726 }
727 q->properties.is_evicted = false; 709 q->properties.is_evicted = false;
728 q->properties.is_active = true; 710 q->properties.is_active = true;
729 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 711 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
@@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
812 retval = dqm->asic_ops.update_qpd(dqm, qpd); 794 retval = dqm->asic_ops.update_qpd(dqm, qpd);
813 795
814 dqm->processes_count++; 796 dqm->processes_count++;
815 kfd_inc_compute_active(dqm->dev);
816 797
817 dqm_unlock(dqm); 798 dqm_unlock(dqm);
818 799
800 /* Outside the DQM lock because under the DQM lock we can't do
801 * reclaim or take other locks that others hold while reclaiming.
802 */
803 kfd_inc_compute_active(dqm->dev);
804
819 return retval; 805 return retval;
820} 806}
821 807
@@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
836 list_del(&cur->list); 822 list_del(&cur->list);
837 kfree(cur); 823 kfree(cur);
838 dqm->processes_count--; 824 dqm->processes_count--;
839 kfd_dec_compute_active(dqm->dev);
840 goto out; 825 goto out;
841 } 826 }
842 } 827 }
@@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
844 retval = 1; 829 retval = 1;
845out: 830out:
846 dqm_unlock(dqm); 831 dqm_unlock(dqm);
832
833 /* Outside the DQM lock because under the DQM lock we can't do
834 * reclaim or take other locks that others hold while reclaiming.
835 */
836 if (!retval)
837 kfd_dec_compute_active(dqm->dev);
838
847 return retval; 839 return retval;
848} 840}
849 841
@@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
879 INIT_LIST_HEAD(&dqm->queues); 871 INIT_LIST_HEAD(&dqm->queues);
880 dqm->queue_count = dqm->next_pipe_to_allocate = 0; 872 dqm->queue_count = dqm->next_pipe_to_allocate = 0;
881 dqm->sdma_queue_count = 0; 873 dqm->sdma_queue_count = 0;
874 dqm->xgmi_sdma_queue_count = 0;
882 875
883 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 876 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
884 int pipe_offset = pipe * get_queues_per_pipe(dqm); 877 int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
890 } 883 }
891 884
892 dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; 885 dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
893 dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; 886 dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
887 dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
894 888
895 return 0; 889 return 0;
896} 890}
@@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
921} 915}
922 916
923static int allocate_sdma_queue(struct device_queue_manager *dqm, 917static int allocate_sdma_queue(struct device_queue_manager *dqm,
924 unsigned int *sdma_queue_id) 918 struct queue *q)
925{ 919{
926 int bit; 920 int bit;
927 921
928 if (dqm->sdma_bitmap == 0) 922 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
929 return -ENOMEM; 923 if (dqm->sdma_bitmap == 0)
924 return -ENOMEM;
925 bit = __ffs64(dqm->sdma_bitmap);
926 dqm->sdma_bitmap &= ~(1ULL << bit);
927 q->sdma_id = bit;
928 q->properties.sdma_engine_id = q->sdma_id %
929 get_num_sdma_engines(dqm);
930 q->properties.sdma_queue_id = q->sdma_id /
931 get_num_sdma_engines(dqm);
932 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
933 if (dqm->xgmi_sdma_bitmap == 0)
934 return -ENOMEM;
935 bit = __ffs64(dqm->xgmi_sdma_bitmap);
936 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
937 q->sdma_id = bit;
938 /* sdma_engine_id is sdma id including
939 * both PCIe-optimized SDMAs and XGMI-
940 * optimized SDMAs. The calculation below
941 * assumes the first N engines are always
942 * PCIe-optimized ones
943 */
944 q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
945 q->sdma_id % get_num_xgmi_sdma_engines(dqm);
946 q->properties.sdma_queue_id = q->sdma_id /
947 get_num_xgmi_sdma_engines(dqm);
948 }
930 949
931 bit = ffs(dqm->sdma_bitmap) - 1; 950 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
932 dqm->sdma_bitmap &= ~(1 << bit); 951 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
933 *sdma_queue_id = bit;
934 952
935 return 0; 953 return 0;
936} 954}
937 955
938static void deallocate_sdma_queue(struct device_queue_manager *dqm, 956static void deallocate_sdma_queue(struct device_queue_manager *dqm,
939 unsigned int sdma_queue_id) 957 struct queue *q)
940{ 958{
941 if (sdma_queue_id >= get_num_sdma_queues(dqm)) 959 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
942 return; 960 if (q->sdma_id >= get_num_sdma_queues(dqm))
943 dqm->sdma_bitmap |= (1 << sdma_queue_id); 961 return;
962 dqm->sdma_bitmap |= (1ULL << q->sdma_id);
963 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
964 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
965 return;
966 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
967 }
944} 968}
945 969
946static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, 970static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
@@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
950 struct mqd_manager *mqd_mgr; 974 struct mqd_manager *mqd_mgr;
951 int retval; 975 int retval;
952 976
953 mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); 977 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
954 if (!mqd_mgr)
955 return -ENOMEM;
956 978
957 retval = allocate_sdma_queue(dqm, &q->sdma_id); 979 retval = allocate_sdma_queue(dqm, q);
958 if (retval) 980 if (retval)
959 return retval; 981 return retval;
960 982
961 q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
962 q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
963
964 retval = allocate_doorbell(qpd, q); 983 retval = allocate_doorbell(qpd, q);
965 if (retval) 984 if (retval)
966 goto out_deallocate_sdma_queue; 985 goto out_deallocate_sdma_queue;
967 986
968 pr_debug("SDMA id is: %d\n", q->sdma_id);
969 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
970 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
971
972 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 987 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
973 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, 988 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
974 &q->gart_mqd_addr, &q->properties); 989 &q->gart_mqd_addr, &q->properties);
@@ -987,7 +1002,7 @@ out_uninit_mqd:
987out_deallocate_doorbell: 1002out_deallocate_doorbell:
988 deallocate_doorbell(qpd, q); 1003 deallocate_doorbell(qpd, q);
989out_deallocate_sdma_queue: 1004out_deallocate_sdma_queue:
990 deallocate_sdma_queue(dqm, q->sdma_id); 1005 deallocate_sdma_queue(dqm, q);
991 1006
992 return retval; 1007 return retval;
993} 1008}
@@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
1045 INIT_LIST_HEAD(&dqm->queues); 1060 INIT_LIST_HEAD(&dqm->queues);
1046 dqm->queue_count = dqm->processes_count = 0; 1061 dqm->queue_count = dqm->processes_count = 0;
1047 dqm->sdma_queue_count = 0; 1062 dqm->sdma_queue_count = 0;
1063 dqm->xgmi_sdma_queue_count = 0;
1048 dqm->active_runlist = false; 1064 dqm->active_runlist = false;
1049 dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; 1065 dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
1066 dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
1050 1067
1051 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1068 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1052 1069
@@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1161 int retval; 1178 int retval;
1162 struct mqd_manager *mqd_mgr; 1179 struct mqd_manager *mqd_mgr;
1163 1180
1164 retval = 0;
1165
1166 dqm_lock(dqm);
1167
1168 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1181 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1169 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1182 pr_warn("Can't create new usermode queue because %d queues were already created\n",
1170 dqm->total_queue_count); 1183 dqm->total_queue_count);
1171 retval = -EPERM; 1184 retval = -EPERM;
1172 goto out_unlock; 1185 goto out;
1173 } 1186 }
1174 1187
1175 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1188 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1176 retval = allocate_sdma_queue(dqm, &q->sdma_id); 1189 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1190 retval = allocate_sdma_queue(dqm, q);
1177 if (retval) 1191 if (retval)
1178 goto out_unlock; 1192 goto out;
1179 q->properties.sdma_queue_id =
1180 q->sdma_id / get_num_sdma_engines(dqm);
1181 q->properties.sdma_engine_id =
1182 q->sdma_id % get_num_sdma_engines(dqm);
1183 } 1193 }
1184 1194
1185 retval = allocate_doorbell(qpd, q); 1195 retval = allocate_doorbell(qpd, q);
1186 if (retval) 1196 if (retval)
1187 goto out_deallocate_sdma_queue; 1197 goto out_deallocate_sdma_queue;
1188 1198
1189 mqd_mgr = dqm->ops.get_mqd_manager(dqm, 1199 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1190 get_mqd_type_from_queue_type(q->properties.type)); 1200 q->properties.type)];
1191
1192 if (!mqd_mgr) {
1193 retval = -ENOMEM;
1194 goto out_deallocate_doorbell;
1195 }
1196 /* 1201 /*
1197 * Eviction state logic: we only mark active queues as evicted 1202 * Eviction state logic: we only mark active queues as evicted
1198 * to avoid the overhead of restoring inactive queues later 1203 * to avoid the overhead of restoring inactive queues later
@@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1201 q->properties.is_evicted = (q->properties.queue_size > 0 && 1206 q->properties.is_evicted = (q->properties.queue_size > 0 &&
1202 q->properties.queue_percent > 0 && 1207 q->properties.queue_percent > 0 &&
1203 q->properties.queue_address != 0); 1208 q->properties.queue_address != 0);
1204
1205 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1209 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1206
1207 q->properties.tba_addr = qpd->tba_addr; 1210 q->properties.tba_addr = qpd->tba_addr;
1208 q->properties.tma_addr = qpd->tma_addr; 1211 q->properties.tma_addr = qpd->tma_addr;
1209 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, 1212 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1211 if (retval) 1214 if (retval)
1212 goto out_deallocate_doorbell; 1215 goto out_deallocate_doorbell;
1213 1216
1217 dqm_lock(dqm);
1218
1214 list_add(&q->list, &qpd->queues_list); 1219 list_add(&q->list, &qpd->queues_list);
1215 qpd->queue_count++; 1220 qpd->queue_count++;
1216 if (q->properties.is_active) { 1221 if (q->properties.is_active) {
@@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1221 1226
1222 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 1227 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
1223 dqm->sdma_queue_count++; 1228 dqm->sdma_queue_count++;
1229 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1230 dqm->xgmi_sdma_queue_count++;
1224 /* 1231 /*
1225 * Unconditionally increment this counter, regardless of the queue's 1232 * Unconditionally increment this counter, regardless of the queue's
1226 * type or whether the queue is active. 1233 * type or whether the queue is active.
@@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1236out_deallocate_doorbell: 1243out_deallocate_doorbell:
1237 deallocate_doorbell(qpd, q); 1244 deallocate_doorbell(qpd, q);
1238out_deallocate_sdma_queue: 1245out_deallocate_sdma_queue:
1239 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 1246 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1240 deallocate_sdma_queue(dqm, q->sdma_id); 1247 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1241out_unlock: 1248 deallocate_sdma_queue(dqm, q);
1242 dqm_unlock(dqm); 1249out:
1243
1244 return retval; 1250 return retval;
1245} 1251}
1246 1252
@@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
1268 return 0; 1274 return 0;
1269} 1275}
1270 1276
1271static int unmap_sdma_queues(struct device_queue_manager *dqm, 1277static int unmap_sdma_queues(struct device_queue_manager *dqm)
1272 unsigned int sdma_engine)
1273{ 1278{
1274 return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, 1279 int i, retval = 0;
1275 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, 1280
1276 sdma_engine); 1281 for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
1282 dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
1283 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
1284 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
1285 if (retval)
1286 return retval;
1287 }
1288 return retval;
1277} 1289}
1278 1290
1279/* dqm->lock mutex has to be locked before calling this function */ 1291/* dqm->lock mutex has to be locked before calling this function */
@@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1309 if (!dqm->active_runlist) 1321 if (!dqm->active_runlist)
1310 return retval; 1322 return retval;
1311 1323
1312 pr_debug("Before destroying queues, sdma queue count is : %u\n", 1324 pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
1313 dqm->sdma_queue_count); 1325 dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
1314 1326
1315 if (dqm->sdma_queue_count > 0) { 1327 if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
1316 unmap_sdma_queues(dqm, 0); 1328 unmap_sdma_queues(dqm);
1317 unmap_sdma_queues(dqm, 1);
1318 }
1319 1329
1320 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, 1330 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
1321 filter, filter_param, false, 0); 1331 filter, filter_param, false, 0);
@@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1379 1389
1380 } 1390 }
1381 1391
1382 mqd_mgr = dqm->ops.get_mqd_manager(dqm, 1392 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1383 get_mqd_type_from_queue_type(q->properties.type)); 1393 q->properties.type)];
1384 if (!mqd_mgr) {
1385 retval = -ENOMEM;
1386 goto failed;
1387 }
1388 1394
1389 deallocate_doorbell(qpd, q); 1395 deallocate_doorbell(qpd, q);
1390 1396
1391 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1397 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1392 dqm->sdma_queue_count--; 1398 dqm->sdma_queue_count--;
1393 deallocate_sdma_queue(dqm, q->sdma_id); 1399 deallocate_sdma_queue(dqm, q);
1400 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1401 dqm->xgmi_sdma_queue_count--;
1402 deallocate_sdma_queue(dqm, q);
1394 } 1403 }
1395 1404
1396 list_del(&q->list); 1405 list_del(&q->list);
@@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1403 qpd->reset_wavefronts = true; 1412 qpd->reset_wavefronts = true;
1404 } 1413 }
1405 1414
1406 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1407
1408 /* 1415 /*
1409 * Unconditionally decrement this counter, regardless of the queue's 1416 * Unconditionally decrement this counter, regardless of the queue's
1410 * type 1417 * type
@@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1415 1422
1416 dqm_unlock(dqm); 1423 dqm_unlock(dqm);
1417 1424
1425 /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
1426 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1427
1418 return retval; 1428 return retval;
1419 1429
1420failed:
1421failed_try_destroy_debugged_queue: 1430failed_try_destroy_debugged_queue:
1422 1431
1423 dqm_unlock(dqm); 1432 dqm_unlock(dqm);
@@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
1520 struct queue *q, *next; 1529 struct queue *q, *next;
1521 struct device_process_node *cur, *next_dpn; 1530 struct device_process_node *cur, *next_dpn;
1522 int retval = 0; 1531 int retval = 0;
1532 bool found = false;
1523 1533
1524 dqm_lock(dqm); 1534 dqm_lock(dqm);
1525 1535
@@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
1538 list_del(&cur->list); 1548 list_del(&cur->list);
1539 kfree(cur); 1549 kfree(cur);
1540 dqm->processes_count--; 1550 dqm->processes_count--;
1541 kfd_dec_compute_active(dqm->dev); 1551 found = true;
1542 break; 1552 break;
1543 } 1553 }
1544 } 1554 }
1545 1555
1546 dqm_unlock(dqm); 1556 dqm_unlock(dqm);
1557
1558 /* Outside the DQM lock because under the DQM lock we can't do
1559 * reclaim or take other locks that others hold while reclaiming.
1560 */
1561 if (found)
1562 kfd_dec_compute_active(dqm->dev);
1563
1547 return retval; 1564 return retval;
1548} 1565}
1549 1566
@@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
1564 goto dqm_unlock; 1581 goto dqm_unlock;
1565 } 1582 }
1566 1583
1567 mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); 1584 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
1568 if (!mqd_mgr) {
1569 r = -ENOMEM;
1570 goto dqm_unlock;
1571 }
1572 1585
1573 if (!mqd_mgr->get_wave_state) { 1586 if (!mqd_mgr->get_wave_state) {
1574 r = -EINVAL; 1587 r = -EINVAL;
@@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
1593 struct device_process_node *cur, *next_dpn; 1606 struct device_process_node *cur, *next_dpn;
1594 enum kfd_unmap_queues_filter filter = 1607 enum kfd_unmap_queues_filter filter =
1595 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 1608 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1609 bool found = false;
1596 1610
1597 retval = 0; 1611 retval = 0;
1598 1612
@@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
1611 list_for_each_entry(q, &qpd->queues_list, list) { 1625 list_for_each_entry(q, &qpd->queues_list, list) {
1612 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1626 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1613 dqm->sdma_queue_count--; 1627 dqm->sdma_queue_count--;
1614 deallocate_sdma_queue(dqm, q->sdma_id); 1628 deallocate_sdma_queue(dqm, q);
1629 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1630 dqm->xgmi_sdma_queue_count--;
1631 deallocate_sdma_queue(dqm, q);
1615 } 1632 }
1616 1633
1617 if (q->properties.is_active) 1634 if (q->properties.is_active)
@@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
1626 list_del(&cur->list); 1643 list_del(&cur->list);
1627 kfree(cur); 1644 kfree(cur);
1628 dqm->processes_count--; 1645 dqm->processes_count--;
1629 kfd_dec_compute_active(dqm->dev); 1646 found = true;
1630 break; 1647 break;
1631 } 1648 }
1632 } 1649 }
@@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
1638 qpd->reset_wavefronts = false; 1655 qpd->reset_wavefronts = false;
1639 } 1656 }
1640 1657
1641 /* lastly, free mqd resources */ 1658 dqm_unlock(dqm);
1659
1660 /* Outside the DQM lock because under the DQM lock we can't do
1661 * reclaim or take other locks that others hold while reclaiming.
1662 */
1663 if (found)
1664 kfd_dec_compute_active(dqm->dev);
1665
1666 /* Lastly, free mqd resources.
1667 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
1668 */
1642 list_for_each_entry_safe(q, next, &qpd->queues_list, list) { 1669 list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1643 mqd_mgr = dqm->ops.get_mqd_manager(dqm, 1670 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1644 get_mqd_type_from_queue_type(q->properties.type)); 1671 q->properties.type)];
1645 if (!mqd_mgr) {
1646 retval = -ENOMEM;
1647 goto out;
1648 }
1649 list_del(&q->list); 1672 list_del(&q->list);
1650 qpd->queue_count--; 1673 qpd->queue_count--;
1651 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1674 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1652 } 1675 }
1653 1676
1654out: 1677 return retval;
1655 dqm_unlock(dqm); 1678}
1679
1680static int init_mqd_managers(struct device_queue_manager *dqm)
1681{
1682 int i, j;
1683 struct mqd_manager *mqd_mgr;
1684
1685 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1686 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1687 if (!mqd_mgr) {
1688 pr_err("mqd manager [%d] initialization failed\n", i);
1689 goto out_free;
1690 }
1691 dqm->mqd_mgrs[i] = mqd_mgr;
1692 }
1693
1694 return 0;
1695
1696out_free:
1697 for (j = 0; j < i; j++) {
1698 kfree(dqm->mqd_mgrs[j]);
1699 dqm->mqd_mgrs[j] = NULL;
1700 }
1701
1702 return -ENOMEM;
1703}
1704
1705/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
1706static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1707{
1708 int retval;
1709 struct kfd_dev *dev = dqm->dev;
1710 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
1711 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
1712 dev->device_info->num_sdma_engines *
1713 dev->device_info->num_sdma_queues_per_engine +
1714 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
1715
1716 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
1717 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
1718 (void *)&(mem_obj->cpu_ptr), true);
1719
1656 return retval; 1720 return retval;
1657} 1721}
1658 1722
@@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1693 dqm->ops.stop = stop_cpsch; 1757 dqm->ops.stop = stop_cpsch;
1694 dqm->ops.destroy_queue = destroy_queue_cpsch; 1758 dqm->ops.destroy_queue = destroy_queue_cpsch;
1695 dqm->ops.update_queue = update_queue; 1759 dqm->ops.update_queue = update_queue;
1696 dqm->ops.get_mqd_manager = get_mqd_manager;
1697 dqm->ops.register_process = register_process; 1760 dqm->ops.register_process = register_process;
1698 dqm->ops.unregister_process = unregister_process; 1761 dqm->ops.unregister_process = unregister_process;
1699 dqm->ops.uninitialize = uninitialize; 1762 dqm->ops.uninitialize = uninitialize;
@@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1713 dqm->ops.create_queue = create_queue_nocpsch; 1776 dqm->ops.create_queue = create_queue_nocpsch;
1714 dqm->ops.destroy_queue = destroy_queue_nocpsch; 1777 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1715 dqm->ops.update_queue = update_queue; 1778 dqm->ops.update_queue = update_queue;
1716 dqm->ops.get_mqd_manager = get_mqd_manager;
1717 dqm->ops.register_process = register_process; 1779 dqm->ops.register_process = register_process;
1718 dqm->ops.unregister_process = unregister_process; 1780 dqm->ops.unregister_process = unregister_process;
1719 dqm->ops.initialize = initialize_nocpsch; 1781 dqm->ops.initialize = initialize_nocpsch;
@@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1749 case CHIP_POLARIS10: 1811 case CHIP_POLARIS10:
1750 case CHIP_POLARIS11: 1812 case CHIP_POLARIS11:
1751 case CHIP_POLARIS12: 1813 case CHIP_POLARIS12:
1814 case CHIP_VEGAM:
1752 device_queue_manager_init_vi_tonga(&dqm->asic_ops); 1815 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1753 break; 1816 break;
1754 1817
@@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1764 goto out_free; 1827 goto out_free;
1765 } 1828 }
1766 1829
1830 if (init_mqd_managers(dqm))
1831 goto out_free;
1832
1833 if (allocate_hiq_sdma_mqd(dqm)) {
1834 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
1835 goto out_free;
1836 }
1837
1767 if (!dqm->ops.initialize(dqm)) 1838 if (!dqm->ops.initialize(dqm))
1768 return dqm; 1839 return dqm;
1769 1840
@@ -1772,9 +1843,17 @@ out_free:
1772 return NULL; 1843 return NULL;
1773} 1844}
1774 1845
1846void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
1847{
1848 WARN(!mqd, "No hiq sdma mqd trunk to free");
1849
1850 amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
1851}
1852
1775void device_queue_manager_uninit(struct device_queue_manager *dqm) 1853void device_queue_manager_uninit(struct device_queue_manager *dqm)
1776{ 1854{
1777 dqm->ops.uninitialize(dqm); 1855 dqm->ops.uninitialize(dqm);
1856 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
1778 kfree(dqm); 1857 kfree(dqm);
1779} 1858}
1780 1859
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 70e38a2e23b9..88b4c007696e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -48,8 +48,6 @@ struct device_process_node {
48 * 48 *
49 * @update_queue: Queue update routine. 49 * @update_queue: Queue update routine.
50 * 50 *
51 * @get_mqd_manager: Returns the mqd manager according to the mqd type.
52 *
53 * @exeute_queues: Dispatches the queues list to the H/W. 51 * @exeute_queues: Dispatches the queues list to the H/W.
54 * 52 *
55 * @register_process: This routine associates a specific process with device. 53 * @register_process: This routine associates a specific process with device.
@@ -97,10 +95,6 @@ struct device_queue_manager_ops {
97 int (*update_queue)(struct device_queue_manager *dqm, 95 int (*update_queue)(struct device_queue_manager *dqm,
98 struct queue *q); 96 struct queue *q);
99 97
100 struct mqd_manager * (*get_mqd_manager)
101 (struct device_queue_manager *dqm,
102 enum KFD_MQD_TYPE type);
103
104 int (*register_process)(struct device_queue_manager *dqm, 98 int (*register_process)(struct device_queue_manager *dqm,
105 struct qcm_process_device *qpd); 99 struct qcm_process_device *qpd);
106 100
@@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {
158 void (*init_sdma_vm)(struct device_queue_manager *dqm, 152 void (*init_sdma_vm)(struct device_queue_manager *dqm,
159 struct queue *q, 153 struct queue *q,
160 struct qcm_process_device *qpd); 154 struct qcm_process_device *qpd);
155 struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type,
156 struct kfd_dev *dev);
161}; 157};
162 158
163/** 159/**
@@ -185,10 +181,12 @@ struct device_queue_manager {
185 unsigned int processes_count; 181 unsigned int processes_count;
186 unsigned int queue_count; 182 unsigned int queue_count;
187 unsigned int sdma_queue_count; 183 unsigned int sdma_queue_count;
184 unsigned int xgmi_sdma_queue_count;
188 unsigned int total_queue_count; 185 unsigned int total_queue_count;
189 unsigned int next_pipe_to_allocate; 186 unsigned int next_pipe_to_allocate;
190 unsigned int *allocated_queues; 187 unsigned int *allocated_queues;
191 unsigned int sdma_bitmap; 188 uint64_t sdma_bitmap;
189 uint64_t xgmi_sdma_bitmap;
192 unsigned int vmid_bitmap; 190 unsigned int vmid_bitmap;
193 uint64_t pipelines_addr; 191 uint64_t pipelines_addr;
194 struct kfd_mem_obj *pipeline_mem; 192 struct kfd_mem_obj *pipeline_mem;
@@ -201,6 +199,7 @@ struct device_queue_manager {
201 /* hw exception */ 199 /* hw exception */
202 bool is_hws_hang; 200 bool is_hws_hang;
203 struct work_struct hw_exception_work; 201 struct work_struct hw_exception_work;
202 struct kfd_mem_obj hiq_sdma_mqd;
204}; 203};
205 204
206void device_queue_manager_init_cik( 205void device_queue_manager_init_cik(
@@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
219unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); 218unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
220unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); 219unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
221unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); 220unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
221unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
222 222
223static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) 223static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
224{ 224{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index aed4c21417bf..0d26506798cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -48,6 +48,7 @@ void device_queue_manager_init_cik(
48 asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; 48 asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
49 asic_ops->update_qpd = update_qpd_cik; 49 asic_ops->update_qpd = update_qpd_cik;
50 asic_ops->init_sdma_vm = init_sdma_vm; 50 asic_ops->init_sdma_vm = init_sdma_vm;
51 asic_ops->mqd_manager_init = mqd_manager_init_cik;
51} 52}
52 53
53void device_queue_manager_init_cik_hawaii( 54void device_queue_manager_init_cik_hawaii(
@@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(
56 asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; 57 asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
57 asic_ops->update_qpd = update_qpd_cik_hawaii; 58 asic_ops->update_qpd = update_qpd_cik_hawaii;
58 asic_ops->init_sdma_vm = init_sdma_vm_hawaii; 59 asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
60 asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
59} 61}
60 62
61static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) 63static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 417515332c35..e9fe39382371 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -37,6 +37,7 @@ void device_queue_manager_init_v9(
37{ 37{
38 asic_ops->update_qpd = update_qpd_v9; 38 asic_ops->update_qpd = update_qpd_v9;
39 asic_ops->init_sdma_vm = init_sdma_vm_v9; 39 asic_ops->init_sdma_vm = init_sdma_vm_v9;
40 asic_ops->mqd_manager_init = mqd_manager_init_v9;
40} 41}
41 42
42static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) 43static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index c3a5dcfe877a..3a7cb2f88366 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -54,6 +54,7 @@ void device_queue_manager_init_vi(
54 asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; 54 asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
55 asic_ops->update_qpd = update_qpd_vi; 55 asic_ops->update_qpd = update_qpd_vi;
56 asic_ops->init_sdma_vm = init_sdma_vm; 56 asic_ops->init_sdma_vm = init_sdma_vm;
57 asic_ops->mqd_manager_init = mqd_manager_init_vi;
57} 58}
58 59
59void device_queue_manager_init_vi_tonga( 60void device_queue_manager_init_vi_tonga(
@@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(
62 asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; 63 asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
63 asic_ops->update_qpd = update_qpd_vi_tonga; 64 asic_ops->update_qpd = update_qpd_vi_tonga;
64 asic_ops->init_sdma_vm = init_sdma_vm_tonga; 65 asic_ops->init_sdma_vm = init_sdma_vm_tonga;
66 asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
65} 67}
66 68
67static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) 69static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 6e1d41c5bf86..d674d4b3340f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
983 return; /* Presumably process exited. */ 983 return; /* Presumably process exited. */
984 memset(&memory_exception_data, 0, sizeof(memory_exception_data)); 984 memset(&memory_exception_data, 0, sizeof(memory_exception_data));
985 memory_exception_data.gpu_id = dev->id; 985 memory_exception_data.gpu_id = dev->id;
986 memory_exception_data.failure.imprecise = 1; 986 memory_exception_data.failure.imprecise = true;
987 /* Set failure reason */ 987 /* Set failure reason */
988 if (info) { 988 if (info) {
989 memory_exception_data.va = (info->page_addr) << PAGE_SHIFT; 989 memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 213ea5454d11..22a8e88b6a67 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)
398 case CHIP_POLARIS10: 398 case CHIP_POLARIS10:
399 case CHIP_POLARIS11: 399 case CHIP_POLARIS11:
400 case CHIP_POLARIS12: 400 case CHIP_POLARIS12:
401 case CHIP_VEGAM:
401 kfd_init_apertures_vi(pdd, id); 402 kfd_init_apertures_vi(pdd, id);
402 break; 403 break;
403 case CHIP_VEGA10: 404 case CHIP_VEGA10:
@@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)
435 436
436 return 0; 437 return 0;
437} 438}
438
439
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index f1596881f20a..1cc03b3ddbb9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
58 kq->nop_packet = nop.u32all; 58 kq->nop_packet = nop.u32all;
59 switch (type) { 59 switch (type) {
60 case KFD_QUEUE_TYPE_DIQ: 60 case KFD_QUEUE_TYPE_DIQ:
61 kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
62 break;
61 case KFD_QUEUE_TYPE_HIQ: 63 case KFD_QUEUE_TYPE_HIQ:
62 kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, 64 kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
63 KFD_MQD_TYPE_HIQ);
64 break; 65 break;
65 default: 66 default:
66 pr_err("Invalid queue type %d\n", type); 67 pr_err("Invalid queue type %d\n", type);
@@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
314 case CHIP_POLARIS10: 315 case CHIP_POLARIS10:
315 case CHIP_POLARIS11: 316 case CHIP_POLARIS11:
316 case CHIP_POLARIS12: 317 case CHIP_POLARIS12:
318 case CHIP_VEGAM:
317 kernel_queue_init_vi(&kq->ops_asic_specific); 319 kernel_queue_init_vi(&kq->ops_asic_specific);
318 break; 320 break;
319 321
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 33830b1a5a54..07f02f8e4fe4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
153 153
154 packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, 154 packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
155 sizeof(struct pm4_mes_map_queues)); 155 sizeof(struct pm4_mes_map_queues));
156 packet->bitfields2.alloc_format =
157 alloc_format__mes_map_queues__one_per_pipe_vi;
158 packet->bitfields2.num_queues = 1; 156 packet->bitfields2.num_queues = 1;
159 packet->bitfields2.queue_sel = 157 packet->bitfields2.queue_sel =
160 queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; 158 queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
161 159
162 packet->bitfields2.engine_sel = 160 packet->bitfields2.engine_sel =
163 engine_sel__mes_map_queues__compute_vi; 161 engine_sel__mes_map_queues__compute_vi;
162 packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
164 packet->bitfields2.queue_type = 163 packet->bitfields2.queue_type =
165 queue_type__mes_map_queues__normal_compute_vi; 164 queue_type__mes_map_queues__normal_compute_vi;
166 165
@@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
175 queue_type__mes_map_queues__debug_interface_queue_vi; 174 queue_type__mes_map_queues__debug_interface_queue_vi;
176 break; 175 break;
177 case KFD_QUEUE_TYPE_SDMA: 176 case KFD_QUEUE_TYPE_SDMA:
177 case KFD_QUEUE_TYPE_SDMA_XGMI:
178 packet->bitfields2.engine_sel = q->properties.sdma_engine_id + 178 packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
179 engine_sel__mes_map_queues__sdma0_vi; 179 engine_sel__mes_map_queues__sdma0_vi;
180 use_static = false; /* no static queues under SDMA */ 180 use_static = false; /* no static queues under SDMA */
@@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
221 engine_sel__mes_unmap_queues__compute; 221 engine_sel__mes_unmap_queues__compute;
222 break; 222 break;
223 case KFD_QUEUE_TYPE_SDMA: 223 case KFD_QUEUE_TYPE_SDMA:
224 case KFD_QUEUE_TYPE_SDMA_XGMI:
224 packet->bitfields2.engine_sel = 225 packet->bitfields2.engine_sel =
225 engine_sel__mes_unmap_queues__sdma0 + sdma_engine; 226 engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
226 break; 227 break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index bf20c6d32ef3..2adaf40027eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
190 190
191 packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, 191 packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
192 sizeof(struct pm4_mes_map_queues)); 192 sizeof(struct pm4_mes_map_queues));
193 packet->bitfields2.alloc_format =
194 alloc_format__mes_map_queues__one_per_pipe_vi;
195 packet->bitfields2.num_queues = 1; 193 packet->bitfields2.num_queues = 1;
196 packet->bitfields2.queue_sel = 194 packet->bitfields2.queue_sel =
197 queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; 195 queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
@@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
212 queue_type__mes_map_queues__debug_interface_queue_vi; 210 queue_type__mes_map_queues__debug_interface_queue_vi;
213 break; 211 break;
214 case KFD_QUEUE_TYPE_SDMA: 212 case KFD_QUEUE_TYPE_SDMA:
213 case KFD_QUEUE_TYPE_SDMA_XGMI:
215 packet->bitfields2.engine_sel = q->properties.sdma_engine_id + 214 packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
216 engine_sel__mes_map_queues__sdma0_vi; 215 engine_sel__mes_map_queues__sdma0_vi;
217 use_static = false; /* no static queues under SDMA */ 216 use_static = false; /* no static queues under SDMA */
@@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
258 engine_sel__mes_unmap_queues__compute; 257 engine_sel__mes_unmap_queues__compute;
259 break; 258 break;
260 case KFD_QUEUE_TYPE_SDMA: 259 case KFD_QUEUE_TYPE_SDMA:
260 case KFD_QUEUE_TYPE_SDMA_XGMI:
261 packet->bitfields2.engine_sel = 261 packet->bitfields2.engine_sel =
262 engine_sel__mes_unmap_queues__sdma0 + sdma_engine; 262 engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
263 break; 263 break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index aed9b9b82213..9307811bc427 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -23,34 +23,54 @@
23 23
24#include "kfd_mqd_manager.h" 24#include "kfd_mqd_manager.h"
25#include "amdgpu_amdkfd.h" 25#include "amdgpu_amdkfd.h"
26#include "kfd_device_queue_manager.h"
26 27
27struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, 28struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)
28 struct kfd_dev *dev)
29{ 29{
30 switch (dev->device_info->asic_family) { 30 struct kfd_mem_obj *mqd_mem_obj = NULL;
31 case CHIP_KAVERI: 31
32 return mqd_manager_init_cik(type, dev); 32 mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
33 case CHIP_HAWAII: 33 if (!mqd_mem_obj)
34 return mqd_manager_init_cik_hawaii(type, dev); 34 return NULL;
35 case CHIP_CARRIZO: 35
36 return mqd_manager_init_vi(type, dev); 36 mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
37 case CHIP_TONGA: 37 mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
38 case CHIP_FIJI: 38 mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
39 case CHIP_POLARIS10: 39
40 case CHIP_POLARIS11: 40 return mqd_mem_obj;
41 case CHIP_POLARIS12: 41}
42 return mqd_manager_init_vi_tonga(type, dev); 42
43 case CHIP_VEGA10: 43struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
44 case CHIP_VEGA12: 44 struct queue_properties *q)
45 case CHIP_VEGA20: 45{
46 case CHIP_RAVEN: 46 struct kfd_mem_obj *mqd_mem_obj = NULL;
47 return mqd_manager_init_v9(type, dev); 47 uint64_t offset;
48 default:
49 WARN(1, "Unexpected ASIC family %u",
50 dev->device_info->asic_family);
51 }
52 48
53 return NULL; 49 mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
50 if (!mqd_mem_obj)
51 return NULL;
52
53 offset = (q->sdma_engine_id *
54 dev->device_info->num_sdma_queues_per_engine +
55 q->sdma_queue_id) *
56 dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
57
58 offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
59
60 mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
61 + offset);
62 mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
63 mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
64 dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
65
66 return mqd_mem_obj;
67}
68
69void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
70 struct kfd_mem_obj *mqd_mem_obj)
71{
72 WARN_ON(!mqd_mem_obj->gtt_mem);
73 kfree(mqd_mem_obj);
54} 74}
55 75
56void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, 76void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index f8261313ae7b..56af256a191b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -99,8 +99,16 @@ struct mqd_manager {
99 99
100 struct mutex mqd_mutex; 100 struct mutex mqd_mutex;
101 struct kfd_dev *dev; 101 struct kfd_dev *dev;
102 uint32_t mqd_size;
102}; 103};
103 104
105struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev);
106
107struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
108 struct queue_properties *q);
109void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
110 struct kfd_mem_obj *mqd_mem_obj);
111
104void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, 112void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
105 const uint32_t *cu_mask, uint32_t cu_mask_count, 113 const uint32_t *cu_mask, uint32_t cu_mask_count,
106 uint32_t *se_mask); 114 uint32_t *se_mask);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index ae90a99909ef..6e8509ec29d9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
66 m->compute_static_thread_mgmt_se3); 66 m->compute_static_thread_mgmt_se3);
67} 67}
68 68
69static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
70 struct queue_properties *q)
71{
72 struct kfd_mem_obj *mqd_mem_obj;
73
74 if (q->type == KFD_QUEUE_TYPE_HIQ)
75 return allocate_hiq_mqd(kfd);
76
77 if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
78 &mqd_mem_obj))
79 return NULL;
80
81 return mqd_mem_obj;
82}
83
84
69static int init_mqd(struct mqd_manager *mm, void **mqd, 85static int init_mqd(struct mqd_manager *mm, void **mqd,
70 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, 86 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
71 struct queue_properties *q) 87 struct queue_properties *q)
@@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
73 uint64_t addr; 89 uint64_t addr;
74 struct cik_mqd *m; 90 struct cik_mqd *m;
75 int retval; 91 int retval;
92 struct kfd_dev *kfd = mm->dev;
76 93
77 retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), 94 *mqd_mem_obj = allocate_mqd(kfd, q);
78 mqd_mem_obj); 95 if (!*mqd_mem_obj)
79
80 if (retval != 0)
81 return -ENOMEM; 96 return -ENOMEM;
82 97
83 m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; 98 m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
136{ 151{
137 int retval; 152 int retval;
138 struct cik_sdma_rlc_registers *m; 153 struct cik_sdma_rlc_registers *m;
154 struct kfd_dev *dev = mm->dev;
139 155
140 retval = kfd_gtt_sa_allocate(mm->dev, 156 *mqd_mem_obj = allocate_sdma_mqd(dev, q);
141 sizeof(struct cik_sdma_rlc_registers), 157 if (!*mqd_mem_obj)
142 mqd_mem_obj);
143
144 if (retval != 0)
145 return -ENOMEM; 158 return -ENOMEM;
146 159
147 m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; 160 m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
@@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
163 kfd_gtt_sa_free(mm->dev, mqd_mem_obj); 176 kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
164} 177}
165 178
166static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
167 struct kfd_mem_obj *mqd_mem_obj)
168{
169 kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
170}
171 179
172static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, 180static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
173 uint32_t queue_id, struct queue_properties *p, 181 uint32_t queue_id, struct queue_properties *p,
@@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
400 mqd->update_mqd = update_mqd; 408 mqd->update_mqd = update_mqd;
401 mqd->destroy_mqd = destroy_mqd; 409 mqd->destroy_mqd = destroy_mqd;
402 mqd->is_occupied = is_occupied; 410 mqd->is_occupied = is_occupied;
411 mqd->mqd_size = sizeof(struct cik_mqd);
403#if defined(CONFIG_DEBUG_FS) 412#if defined(CONFIG_DEBUG_FS)
404 mqd->debugfs_show_mqd = debugfs_show_mqd; 413 mqd->debugfs_show_mqd = debugfs_show_mqd;
405#endif 414#endif
406 break; 415 break;
407 case KFD_MQD_TYPE_HIQ: 416 case KFD_MQD_TYPE_HIQ:
408 mqd->init_mqd = init_mqd_hiq; 417 mqd->init_mqd = init_mqd_hiq;
418 mqd->uninit_mqd = uninit_mqd_hiq_sdma;
419 mqd->load_mqd = load_mqd;
420 mqd->update_mqd = update_mqd_hiq;
421 mqd->destroy_mqd = destroy_mqd;
422 mqd->is_occupied = is_occupied;
423 mqd->mqd_size = sizeof(struct cik_mqd);
424#if defined(CONFIG_DEBUG_FS)
425 mqd->debugfs_show_mqd = debugfs_show_mqd;
426#endif
427 break;
428 case KFD_MQD_TYPE_DIQ:
429 mqd->init_mqd = init_mqd_hiq;
409 mqd->uninit_mqd = uninit_mqd; 430 mqd->uninit_mqd = uninit_mqd;
410 mqd->load_mqd = load_mqd; 431 mqd->load_mqd = load_mqd;
411 mqd->update_mqd = update_mqd_hiq; 432 mqd->update_mqd = update_mqd_hiq;
412 mqd->destroy_mqd = destroy_mqd; 433 mqd->destroy_mqd = destroy_mqd;
413 mqd->is_occupied = is_occupied; 434 mqd->is_occupied = is_occupied;
435 mqd->mqd_size = sizeof(struct cik_mqd);
414#if defined(CONFIG_DEBUG_FS) 436#if defined(CONFIG_DEBUG_FS)
415 mqd->debugfs_show_mqd = debugfs_show_mqd; 437 mqd->debugfs_show_mqd = debugfs_show_mqd;
416#endif 438#endif
417 break; 439 break;
418 case KFD_MQD_TYPE_SDMA: 440 case KFD_MQD_TYPE_SDMA:
419 mqd->init_mqd = init_mqd_sdma; 441 mqd->init_mqd = init_mqd_sdma;
420 mqd->uninit_mqd = uninit_mqd_sdma; 442 mqd->uninit_mqd = uninit_mqd_hiq_sdma;
421 mqd->load_mqd = load_mqd_sdma; 443 mqd->load_mqd = load_mqd_sdma;
422 mqd->update_mqd = update_mqd_sdma; 444 mqd->update_mqd = update_mqd_sdma;
423 mqd->destroy_mqd = destroy_mqd_sdma; 445 mqd->destroy_mqd = destroy_mqd_sdma;
424 mqd->is_occupied = is_occupied_sdma; 446 mqd->is_occupied = is_occupied_sdma;
447 mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
425#if defined(CONFIG_DEBUG_FS) 448#if defined(CONFIG_DEBUG_FS)
426 mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; 449 mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
427#endif 450#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 9dbba609450e..4750338199b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -67,33 +67,54 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
67 m->compute_static_thread_mgmt_se3); 67 m->compute_static_thread_mgmt_se3);
68} 68}
69 69
70static int init_mqd(struct mqd_manager *mm, void **mqd, 70static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
71 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, 71 struct queue_properties *q)
72 struct queue_properties *q)
73{ 72{
74 int retval; 73 int retval;
75 uint64_t addr; 74 struct kfd_mem_obj *mqd_mem_obj = NULL;
76 struct v9_mqd *m; 75
77 struct kfd_dev *kfd = mm->dev; 76 if (q->type == KFD_QUEUE_TYPE_HIQ)
77 return allocate_hiq_mqd(kfd);
78 78
79 /* From V9, for CWSR, the control stack is located on the next page 79 /* From V9, for CWSR, the control stack is located on the next page
80 * boundary after the mqd, we will use the gtt allocation function 80 * boundary after the mqd, we will use the gtt allocation function
81 * instead of sub-allocation function. 81 * instead of sub-allocation function.
82 */ 82 */
83 if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { 83 if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
84 *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); 84 mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
85 if (!*mqd_mem_obj) 85 if (!mqd_mem_obj)
86 return -ENOMEM; 86 return NULL;
87 retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, 87 retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
88 ALIGN(q->ctl_stack_size, PAGE_SIZE) + 88 ALIGN(q->ctl_stack_size, PAGE_SIZE) +
89 ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), 89 ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
90 &((*mqd_mem_obj)->gtt_mem), 90 &(mqd_mem_obj->gtt_mem),
91 &((*mqd_mem_obj)->gpu_addr), 91 &(mqd_mem_obj->gpu_addr),
92 (void *)&((*mqd_mem_obj)->cpu_ptr), true); 92 (void *)&(mqd_mem_obj->cpu_ptr), true);
93 } else 93 } else {
94 retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), 94 retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
95 mqd_mem_obj); 95 &mqd_mem_obj);
96 if (retval != 0) 96 }
97
98 if (retval) {
99 kfree(mqd_mem_obj);
100 return NULL;
101 }
102
103 return mqd_mem_obj;
104
105}
106
107static int init_mqd(struct mqd_manager *mm, void **mqd,
108 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
109 struct queue_properties *q)
110{
111 int retval;
112 uint64_t addr;
113 struct v9_mqd *m;
114 struct kfd_dev *kfd = mm->dev;
115
116 *mqd_mem_obj = allocate_mqd(kfd, q);
117 if (!*mqd_mem_obj)
97 return -ENOMEM; 118 return -ENOMEM;
98 119
99 m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; 120 m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
328{ 349{
329 int retval; 350 int retval;
330 struct v9_sdma_mqd *m; 351 struct v9_sdma_mqd *m;
352 struct kfd_dev *dev = mm->dev;
331 353
332 354 *mqd_mem_obj = allocate_sdma_mqd(dev, q);
333 retval = kfd_gtt_sa_allocate(mm->dev, 355 if (!*mqd_mem_obj)
334 sizeof(struct v9_sdma_mqd),
335 mqd_mem_obj);
336
337 if (retval != 0)
338 return -ENOMEM; 356 return -ENOMEM;
339 357
340 m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; 358 m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
350 return retval; 368 return retval;
351} 369}
352 370
353static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
354 struct kfd_mem_obj *mqd_mem_obj)
355{
356 kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
357}
358
359static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, 371static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
360 uint32_t pipe_id, uint32_t queue_id, 372 uint32_t pipe_id, uint32_t queue_id,
361 struct queue_properties *p, struct mm_struct *mms) 373 struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
459 mqd->destroy_mqd = destroy_mqd; 471 mqd->destroy_mqd = destroy_mqd;
460 mqd->is_occupied = is_occupied; 472 mqd->is_occupied = is_occupied;
461 mqd->get_wave_state = get_wave_state; 473 mqd->get_wave_state = get_wave_state;
474 mqd->mqd_size = sizeof(struct v9_mqd);
462#if defined(CONFIG_DEBUG_FS) 475#if defined(CONFIG_DEBUG_FS)
463 mqd->debugfs_show_mqd = debugfs_show_mqd; 476 mqd->debugfs_show_mqd = debugfs_show_mqd;
464#endif 477#endif
465 break; 478 break;
466 case KFD_MQD_TYPE_HIQ: 479 case KFD_MQD_TYPE_HIQ:
467 mqd->init_mqd = init_mqd_hiq; 480 mqd->init_mqd = init_mqd_hiq;
481 mqd->uninit_mqd = uninit_mqd_hiq_sdma;
482 mqd->load_mqd = load_mqd;
483 mqd->update_mqd = update_mqd_hiq;
484 mqd->destroy_mqd = destroy_mqd;
485 mqd->is_occupied = is_occupied;
486 mqd->mqd_size = sizeof(struct v9_mqd);
487#if defined(CONFIG_DEBUG_FS)
488 mqd->debugfs_show_mqd = debugfs_show_mqd;
489#endif
490 break;
491 case KFD_MQD_TYPE_DIQ:
492 mqd->init_mqd = init_mqd_hiq;
468 mqd->uninit_mqd = uninit_mqd; 493 mqd->uninit_mqd = uninit_mqd;
469 mqd->load_mqd = load_mqd; 494 mqd->load_mqd = load_mqd;
470 mqd->update_mqd = update_mqd_hiq; 495 mqd->update_mqd = update_mqd_hiq;
471 mqd->destroy_mqd = destroy_mqd; 496 mqd->destroy_mqd = destroy_mqd;
472 mqd->is_occupied = is_occupied; 497 mqd->is_occupied = is_occupied;
498 mqd->mqd_size = sizeof(struct v9_mqd);
473#if defined(CONFIG_DEBUG_FS) 499#if defined(CONFIG_DEBUG_FS)
474 mqd->debugfs_show_mqd = debugfs_show_mqd; 500 mqd->debugfs_show_mqd = debugfs_show_mqd;
475#endif 501#endif
476 break; 502 break;
477 case KFD_MQD_TYPE_SDMA: 503 case KFD_MQD_TYPE_SDMA:
478 mqd->init_mqd = init_mqd_sdma; 504 mqd->init_mqd = init_mqd_sdma;
479 mqd->uninit_mqd = uninit_mqd_sdma; 505 mqd->uninit_mqd = uninit_mqd_hiq_sdma;
480 mqd->load_mqd = load_mqd_sdma; 506 mqd->load_mqd = load_mqd_sdma;
481 mqd->update_mqd = update_mqd_sdma; 507 mqd->update_mqd = update_mqd_sdma;
482 mqd->destroy_mqd = destroy_mqd_sdma; 508 mqd->destroy_mqd = destroy_mqd_sdma;
483 mqd->is_occupied = is_occupied_sdma; 509 mqd->is_occupied = is_occupied_sdma;
510 mqd->mqd_size = sizeof(struct v9_sdma_mqd);
484#if defined(CONFIG_DEBUG_FS) 511#if defined(CONFIG_DEBUG_FS)
485 mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; 512 mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
486#endif 513#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 6469b3456f00..b550dea9b10a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
68 m->compute_static_thread_mgmt_se3); 68 m->compute_static_thread_mgmt_se3);
69} 69}
70 70
71static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
72 struct queue_properties *q)
73{
74 struct kfd_mem_obj *mqd_mem_obj;
75
76 if (q->type == KFD_QUEUE_TYPE_HIQ)
77 return allocate_hiq_mqd(kfd);
78
79 if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
80 &mqd_mem_obj))
81 return NULL;
82
83 return mqd_mem_obj;
84}
85
71static int init_mqd(struct mqd_manager *mm, void **mqd, 86static int init_mqd(struct mqd_manager *mm, void **mqd,
72 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, 87 struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
73 struct queue_properties *q) 88 struct queue_properties *q)
@@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
75 int retval; 90 int retval;
76 uint64_t addr; 91 uint64_t addr;
77 struct vi_mqd *m; 92 struct vi_mqd *m;
93 struct kfd_dev *kfd = mm->dev;
78 94
79 retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd), 95 *mqd_mem_obj = allocate_mqd(kfd, q);
80 mqd_mem_obj); 96 if (!*mqd_mem_obj)
81 if (retval != 0)
82 return -ENOMEM; 97 return -ENOMEM;
83 98
84 m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr; 99 m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
329{ 344{
330 int retval; 345 int retval;
331 struct vi_sdma_mqd *m; 346 struct vi_sdma_mqd *m;
347 struct kfd_dev *dev = mm->dev;
332 348
333 349 *mqd_mem_obj = allocate_sdma_mqd(dev, q);
334 retval = kfd_gtt_sa_allocate(mm->dev, 350 if (!*mqd_mem_obj)
335 sizeof(struct vi_sdma_mqd),
336 mqd_mem_obj);
337
338 if (retval != 0)
339 return -ENOMEM; 351 return -ENOMEM;
340 352
341 m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; 353 m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
343 memset(m, 0, sizeof(struct vi_sdma_mqd)); 355 memset(m, 0, sizeof(struct vi_sdma_mqd));
344 356
345 *mqd = m; 357 *mqd = m;
346 if (gart_addr != NULL) 358 if (gart_addr)
347 *gart_addr = (*mqd_mem_obj)->gpu_addr; 359 *gart_addr = (*mqd_mem_obj)->gpu_addr;
348 360
349 retval = mm->update_mqd(mm, m, q); 361 retval = mm->update_mqd(mm, m, q);
@@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
351 return retval; 363 return retval;
352} 364}
353 365
354static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
355 struct kfd_mem_obj *mqd_mem_obj)
356{
357 kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
358}
359
360static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, 366static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
361 uint32_t pipe_id, uint32_t queue_id, 367 uint32_t pipe_id, uint32_t queue_id,
362 struct queue_properties *p, struct mm_struct *mms) 368 struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
459 mqd->destroy_mqd = destroy_mqd; 465 mqd->destroy_mqd = destroy_mqd;
460 mqd->is_occupied = is_occupied; 466 mqd->is_occupied = is_occupied;
461 mqd->get_wave_state = get_wave_state; 467 mqd->get_wave_state = get_wave_state;
468 mqd->mqd_size = sizeof(struct vi_mqd);
462#if defined(CONFIG_DEBUG_FS) 469#if defined(CONFIG_DEBUG_FS)
463 mqd->debugfs_show_mqd = debugfs_show_mqd; 470 mqd->debugfs_show_mqd = debugfs_show_mqd;
464#endif 471#endif
465 break; 472 break;
466 case KFD_MQD_TYPE_HIQ: 473 case KFD_MQD_TYPE_HIQ:
467 mqd->init_mqd = init_mqd_hiq; 474 mqd->init_mqd = init_mqd_hiq;
475 mqd->uninit_mqd = uninit_mqd_hiq_sdma;
476 mqd->load_mqd = load_mqd;
477 mqd->update_mqd = update_mqd_hiq;
478 mqd->destroy_mqd = destroy_mqd;
479 mqd->is_occupied = is_occupied;
480 mqd->mqd_size = sizeof(struct vi_mqd);
481#if defined(CONFIG_DEBUG_FS)
482 mqd->debugfs_show_mqd = debugfs_show_mqd;
483#endif
484 break;
485 case KFD_MQD_TYPE_DIQ:
486 mqd->init_mqd = init_mqd_hiq;
468 mqd->uninit_mqd = uninit_mqd; 487 mqd->uninit_mqd = uninit_mqd;
469 mqd->load_mqd = load_mqd; 488 mqd->load_mqd = load_mqd;
470 mqd->update_mqd = update_mqd_hiq; 489 mqd->update_mqd = update_mqd_hiq;
471 mqd->destroy_mqd = destroy_mqd; 490 mqd->destroy_mqd = destroy_mqd;
472 mqd->is_occupied = is_occupied; 491 mqd->is_occupied = is_occupied;
492 mqd->mqd_size = sizeof(struct vi_mqd);
473#if defined(CONFIG_DEBUG_FS) 493#if defined(CONFIG_DEBUG_FS)
474 mqd->debugfs_show_mqd = debugfs_show_mqd; 494 mqd->debugfs_show_mqd = debugfs_show_mqd;
475#endif 495#endif
476 break; 496 break;
477 case KFD_MQD_TYPE_SDMA: 497 case KFD_MQD_TYPE_SDMA:
478 mqd->init_mqd = init_mqd_sdma; 498 mqd->init_mqd = init_mqd_sdma;
479 mqd->uninit_mqd = uninit_mqd_sdma; 499 mqd->uninit_mqd = uninit_mqd_hiq_sdma;
480 mqd->load_mqd = load_mqd_sdma; 500 mqd->load_mqd = load_mqd_sdma;
481 mqd->update_mqd = update_mqd_sdma; 501 mqd->update_mqd = update_mqd_sdma;
482 mqd->destroy_mqd = destroy_mqd_sdma; 502 mqd->destroy_mqd = destroy_mqd_sdma;
483 mqd->is_occupied = is_occupied_sdma; 503 mqd->is_occupied = is_occupied_sdma;
504 mqd->mqd_size = sizeof(struct vi_sdma_mqd);
484#if defined(CONFIG_DEBUG_FS) 505#if defined(CONFIG_DEBUG_FS)
485 mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; 506 mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
486#endif 507#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 045a229436a0..808194663a7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
48 48
49 process_count = pm->dqm->processes_count; 49 process_count = pm->dqm->processes_count;
50 queue_count = pm->dqm->queue_count; 50 queue_count = pm->dqm->queue_count;
51 compute_queue_count = queue_count - pm->dqm->sdma_queue_count; 51 compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
52 pm->dqm->xgmi_sdma_queue_count;
52 53
53 /* check if there is over subscription 54 /* check if there is over subscription
54 * Note: the arbitration between the number of VMIDs and 55 * Note: the arbitration between the number of VMIDs and
@@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
227 case CHIP_POLARIS10: 228 case CHIP_POLARIS10:
228 case CHIP_POLARIS11: 229 case CHIP_POLARIS11:
229 case CHIP_POLARIS12: 230 case CHIP_POLARIS12:
231 case CHIP_VEGAM:
230 pm->pmf = &kfd_vi_pm_funcs; 232 pm->pmf = &kfd_vi_pm_funcs;
231 break; 233 break;
232 case CHIP_VEGA10: 234 case CHIP_VEGA10:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index f2bcf5c092ea..49ab66b703fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -176,8 +176,7 @@ struct pm4_mes_map_process {
176 176
177 union { 177 union {
178 struct { 178 struct {
179 uint32_t num_gws:6; 179 uint32_t num_gws:7;
180 uint32_t reserved7:1;
181 uint32_t sdma_enable:1; 180 uint32_t sdma_enable:1;
182 uint32_t num_oac:4; 181 uint32_t num_oac:4;
183 uint32_t reserved8:4; 182 uint32_t reserved8:4;
@@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum {
255queue_type__mes_map_queues__low_latency_static_queue_vi = 3 254queue_type__mes_map_queues__low_latency_static_queue_vi = 3
256}; 255};
257 256
258enum mes_map_queues_alloc_format_enum {
259 alloc_format__mes_map_queues__one_per_pipe_vi = 0,
260alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
261};
262
263enum mes_map_queues_engine_sel_enum { 257enum mes_map_queues_engine_sel_enum {
264 engine_sel__mes_map_queues__compute_vi = 0, 258 engine_sel__mes_map_queues__compute_vi = 0,
265 engine_sel__mes_map_queues__sdma0_vi = 2, 259 engine_sel__mes_map_queues__sdma0_vi = 2,
@@ -277,9 +271,11 @@ struct pm4_mes_map_queues {
277 struct { 271 struct {
278 uint32_t reserved1:4; 272 uint32_t reserved1:4;
279 enum mes_map_queues_queue_sel_enum queue_sel:2; 273 enum mes_map_queues_queue_sel_enum queue_sel:2;
280 uint32_t reserved2:15; 274 uint32_t reserved5:6;
275 uint32_t gws_control_queue:1;
276 uint32_t reserved2:8;
281 enum mes_map_queues_queue_type_enum queue_type:3; 277 enum mes_map_queues_queue_type_enum queue_type:3;
282 enum mes_map_queues_alloc_format_enum alloc_format:2; 278 uint32_t reserved3:2;
283 enum mes_map_queues_engine_sel_enum engine_sel:3; 279 enum mes_map_queues_engine_sel_enum engine_sel:3;
284 uint32_t num_queues:3; 280 uint32_t num_queues:3;
285 } bitfields2; 281 } bitfields2;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
index 7c8d9b357749..5466cfe1c3cc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum {
216queue_type__mes_map_queues__low_latency_static_queue_vi = 3 216queue_type__mes_map_queues__low_latency_static_queue_vi = 3
217}; 217};
218 218
219enum mes_map_queues_alloc_format_vi_enum {
220 alloc_format__mes_map_queues__one_per_pipe_vi = 0,
221alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
222};
223
224enum mes_map_queues_engine_sel_vi_enum { 219enum mes_map_queues_engine_sel_vi_enum {
225 engine_sel__mes_map_queues__compute_vi = 0, 220 engine_sel__mes_map_queues__compute_vi = 0,
226 engine_sel__mes_map_queues__sdma0_vi = 2, 221 engine_sel__mes_map_queues__sdma0_vi = 2,
@@ -240,7 +235,7 @@ struct pm4_mes_map_queues {
240 enum mes_map_queues_queue_sel_vi_enum queue_sel:2; 235 enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
241 uint32_t reserved2:15; 236 uint32_t reserved2:15;
242 enum mes_map_queues_queue_type_vi_enum queue_type:3; 237 enum mes_map_queues_queue_type_vi_enum queue_type:3;
243 enum mes_map_queues_alloc_format_vi_enum alloc_format:2; 238 uint32_t reserved3:2;
244 enum mes_map_queues_engine_sel_vi_enum engine_sel:3; 239 enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
245 uint32_t num_queues:3; 240 uint32_t num_queues:3;
246 } bitfields2; 241 } bitfields2;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 487d5da337c1..b61dc53f42d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,6 +59,7 @@
59#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) 59#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
60#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) 60#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
61#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) 61#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
62#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
62 63
63#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) 64#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
64#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ 65#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
@@ -160,6 +161,11 @@ extern int noretry;
160 */ 161 */
161extern int halt_if_hws_hang; 162extern int halt_if_hws_hang;
162 163
164/*
165 * Whether MEC FW support GWS barriers
166 */
167extern bool hws_gws_support;
168
163enum cache_policy { 169enum cache_policy {
164 cache_policy_coherent, 170 cache_policy_coherent,
165 cache_policy_noncoherent 171 cache_policy_noncoherent
@@ -188,6 +194,7 @@ struct kfd_device_info {
188 bool needs_iommu_device; 194 bool needs_iommu_device;
189 bool needs_pci_atomics; 195 bool needs_pci_atomics;
190 unsigned int num_sdma_engines; 196 unsigned int num_sdma_engines;
197 unsigned int num_xgmi_sdma_engines;
191 unsigned int num_sdma_queues_per_engine; 198 unsigned int num_sdma_queues_per_engine;
192}; 199};
193 200
@@ -258,7 +265,7 @@ struct kfd_dev {
258 bool interrupts_active; 265 bool interrupts_active;
259 266
260 /* Debug manager */ 267 /* Debug manager */
261 struct kfd_dbgmgr *dbgmgr; 268 struct kfd_dbgmgr *dbgmgr;
262 269
263 /* Firmware versions */ 270 /* Firmware versions */
264 uint16_t mec_fw_version; 271 uint16_t mec_fw_version;
@@ -282,6 +289,9 @@ struct kfd_dev {
282 289
283 /* Compute Profile ref. count */ 290 /* Compute Profile ref. count */
284 atomic_t compute_profile; 291 atomic_t compute_profile;
292
293 /* Global GWS resource shared b/t processes*/
294 void *gws;
285}; 295};
286 296
287enum kfd_mempool { 297enum kfd_mempool {
@@ -329,7 +339,8 @@ enum kfd_queue_type {
329 KFD_QUEUE_TYPE_COMPUTE, 339 KFD_QUEUE_TYPE_COMPUTE,
330 KFD_QUEUE_TYPE_SDMA, 340 KFD_QUEUE_TYPE_SDMA,
331 KFD_QUEUE_TYPE_HIQ, 341 KFD_QUEUE_TYPE_HIQ,
332 KFD_QUEUE_TYPE_DIQ 342 KFD_QUEUE_TYPE_DIQ,
343 KFD_QUEUE_TYPE_SDMA_XGMI
333}; 344};
334 345
335enum kfd_queue_format { 346enum kfd_queue_format {
@@ -444,6 +455,9 @@ struct queue_properties {
444 * 455 *
445 * @device: The kfd device that created this queue. 456 * @device: The kfd device that created this queue.
446 * 457 *
458 * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
459 * otherwise.
460 *
447 * This structure represents user mode compute queues. 461 * This structure represents user mode compute queues.
448 * It contains all the necessary data to handle such queues. 462 * It contains all the necessary data to handle such queues.
449 * 463 *
@@ -465,6 +479,7 @@ struct queue {
465 479
466 struct kfd_process *process; 480 struct kfd_process *process;
467 struct kfd_dev *device; 481 struct kfd_dev *device;
482 void *gws;
468}; 483};
469 484
470/* 485/*
@@ -475,6 +490,7 @@ enum KFD_MQD_TYPE {
475 KFD_MQD_TYPE_HIQ, /* for hiq */ 490 KFD_MQD_TYPE_HIQ, /* for hiq */
476 KFD_MQD_TYPE_CP, /* for cp queues and diq */ 491 KFD_MQD_TYPE_CP, /* for cp queues and diq */
477 KFD_MQD_TYPE_SDMA, /* for sdma queues */ 492 KFD_MQD_TYPE_SDMA, /* for sdma queues */
493 KFD_MQD_TYPE_DIQ, /* for diq */
478 KFD_MQD_TYPE_MAX 494 KFD_MQD_TYPE_MAX
479}; 495};
480 496
@@ -819,8 +835,6 @@ void uninit_queue(struct queue *q);
819void print_queue_properties(struct queue_properties *q); 835void print_queue_properties(struct queue_properties *q);
820void print_queue(struct queue *q); 836void print_queue(struct queue *q);
821 837
822struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
823 struct kfd_dev *dev);
824struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, 838struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
825 struct kfd_dev *dev); 839 struct kfd_dev *dev);
826struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, 840struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
@@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
859 struct queue_properties *p); 873 struct queue_properties *p);
860int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, 874int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
861 struct queue_properties *p); 875 struct queue_properties *p);
876int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
877 void *gws);
862struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, 878struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
863 unsigned int qid); 879 unsigned int qid);
864int pqm_get_wave_state(struct process_queue_manager *pqm, 880int pqm_get_wave_state(struct process_queue_manager *pqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index fcaaf93681ac..c2c570e6e54f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -26,6 +26,7 @@
26#include "kfd_device_queue_manager.h" 26#include "kfd_device_queue_manager.h"
27#include "kfd_priv.h" 27#include "kfd_priv.h"
28#include "kfd_kernel_queue.h" 28#include "kfd_kernel_queue.h"
29#include "amdgpu_amdkfd.h"
29 30
30static inline struct process_queue_node *get_queue_by_qid( 31static inline struct process_queue_node *get_queue_by_qid(
31 struct process_queue_manager *pqm, unsigned int qid) 32 struct process_queue_manager *pqm, unsigned int qid)
@@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
74 pdd->already_dequeued = true; 75 pdd->already_dequeued = true;
75} 76}
76 77
78int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
79 void *gws)
80{
81 struct kfd_dev *dev = NULL;
82 struct process_queue_node *pqn;
83 struct kfd_process_device *pdd;
84 struct kgd_mem *mem = NULL;
85 int ret;
86
87 pqn = get_queue_by_qid(pqm, qid);
88 if (!pqn) {
89 pr_err("Queue id does not match any known queue\n");
90 return -EINVAL;
91 }
92
93 if (pqn->q)
94 dev = pqn->q->device;
95 if (WARN_ON(!dev))
96 return -ENODEV;
97
98 pdd = kfd_get_process_device_data(dev, pqm->process);
99 if (!pdd) {
100 pr_err("Process device data doesn't exist\n");
101 return -EINVAL;
102 }
103
104 /* Only allow one queue per process can have GWS assigned */
105 if (gws && pdd->qpd.num_gws)
106 return -EINVAL;
107
108 if (!gws && pdd->qpd.num_gws == 0)
109 return -EINVAL;
110
111 if (gws)
112 ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
113 gws, &mem);
114 else
115 ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
116 pqn->q->gws);
117 if (unlikely(ret))
118 return ret;
119
120 pqn->q->gws = mem;
121 pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
122
123 return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
124 pqn->q);
125}
126
77void kfd_process_dequeue_from_all_devices(struct kfd_process *p) 127void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
78{ 128{
79 struct kfd_process_device *pdd; 129 struct kfd_process_device *pdd;
@@ -186,8 +236,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
186 236
187 switch (type) { 237 switch (type) {
188 case KFD_QUEUE_TYPE_SDMA: 238 case KFD_QUEUE_TYPE_SDMA:
189 if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { 239 case KFD_QUEUE_TYPE_SDMA_XGMI:
190 pr_err("Over-subscription is not allowed for SDMA.\n"); 240 if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
241 >= get_num_sdma_queues(dev->dqm)) ||
242 (type == KFD_QUEUE_TYPE_SDMA_XGMI &&
243 dev->dqm->xgmi_sdma_queue_count
244 >= get_num_xgmi_sdma_queues(dev->dqm))) {
245 pr_debug("Over-subscription is not allowed for SDMA.\n");
191 retval = -EPERM; 246 retval = -EPERM;
192 goto err_create_queue; 247 goto err_create_queue;
193 } 248 }
@@ -325,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
325 if (retval != -ETIME) 380 if (retval != -ETIME)
326 goto err_destroy_queue; 381 goto err_destroy_queue;
327 } 382 }
383
384 if (pqn->q->gws) {
385 amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
386 pqn->q->gws);
387 pdd->qpd.num_gws = 0;
388 }
389
328 kfree(pqn->q->properties.cu_mask); 390 kfree(pqn->q->properties.cu_mask);
329 pqn->q->properties.cu_mask = NULL; 391 pqn->q->properties.cu_mask = NULL;
330 uninit_queue(pqn->q); 392 uninit_queue(pqn->q);
@@ -446,6 +508,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
446 q = pqn->q; 508 q = pqn->q;
447 switch (q->properties.type) { 509 switch (q->properties.type) {
448 case KFD_QUEUE_TYPE_SDMA: 510 case KFD_QUEUE_TYPE_SDMA:
511 case KFD_QUEUE_TYPE_SDMA_XGMI:
449 seq_printf(m, " SDMA queue on device %x\n", 512 seq_printf(m, " SDMA queue on device %x\n",
450 q->device->id); 513 q->device->id);
451 mqd_type = KFD_MQD_TYPE_SDMA; 514 mqd_type = KFD_MQD_TYPE_SDMA;
@@ -461,8 +524,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
461 q->properties.type, q->device->id); 524 q->properties.type, q->device->id);
462 continue; 525 continue;
463 } 526 }
464 mqd_mgr = q->device->dqm->ops.get_mqd_manager( 527 mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
465 q->device->dqm, mqd_type);
466 } else if (pqn->kq) { 528 } else if (pqn->kq) {
467 q = pqn->kq->queue; 529 q = pqn->kq->queue;
468 mqd_mgr = pqn->kq->mqd_mgr; 530 mqd_mgr = pqn->kq->mqd_mgr;
@@ -470,7 +532,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
470 case KFD_QUEUE_TYPE_DIQ: 532 case KFD_QUEUE_TYPE_DIQ:
471 seq_printf(m, " DIQ on device %x\n", 533 seq_printf(m, " DIQ on device %x\n",
472 pqn->kq->dev->id); 534 pqn->kq->dev->id);
473 mqd_type = KFD_MQD_TYPE_HIQ;
474 break; 535 break;
475 default: 536 default:
476 seq_printf(m, 537 seq_printf(m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 769dbc7be8cb..d241a8672599 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
454 dev->node_props.lds_size_in_kb); 454 dev->node_props.lds_size_in_kb);
455 sysfs_show_32bit_prop(buffer, "gds_size_in_kb", 455 sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
456 dev->node_props.gds_size_in_kb); 456 dev->node_props.gds_size_in_kb);
457 sysfs_show_32bit_prop(buffer, "num_gws",
458 dev->node_props.num_gws);
457 sysfs_show_32bit_prop(buffer, "wave_front_size", 459 sysfs_show_32bit_prop(buffer, "wave_front_size",
458 dev->node_props.wave_front_size); 460 dev->node_props.wave_front_size);
459 sysfs_show_32bit_prop(buffer, "array_count", 461 sysfs_show_32bit_prop(buffer, "array_count",
@@ -476,6 +478,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
476 dev->node_props.drm_render_minor); 478 dev->node_props.drm_render_minor);
477 sysfs_show_64bit_prop(buffer, "hive_id", 479 sysfs_show_64bit_prop(buffer, "hive_id",
478 dev->node_props.hive_id); 480 dev->node_props.hive_id);
481 sysfs_show_32bit_prop(buffer, "num_sdma_engines",
482 dev->node_props.num_sdma_engines);
483 sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
484 dev->node_props.num_sdma_xgmi_engines);
479 485
480 if (dev->gpu) { 486 if (dev->gpu) {
481 log_max_watch_addr = 487 log_max_watch_addr =
@@ -1078,8 +1084,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
1078 local_mem_info.local_mem_size_public; 1084 local_mem_info.local_mem_size_public;
1079 1085
1080 buf[0] = gpu->pdev->devfn; 1086 buf[0] = gpu->pdev->devfn;
1081 buf[1] = gpu->pdev->subsystem_vendor; 1087 buf[1] = gpu->pdev->subsystem_vendor |
1082 buf[2] = gpu->pdev->subsystem_device; 1088 (gpu->pdev->subsystem_device << 16);
1089 buf[2] = pci_domain_nr(gpu->pdev->bus);
1083 buf[3] = gpu->pdev->device; 1090 buf[3] = gpu->pdev->device;
1084 buf[4] = gpu->pdev->bus->number; 1091 buf[4] = gpu->pdev->bus->number;
1085 buf[5] = lower_32_bits(local_mem_size); 1092 buf[5] = lower_32_bits(local_mem_size);
@@ -1281,6 +1288,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
1281 gpu->shared_resources.drm_render_minor; 1288 gpu->shared_resources.drm_render_minor;
1282 1289
1283 dev->node_props.hive_id = gpu->hive_id; 1290 dev->node_props.hive_id = gpu->hive_id;
1291 dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
1292 dev->node_props.num_sdma_xgmi_engines =
1293 gpu->device_info->num_xgmi_sdma_engines;
1294 dev->node_props.num_gws = (hws_gws_support &&
1295 dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
1296 amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
1284 1297
1285 kfd_fill_mem_clk_max_info(dev); 1298 kfd_fill_mem_clk_max_info(dev);
1286 kfd_fill_iolink_non_crat_info(dev); 1299 kfd_fill_iolink_non_crat_info(dev);
@@ -1298,6 +1311,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
1298 case CHIP_POLARIS10: 1311 case CHIP_POLARIS10:
1299 case CHIP_POLARIS11: 1312 case CHIP_POLARIS11:
1300 case CHIP_POLARIS12: 1313 case CHIP_POLARIS12:
1314 case CHIP_VEGAM:
1301 pr_debug("Adding doorbell packet type capability\n"); 1315 pr_debug("Adding doorbell packet type capability\n");
1302 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << 1316 dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
1303 HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & 1317 HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 84710cfd23c2..276354aa0fcc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -65,6 +65,7 @@ struct kfd_node_properties {
65 uint32_t max_waves_per_simd; 65 uint32_t max_waves_per_simd;
66 uint32_t lds_size_in_kb; 66 uint32_t lds_size_in_kb;
67 uint32_t gds_size_in_kb; 67 uint32_t gds_size_in_kb;
68 uint32_t num_gws;
68 uint32_t wave_front_size; 69 uint32_t wave_front_size;
69 uint32_t array_count; 70 uint32_t array_count;
70 uint32_t simd_arrays_per_engine; 71 uint32_t simd_arrays_per_engine;
@@ -78,6 +79,8 @@ struct kfd_node_properties {
78 uint32_t max_engine_clk_fcompute; 79 uint32_t max_engine_clk_fcompute;
79 uint32_t max_engine_clk_ccompute; 80 uint32_t max_engine_clk_ccompute;
80 int32_t drm_render_minor; 81 int32_t drm_render_minor;
82 uint32_t num_sdma_engines;
83 uint32_t num_sdma_xgmi_engines;
81 uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; 84 uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
82}; 85};
83 86
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 0c25baded852..5c826faae240 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,6 @@ config DRM_AMD_DC
6 bool "AMD DC - Enable new display engine" 6 bool "AMD DC - Enable new display engine"
7 default y 7 default y
8 select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) 8 select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
9 select DRM_AMD_DC_DCN1_01 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
10 help 9 help
11 Choose this option if you want to use the new display engine 10 Choose this option if you want to use the new display engine
12 support for AMDGPU. This adds required support for Vega and 11 support for AMDGPU. This adds required support for Vega and
@@ -17,11 +16,6 @@ config DRM_AMD_DC_DCN1_0
17 help 16 help
18 RV family support for display engine 17 RV family support for display engine
19 18
20config DRM_AMD_DC_DCN1_01
21 def_bool n
22 help
23 RV2 family for display engine
24
25config DEBUG_KERNEL_DC 19config DEBUG_KERNEL_DC
26 bool "Enable kgdb break in DC" 20 bool "Enable kgdb break in DC"
27 depends on DRM_AMD_DC 21 depends on DRM_AMD_DC
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 995f9df66142..53b76e0de940 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -29,6 +29,7 @@
29#include "dm_services_types.h" 29#include "dm_services_types.h"
30#include "dc.h" 30#include "dc.h"
31#include "dc/inc/core_types.h" 31#include "dc/inc/core_types.h"
32#include "dal_asic_id.h"
32 33
33#include "vid.h" 34#include "vid.h"
34#include "amdgpu.h" 35#include "amdgpu.h"
@@ -615,6 +616,10 @@ error:
615static void amdgpu_dm_fini(struct amdgpu_device *adev) 616static void amdgpu_dm_fini(struct amdgpu_device *adev)
616{ 617{
617 amdgpu_dm_destroy_drm_device(&adev->dm); 618 amdgpu_dm_destroy_drm_device(&adev->dm);
619
620 /* DC Destroy TODO: Replace destroy DAL */
621 if (adev->dm.dc)
622 dc_destroy(&adev->dm.dc);
618 /* 623 /*
619 * TODO: pageflip, vlank interrupt 624 * TODO: pageflip, vlank interrupt
620 * 625 *
@@ -629,9 +634,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
629 mod_freesync_destroy(adev->dm.freesync_module); 634 mod_freesync_destroy(adev->dm.freesync_module);
630 adev->dm.freesync_module = NULL; 635 adev->dm.freesync_module = NULL;
631 } 636 }
632 /* DC Destroy TODO: Replace destroy DAL */
633 if (adev->dm.dc)
634 dc_destroy(&adev->dm.dc);
635 637
636 mutex_destroy(&adev->dm.dc_lock); 638 mutex_destroy(&adev->dm.dc_lock);
637 639
@@ -640,7 +642,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
640 642
641static int load_dmcu_fw(struct amdgpu_device *adev) 643static int load_dmcu_fw(struct amdgpu_device *adev)
642{ 644{
643 const char *fw_name_dmcu; 645 const char *fw_name_dmcu = NULL;
644 int r; 646 int r;
645 const struct dmcu_firmware_header_v1_0 *hdr; 647 const struct dmcu_firmware_header_v1_0 *hdr;
646 648
@@ -663,7 +665,14 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
663 case CHIP_VEGA20: 665 case CHIP_VEGA20:
664 return 0; 666 return 0;
665 case CHIP_RAVEN: 667 case CHIP_RAVEN:
666 fw_name_dmcu = FIRMWARE_RAVEN_DMCU; 668#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
669 if (ASICREV_IS_PICASSO(adev->external_rev_id))
670 fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
671 else if (ASICREV_IS_RAVEN2(adev->external_rev_id))
672 fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
673 else
674#endif
675 return 0;
667 break; 676 break;
668 default: 677 default:
669 DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); 678 DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
@@ -2584,7 +2593,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
2584 address->type = PLN_ADDR_TYPE_GRAPHICS; 2593 address->type = PLN_ADDR_TYPE_GRAPHICS;
2585 address->grph.addr.low_part = lower_32_bits(afb->address); 2594 address->grph.addr.low_part = lower_32_bits(afb->address);
2586 address->grph.addr.high_part = upper_32_bits(afb->address); 2595 address->grph.addr.high_part = upper_32_bits(afb->address);
2587 } else { 2596 } else if (format < SURFACE_PIXEL_FORMAT_INVALID) {
2588 uint64_t chroma_addr = afb->address + fb->offsets[1]; 2597 uint64_t chroma_addr = afb->address + fb->offsets[1];
2589 2598
2590 plane_size->video.luma_size.x = 0; 2599 plane_size->video.luma_size.x = 0;
@@ -2959,16 +2968,16 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,
2959} 2968}
2960 2969
2961static enum dc_color_depth 2970static enum dc_color_depth
2962convert_color_depth_from_display_info(const struct drm_connector *connector) 2971convert_color_depth_from_display_info(const struct drm_connector *connector,
2972 const struct drm_connector_state *state)
2963{ 2973{
2964 struct dm_connector_state *dm_conn_state =
2965 to_dm_connector_state(connector->state);
2966 uint32_t bpc = connector->display_info.bpc; 2974 uint32_t bpc = connector->display_info.bpc;
2967 2975
2968 /* TODO: Remove this when there's support for max_bpc in drm */ 2976 if (state) {
2969 if (dm_conn_state && bpc > dm_conn_state->max_bpc) 2977 bpc = state->max_bpc;
2970 /* Round down to nearest even number. */ 2978 /* Round down to the nearest even number. */
2971 bpc = dm_conn_state->max_bpc - (dm_conn_state->max_bpc & 1); 2979 bpc = bpc - (bpc & 1);
2980 }
2972 2981
2973 switch (bpc) { 2982 switch (bpc) {
2974 case 0: 2983 case 0:
@@ -3086,11 +3095,12 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_
3086 3095
3087} 3096}
3088 3097
3089static void 3098static void fill_stream_properties_from_drm_display_mode(
3090fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, 3099 struct dc_stream_state *stream,
3091 const struct drm_display_mode *mode_in, 3100 const struct drm_display_mode *mode_in,
3092 const struct drm_connector *connector, 3101 const struct drm_connector *connector,
3093 const struct dc_stream_state *old_stream) 3102 const struct drm_connector_state *connector_state,
3103 const struct dc_stream_state *old_stream)
3094{ 3104{
3095 struct dc_crtc_timing *timing_out = &stream->timing; 3105 struct dc_crtc_timing *timing_out = &stream->timing;
3096 const struct drm_display_info *info = &connector->display_info; 3106 const struct drm_display_info *info = &connector->display_info;
@@ -3113,7 +3123,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
3113 3123
3114 timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE; 3124 timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE;
3115 timing_out->display_color_depth = convert_color_depth_from_display_info( 3125 timing_out->display_color_depth = convert_color_depth_from_display_info(
3116 connector); 3126 connector, connector_state);
3117 timing_out->scan_type = SCANNING_TYPE_NODATA; 3127 timing_out->scan_type = SCANNING_TYPE_NODATA;
3118 timing_out->hdmi_vic = 0; 3128 timing_out->hdmi_vic = 0;
3119 3129
@@ -3310,6 +3320,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
3310{ 3320{
3311 struct drm_display_mode *preferred_mode = NULL; 3321 struct drm_display_mode *preferred_mode = NULL;
3312 struct drm_connector *drm_connector; 3322 struct drm_connector *drm_connector;
3323 const struct drm_connector_state *con_state =
3324 dm_state ? &dm_state->base : NULL;
3313 struct dc_stream_state *stream = NULL; 3325 struct dc_stream_state *stream = NULL;
3314 struct drm_display_mode mode = *drm_mode; 3326 struct drm_display_mode mode = *drm_mode;
3315 bool native_mode_found = false; 3327 bool native_mode_found = false;
@@ -3382,10 +3394,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
3382 */ 3394 */
3383 if (!scale || mode_refresh != preferred_refresh) 3395 if (!scale || mode_refresh != preferred_refresh)
3384 fill_stream_properties_from_drm_display_mode(stream, 3396 fill_stream_properties_from_drm_display_mode(stream,
3385 &mode, &aconnector->base, NULL); 3397 &mode, &aconnector->base, con_state, NULL);
3386 else 3398 else
3387 fill_stream_properties_from_drm_display_mode(stream, 3399 fill_stream_properties_from_drm_display_mode(stream,
3388 &mode, &aconnector->base, old_stream); 3400 &mode, &aconnector->base, con_state, old_stream);
3389 3401
3390 update_stream_scaling_settings(&mode, dm_state, stream); 3402 update_stream_scaling_settings(&mode, dm_state, stream);
3391 3403
@@ -3610,9 +3622,6 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
3610 } else if (property == adev->mode_info.underscan_property) { 3622 } else if (property == adev->mode_info.underscan_property) {
3611 dm_new_state->underscan_enable = val; 3623 dm_new_state->underscan_enable = val;
3612 ret = 0; 3624 ret = 0;
3613 } else if (property == adev->mode_info.max_bpc_property) {
3614 dm_new_state->max_bpc = val;
3615 ret = 0;
3616 } else if (property == adev->mode_info.abm_level_property) { 3625 } else if (property == adev->mode_info.abm_level_property) {
3617 dm_new_state->abm_level = val; 3626 dm_new_state->abm_level = val;
3618 ret = 0; 3627 ret = 0;
@@ -3658,9 +3667,6 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
3658 } else if (property == adev->mode_info.underscan_property) { 3667 } else if (property == adev->mode_info.underscan_property) {
3659 *val = dm_state->underscan_enable; 3668 *val = dm_state->underscan_enable;
3660 ret = 0; 3669 ret = 0;
3661 } else if (property == adev->mode_info.max_bpc_property) {
3662 *val = dm_state->max_bpc;
3663 ret = 0;
3664 } else if (property == adev->mode_info.abm_level_property) { 3670 } else if (property == adev->mode_info.abm_level_property) {
3665 *val = dm_state->abm_level; 3671 *val = dm_state->abm_level;
3666 ret = 0; 3672 ret = 0;
@@ -3717,7 +3723,6 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
3717 state->underscan_enable = false; 3723 state->underscan_enable = false;
3718 state->underscan_hborder = 0; 3724 state->underscan_hborder = 0;
3719 state->underscan_vborder = 0; 3725 state->underscan_vborder = 0;
3720 state->max_bpc = 8;
3721 3726
3722 __drm_atomic_helper_connector_reset(connector, &state->base); 3727 __drm_atomic_helper_connector_reset(connector, &state->base);
3723 } 3728 }
@@ -3743,7 +3748,6 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector)
3743 new_state->underscan_enable = state->underscan_enable; 3748 new_state->underscan_enable = state->underscan_enable;
3744 new_state->underscan_hborder = state->underscan_hborder; 3749 new_state->underscan_hborder = state->underscan_hborder;
3745 new_state->underscan_vborder = state->underscan_vborder; 3750 new_state->underscan_vborder = state->underscan_vborder;
3746 new_state->max_bpc = state->max_bpc;
3747 3751
3748 return &new_state->base; 3752 return &new_state->base;
3749} 3753}
@@ -4585,6 +4589,15 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
4585 amdgpu_dm_connector->num_modes = 4589 amdgpu_dm_connector->num_modes =
4586 drm_add_edid_modes(connector, edid); 4590 drm_add_edid_modes(connector, edid);
4587 4591
4592 /* sorting the probed modes before calling function
4593 * amdgpu_dm_get_native_mode() since EDID can have
4594 * more than one preferred mode. The modes that are
4595 * later in the probed mode list could be of higher
4596 * and preferred resolution. For example, 3840x2160
4597 * resolution in base EDID preferred timing and 4096x2160
4598 * preferred resolution in DID extension block later.
4599 */
4600 drm_mode_sort(&connector->probed_modes);
4588 amdgpu_dm_get_native_mode(connector); 4601 amdgpu_dm_get_native_mode(connector);
4589 } else { 4602 } else {
4590 amdgpu_dm_connector->num_modes = 0; 4603 amdgpu_dm_connector->num_modes = 0;
@@ -4664,9 +4677,12 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
4664 drm_object_attach_property(&aconnector->base.base, 4677 drm_object_attach_property(&aconnector->base.base,
4665 adev->mode_info.underscan_vborder_property, 4678 adev->mode_info.underscan_vborder_property,
4666 0); 4679 0);
4667 drm_object_attach_property(&aconnector->base.base, 4680
4668 adev->mode_info.max_bpc_property, 4681 drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16);
4669 0); 4682
4683 /* This defaults to the max in the range, but we want 8bpc. */
4684 aconnector->base.state->max_bpc = 8;
4685 aconnector->base.state->max_requested_bpc = 8;
4670 4686
4671 if (connector_type == DRM_MODE_CONNECTOR_eDP && 4687 if (connector_type == DRM_MODE_CONNECTOR_eDP &&
4672 dc_is_dmcu_initialized(adev->dm.dc)) { 4688 dc_is_dmcu_initialized(adev->dm.dc)) {
@@ -4945,12 +4961,12 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
4945 int x, y; 4961 int x, y;
4946 int xorigin = 0, yorigin = 0; 4962 int xorigin = 0, yorigin = 0;
4947 4963
4948 if (!crtc || !plane->state->fb) { 4964 position->enable = false;
4949 position->enable = false; 4965 position->x = 0;
4950 position->x = 0; 4966 position->y = 0;
4951 position->y = 0; 4967
4968 if (!crtc || !plane->state->fb)
4952 return 0; 4969 return 0;
4953 }
4954 4970
4955 if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || 4971 if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) ||
4956 (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { 4972 (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) {
@@ -4964,6 +4980,10 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
4964 x = plane->state->crtc_x; 4980 x = plane->state->crtc_x;
4965 y = plane->state->crtc_y; 4981 y = plane->state->crtc_y;
4966 4982
4983 if (x <= -amdgpu_crtc->max_cursor_width ||
4984 y <= -amdgpu_crtc->max_cursor_height)
4985 return 0;
4986
4967 if (crtc->primary->state) { 4987 if (crtc->primary->state) {
4968 /* avivo cursor are offset into the total surface */ 4988 /* avivo cursor are offset into the total surface */
4969 x += crtc->primary->state->src_x >> 16; 4989 x += crtc->primary->state->src_x >> 16;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 978ff14a7d45..b0ce44422e90 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -304,7 +304,6 @@ struct dm_connector_state {
304 enum amdgpu_rmx_type scaling; 304 enum amdgpu_rmx_type scaling;
305 uint8_t underscan_vborder; 305 uint8_t underscan_vborder;
306 uint8_t underscan_hborder; 306 uint8_t underscan_hborder;
307 uint8_t max_bpc;
308 bool underscan_enable; 307 bool underscan_enable;
309 bool freesync_capable; 308 bool freesync_capable;
310 uint8_t abm_level; 309 uint8_t abm_level;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index fd5266a58297..12bc7ee66b18 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1313,6 +1313,8 @@ static enum bp_result bios_parser_get_encoder_cap_info(
1313 ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0; 1313 ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0;
1314 info->HDMI_6GB_EN = (record->encodercaps & 1314 info->HDMI_6GB_EN = (record->encodercaps &
1315 ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 1 : 0; 1315 ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 1 : 0;
1316 info->DP_IS_USB_C = (record->encodercaps &
1317 ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 1 : 0;
1316 1318
1317 return BP_RESULT_OK; 1319 return BP_RESULT_OK;
1318} 1320}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index 8196f3bb10c7..53deba42007a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -57,11 +57,6 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
57 return true; 57 return true;
58#if defined(CONFIG_DRM_AMD_DC_DCN1_0) 58#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
59 case DCN_VERSION_1_0: 59 case DCN_VERSION_1_0:
60 *h = dal_cmd_tbl_helper_dce112_get_table2();
61 return true;
62#endif
63
64#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
65 case DCN_VERSION_1_01: 60 case DCN_VERSION_1_01:
66 *h = dal_cmd_tbl_helper_dce112_get_table2(); 61 *h = dal_cmd_tbl_helper_dce112_get_table2();
67 return true; 62 return true;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 18c775a950cc..4e17af2b63dc 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -169,9 +169,14 @@ static bool create_links(
169 link = link_create(&link_init_params); 169 link = link_create(&link_init_params);
170 170
171 if (link) { 171 if (link) {
172 dc->links[dc->link_count] = link; 172 if (dc->config.edp_not_connected &&
173 link->dc = dc; 173 link->connector_signal == SIGNAL_TYPE_EDP) {
174 ++dc->link_count; 174 link_destroy(&link);
175 } else {
176 dc->links[dc->link_count] = link;
177 link->dc = dc;
178 ++dc->link_count;
179 }
175 } 180 }
176 } 181 }
177 182
@@ -1136,10 +1141,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
1136 /* Program all planes within new context*/ 1141 /* Program all planes within new context*/
1137 for (i = 0; i < context->stream_count; i++) { 1142 for (i = 0; i < context->stream_count; i++) {
1138 const struct dc_link *link = context->streams[i]->link; 1143 const struct dc_link *link = context->streams[i]->link;
1139 struct dc_stream_status *status;
1140
1141 if (context->streams[i]->apply_seamless_boot_optimization)
1142 context->streams[i]->apply_seamless_boot_optimization = false;
1143 1144
1144 if (!context->streams[i]->mode_changed) 1145 if (!context->streams[i]->mode_changed)
1145 continue; 1146 continue;
@@ -1164,9 +1165,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
1164 } 1165 }
1165 } 1166 }
1166 1167
1167 status = dc_stream_get_status_from_state(context, context->streams[i]);
1168 context->streams[i]->out.otg_offset = status->primary_otg_inst;
1169
1170 CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}", 1168 CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}",
1171 context->streams[i]->timing.h_addressable, 1169 context->streams[i]->timing.h_addressable,
1172 context->streams[i]->timing.v_addressable, 1170 context->streams[i]->timing.v_addressable,
@@ -1331,71 +1329,94 @@ static bool is_surface_in_context(
1331static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) 1329static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)
1332{ 1330{
1333 union surface_update_flags *update_flags = &u->surface->update_flags; 1331 union surface_update_flags *update_flags = &u->surface->update_flags;
1332 enum surface_update_type update_type = UPDATE_TYPE_FAST;
1334 1333
1335 if (!u->plane_info) 1334 if (!u->plane_info)
1336 return UPDATE_TYPE_FAST; 1335 return UPDATE_TYPE_FAST;
1337 1336
1338 if (u->plane_info->color_space != u->surface->color_space) 1337 if (u->plane_info->color_space != u->surface->color_space) {
1339 update_flags->bits.color_space_change = 1; 1338 update_flags->bits.color_space_change = 1;
1339 elevate_update_type(&update_type, UPDATE_TYPE_MED);
1340 }
1340 1341
1341 if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) 1342 if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) {
1342 update_flags->bits.horizontal_mirror_change = 1; 1343 update_flags->bits.horizontal_mirror_change = 1;
1344 elevate_update_type(&update_type, UPDATE_TYPE_MED);
1345 }
1343 1346
1344 if (u->plane_info->rotation != u->surface->rotation) 1347 if (u->plane_info->rotation != u->surface->rotation) {
1345 update_flags->bits.rotation_change = 1; 1348 update_flags->bits.rotation_change = 1;
1349 elevate_update_type(&update_type, UPDATE_TYPE_FULL);
1350 }
1346 1351
1347 if (u->plane_info->format != u->surface->format) 1352 if (u->plane_info->format != u->surface->format) {
1348 update_flags->bits.pixel_format_change = 1; 1353 update_flags->bits.pixel_format_change = 1;
1354 elevate_update_type(&update_type, UPDATE_TYPE_FULL);
1355 }
1349 1356
1350 if (u->plane_info->stereo_format != u->surface->stereo_format) 1357 if (u->plane_info->stereo_format != u->surface->stereo_format) {
1351 update_flags->bits.stereo_format_change = 1; 1358 update_flags->bits.stereo_format_change = 1;
1359 elevate_update_type(&update_type, UPDATE_TYPE_FULL);
1360 }
1352 1361
1353 if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) 1362 if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) {
1354 update_flags->bits.per_pixel_alpha_change = 1; 1363 update_flags->bits.per_pixel_alpha_change = 1;
1364 elevate_update_type(&update_type, UPDATE_TYPE_MED);
1365 }
1355 1366
1356 if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) 1367 if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) {
1357 update_flags->bits.global_alpha_change = 1; 1368 update_flags->bits.global_alpha_change = 1;
1369 elevate_update_type(&update_type, UPDATE_TYPE_MED);
1370 }
1371
1372 if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) {
1373 update_flags->bits.sdr_white_level = 1;
1374 elevate_update_type(&update_type, UPDATE_TYPE_MED);
1375 }
1358 1376
1359 if (u->plane_info->dcc.enable != u->surface->dcc.enable 1377 if (u->plane_info->dcc.enable != u->surface->dcc.enable
1360 || u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks 1378 || u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks
1361 || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) 1379 || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) {
1362 update_flags->bits.dcc_change = 1; 1380 update_flags->bits.dcc_change = 1;
1381 elevate_update_type(&update_type, UPDATE_TYPE_MED);
1382 }
1363 1383
1364 if (resource_pixel_format_to_bpp(u->plane_info->format) != 1384 if (resource_pixel_format_to_bpp(u->plane_info->format) !=
1365 resource_pixel_format_to_bpp(u->surface->format)) 1385 resource_pixel_format_to_bpp(u->surface->format)) {
1366 /* different bytes per element will require full bandwidth 1386 /* different bytes per element will require full bandwidth
1367 * and DML calculation 1387 * and DML calculation
1368 */ 1388 */
1369 update_flags->bits.bpp_change = 1; 1389 update_flags->bits.bpp_change = 1;
1390 elevate_update_type(&update_type, UPDATE_TYPE_FULL);
1391 }
1370 1392
1371 if (u->plane_info->plane_size.grph.surface_pitch != u->surface->plane_size.grph.surface_pitch 1393 if (u->plane_info->plane_size.grph.surface_pitch != u->surface->plane_size.grph.surface_pitch
1372 || u->plane_info->plane_size.video.luma_pitch != u->surface->plane_size.video.luma_pitch 1394 || u->plane_info->plane_size.video.luma_pitch != u->surface->plane_size.video.luma_pitch
1373 || u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) 1395 || u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) {
1374 update_flags->bits.plane_size_change = 1; 1396 update_flags->bits.plane_size_change = 1;
1397 elevate_update_type(&update_type, UPDATE_TYPE_MED);
1398 }
1375 1399
1376 1400
1377 if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, 1401 if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info,
1378 sizeof(union dc_tiling_info)) != 0) { 1402 sizeof(union dc_tiling_info)) != 0) {
1379 update_flags->bits.swizzle_change = 1; 1403 update_flags->bits.swizzle_change = 1;
1404 elevate_update_type(&update_type, UPDATE_TYPE_MED);
1405
1380 /* todo: below are HW dependent, we should add a hook to 1406 /* todo: below are HW dependent, we should add a hook to
1381 * DCE/N resource and validated there. 1407 * DCE/N resource and validated there.
1382 */ 1408 */
1383 if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) 1409 if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
1384 /* swizzled mode requires RQ to be setup properly, 1410 /* swizzled mode requires RQ to be setup properly,
1385 * thus need to run DML to calculate RQ settings 1411 * thus need to run DML to calculate RQ settings
1386 */ 1412 */
1387 update_flags->bits.bandwidth_change = 1; 1413 update_flags->bits.bandwidth_change = 1;
1414 elevate_update_type(&update_type, UPDATE_TYPE_FULL);
1415 }
1388 } 1416 }
1389 1417
1390 if (update_flags->bits.rotation_change 1418 /* This should be UPDATE_TYPE_FAST if nothing has changed. */
1391 || update_flags->bits.stereo_format_change 1419 return update_type;
1392 || update_flags->bits.pixel_format_change
1393 || update_flags->bits.bpp_change
1394 || update_flags->bits.bandwidth_change
1395 || update_flags->bits.output_tf_change)
1396 return UPDATE_TYPE_FULL;
1397
1398 return update_flags->raw ? UPDATE_TYPE_MED : UPDATE_TYPE_FAST;
1399} 1420}
1400 1421
1401static enum surface_update_type get_scaling_info_update_type( 1422static enum surface_update_type get_scaling_info_update_type(
@@ -1475,6 +1496,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
1475 type = get_scaling_info_update_type(u); 1496 type = get_scaling_info_update_type(u);
1476 elevate_update_type(&overall_type, type); 1497 elevate_update_type(&overall_type, type);
1477 1498
1499 if (u->flip_addr)
1500 update_flags->bits.addr_update = 1;
1501
1478 if (u->in_transfer_func) 1502 if (u->in_transfer_func)
1479 update_flags->bits.in_transfer_func_change = 1; 1503 update_flags->bits.in_transfer_func_change = 1;
1480 1504
@@ -1792,10 +1816,15 @@ static void commit_planes_for_stream(struct dc *dc,
1792 if (dc->optimize_seamless_boot && surface_count > 0) { 1816 if (dc->optimize_seamless_boot && surface_count > 0) {
1793 /* Optimize seamless boot flag keeps clocks and watermarks high until 1817 /* Optimize seamless boot flag keeps clocks and watermarks high until
1794 * first flip. After first flip, optimization is required to lower 1818 * first flip. After first flip, optimization is required to lower
1795 * bandwidth. 1819 * bandwidth. Important to note that it is expected UEFI will
1820 * only light up a single display on POST, therefore we only expect
1821 * one stream with seamless boot flag set.
1796 */ 1822 */
1797 dc->optimize_seamless_boot = false; 1823 if (stream->apply_seamless_boot_optimization) {
1798 dc->optimized_required = true; 1824 stream->apply_seamless_boot_optimization = false;
1825 dc->optimize_seamless_boot = false;
1826 dc->optimized_required = true;
1827 }
1799 } 1828 }
1800 1829
1801 if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) { 1830 if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 83d121510ef5..ca50ede37183 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -45,8 +45,10 @@ enum dc_color_space_type {
45 COLOR_SPACE_RGB_LIMITED_TYPE, 45 COLOR_SPACE_RGB_LIMITED_TYPE,
46 COLOR_SPACE_YCBCR601_TYPE, 46 COLOR_SPACE_YCBCR601_TYPE,
47 COLOR_SPACE_YCBCR709_TYPE, 47 COLOR_SPACE_YCBCR709_TYPE,
48 COLOR_SPACE_YCBCR2020_TYPE,
48 COLOR_SPACE_YCBCR601_LIMITED_TYPE, 49 COLOR_SPACE_YCBCR601_LIMITED_TYPE,
49 COLOR_SPACE_YCBCR709_LIMITED_TYPE 50 COLOR_SPACE_YCBCR709_LIMITED_TYPE,
51 COLOR_SPACE_YCBCR709_BLACK_TYPE,
50}; 52};
51 53
52static const struct tg_color black_color_format[] = { 54static const struct tg_color black_color_format[] = {
@@ -80,7 +82,6 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {
80 { COLOR_SPACE_YCBCR709_TYPE, 82 { COLOR_SPACE_YCBCR709_TYPE,
81 { 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA, 83 { 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA,
82 0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} }, 84 0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} },
83
84 /* TODO: correct values below */ 85 /* TODO: correct values below */
85 { COLOR_SPACE_YCBCR601_LIMITED_TYPE, 86 { COLOR_SPACE_YCBCR601_LIMITED_TYPE,
86 { 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991, 87 { 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991,
@@ -88,6 +89,12 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {
88 { COLOR_SPACE_YCBCR709_LIMITED_TYPE, 89 { COLOR_SPACE_YCBCR709_LIMITED_TYPE,
89 { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3, 90 { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
90 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} }, 91 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
92 { COLOR_SPACE_YCBCR2020_TYPE,
93 { 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2,
94 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} },
95 { COLOR_SPACE_YCBCR709_BLACK_TYPE,
96 { 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000,
97 0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} },
91}; 98};
92 99
93static bool is_rgb_type( 100static bool is_rgb_type(
@@ -149,6 +156,16 @@ static bool is_ycbcr709_type(
149 return ret; 156 return ret;
150} 157}
151 158
159static bool is_ycbcr2020_type(
160 enum dc_color_space color_space)
161{
162 bool ret = false;
163
164 if (color_space == COLOR_SPACE_2020_YCBCR)
165 ret = true;
166 return ret;
167}
168
152static bool is_ycbcr709_limited_type( 169static bool is_ycbcr709_limited_type(
153 enum dc_color_space color_space) 170 enum dc_color_space color_space)
154{ 171{
@@ -174,7 +191,12 @@ enum dc_color_space_type get_color_space_type(enum dc_color_space color_space)
174 type = COLOR_SPACE_YCBCR601_LIMITED_TYPE; 191 type = COLOR_SPACE_YCBCR601_LIMITED_TYPE;
175 else if (is_ycbcr709_limited_type(color_space)) 192 else if (is_ycbcr709_limited_type(color_space))
176 type = COLOR_SPACE_YCBCR709_LIMITED_TYPE; 193 type = COLOR_SPACE_YCBCR709_LIMITED_TYPE;
177 194 else if (is_ycbcr2020_type(color_space))
195 type = COLOR_SPACE_YCBCR2020_TYPE;
196 else if (color_space == COLOR_SPACE_YCBCR709)
197 type = COLOR_SPACE_YCBCR709_BLACK_TYPE;
198 else if (color_space == COLOR_SPACE_YCBCR709_BLACK)
199 type = COLOR_SPACE_YCBCR709_BLACK_TYPE;
178 return type; 200 return type;
179} 201}
180 202
@@ -206,6 +228,7 @@ void color_space_to_black_color(
206 switch (colorspace) { 228 switch (colorspace) {
207 case COLOR_SPACE_YCBCR601: 229 case COLOR_SPACE_YCBCR601:
208 case COLOR_SPACE_YCBCR709: 230 case COLOR_SPACE_YCBCR709:
231 case COLOR_SPACE_YCBCR709_BLACK:
209 case COLOR_SPACE_YCBCR601_LIMITED: 232 case COLOR_SPACE_YCBCR601_LIMITED:
210 case COLOR_SPACE_YCBCR709_LIMITED: 233 case COLOR_SPACE_YCBCR709_LIMITED:
211 case COLOR_SPACE_2020_YCBCR: 234 case COLOR_SPACE_2020_YCBCR:
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index b37ecc3ede61..e7236539f867 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -704,6 +704,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
704 704
705 if (new_connection_type != dc_connection_none) { 705 if (new_connection_type != dc_connection_none) {
706 link->type = new_connection_type; 706 link->type = new_connection_type;
707 link->link_state_valid = false;
707 708
708 /* From Disconnected-to-Connected. */ 709 /* From Disconnected-to-Connected. */
709 switch (link->connector_signal) { 710 switch (link->connector_signal) {
@@ -906,10 +907,10 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
906 sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; 907 sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
907 908
908 /* Connectivity log: detection */ 909 /* Connectivity log: detection */
909 for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) { 910 for (i = 0; i < sink->dc_edid.length / DC_EDID_BLOCK_SIZE; i++) {
910 CONN_DATA_DETECT(link, 911 CONN_DATA_DETECT(link,
911 &sink->dc_edid.raw_edid[i * EDID_BLOCK_SIZE], 912 &sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE],
912 EDID_BLOCK_SIZE, 913 DC_EDID_BLOCK_SIZE,
913 "%s: [Block %d] ", sink->edid_caps.display_name, i); 914 "%s: [Block %d] ", sink->edid_caps.display_name, i);
914 } 915 }
915 916
@@ -2631,6 +2632,8 @@ void core_link_enable_stream(
2631 stream->phy_pix_clk, 2632 stream->phy_pix_clk,
2632 pipe_ctx->stream_res.audio != NULL); 2633 pipe_ctx->stream_res.audio != NULL);
2633 2634
2635 pipe_ctx->stream->link->link_state_valid = true;
2636
2634 if (dc_is_dvi_signal(pipe_ctx->stream->signal)) 2637 if (dc_is_dvi_signal(pipe_ctx->stream->signal))
2635 pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute( 2638 pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute(
2636 pipe_ctx->stream_res.stream_enc, 2639 pipe_ctx->stream_res.stream_enc,
@@ -2713,17 +2716,37 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option)
2713{ 2716{
2714 struct dc *core_dc = pipe_ctx->stream->ctx->dc; 2717 struct dc *core_dc = pipe_ctx->stream->ctx->dc;
2715 struct dc_stream_state *stream = pipe_ctx->stream; 2718 struct dc_stream_state *stream = pipe_ctx->stream;
2719 struct dc_link *link = stream->sink->link;
2716 2720
2717 core_dc->hwss.blank_stream(pipe_ctx); 2721 core_dc->hwss.blank_stream(pipe_ctx);
2718 2722
2719 if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) 2723 if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
2720 deallocate_mst_payload(pipe_ctx); 2724 deallocate_mst_payload(pipe_ctx);
2721 2725
2722 if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) 2726 if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) {
2723 dal_ddc_service_write_scdc_data( 2727 struct ext_hdmi_settings settings = {0};
2724 stream->link->ddc, 0, 2728 enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id;
2725 stream->timing.flags.LTE_340MCSC_SCRAMBLE);
2726 2729
2730 unsigned short masked_chip_caps = link->chip_caps &
2731 EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
2732 //Need to inform that sink is going to use legacy HDMI mode.
2733 dal_ddc_service_write_scdc_data(
2734 link->ddc,
2735 165000,//vbios only handles 165Mhz.
2736 false);
2737 if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
2738 /* DP159, Retimer settings */
2739 if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings))
2740 write_i2c_retimer_setting(pipe_ctx,
2741 false, false, &settings);
2742 else
2743 write_i2c_default_retimer_setting(pipe_ctx,
2744 false, false);
2745 } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
2746 /* PI3EQX1204, Redriver settings */
2747 write_i2c_redriver_setting(pipe_ctx, false);
2748 }
2749 }
2727 core_dc->hwss.disable_stream(pipe_ctx, option); 2750 core_dc->hwss.disable_stream(pipe_ctx, option);
2728 2751
2729 disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); 2752 disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index eac7186e4f08..b7952f39f3fc 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -93,10 +93,8 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
93#if defined(CONFIG_DRM_AMD_DC_DCN1_0) 93#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
94 case FAMILY_RV: 94 case FAMILY_RV:
95 dc_version = DCN_VERSION_1_0; 95 dc_version = DCN_VERSION_1_0;
96#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
97 if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev)) 96 if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev))
98 dc_version = DCN_VERSION_1_01; 97 dc_version = DCN_VERSION_1_01;
99#endif
100 break; 98 break;
101#endif 99#endif
102 default: 100 default:
@@ -147,9 +145,7 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc,
147 145
148#if defined(CONFIG_DRM_AMD_DC_DCN1_0) 146#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
149 case DCN_VERSION_1_0: 147 case DCN_VERSION_1_0:
150#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
151 case DCN_VERSION_1_01: 148 case DCN_VERSION_1_01:
152#endif
153 res_pool = dcn10_create_resource_pool(init_data, dc); 149 res_pool = dcn10_create_resource_pool(init_data, dc);
154 break; 150 break;
155#endif 151#endif
@@ -1184,24 +1180,27 @@ static int acquire_first_split_pipe(
1184 int i; 1180 int i;
1185 1181
1186 for (i = 0; i < pool->pipe_count; i++) { 1182 for (i = 0; i < pool->pipe_count; i++) {
1187 struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; 1183 struct pipe_ctx *split_pipe = &res_ctx->pipe_ctx[i];
1188 1184
1189 if (pipe_ctx->top_pipe && 1185 if (split_pipe->top_pipe && !dc_res_is_odm_head_pipe(split_pipe) &&
1190 pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { 1186 split_pipe->top_pipe->plane_state == split_pipe->plane_state) {
1191 pipe_ctx->top_pipe->bottom_pipe = pipe_ctx->bottom_pipe; 1187 split_pipe->top_pipe->bottom_pipe = split_pipe->bottom_pipe;
1192 if (pipe_ctx->bottom_pipe) 1188 if (split_pipe->bottom_pipe)
1193 pipe_ctx->bottom_pipe->top_pipe = pipe_ctx->top_pipe; 1189 split_pipe->bottom_pipe->top_pipe = split_pipe->top_pipe;
1194 1190
1195 memset(pipe_ctx, 0, sizeof(*pipe_ctx)); 1191 if (split_pipe->top_pipe->plane_state)
1196 pipe_ctx->stream_res.tg = pool->timing_generators[i]; 1192 resource_build_scaling_params(split_pipe->top_pipe);
1197 pipe_ctx->plane_res.hubp = pool->hubps[i]; 1193
1198 pipe_ctx->plane_res.ipp = pool->ipps[i]; 1194 memset(split_pipe, 0, sizeof(*split_pipe));
1199 pipe_ctx->plane_res.dpp = pool->dpps[i]; 1195 split_pipe->stream_res.tg = pool->timing_generators[i];
1200 pipe_ctx->stream_res.opp = pool->opps[i]; 1196 split_pipe->plane_res.hubp = pool->hubps[i];
1201 pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst; 1197 split_pipe->plane_res.ipp = pool->ipps[i];
1202 pipe_ctx->pipe_idx = i; 1198 split_pipe->plane_res.dpp = pool->dpps[i];
1203 1199 split_pipe->stream_res.opp = pool->opps[i];
1204 pipe_ctx->stream = stream; 1200 split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst;
1201 split_pipe->pipe_idx = i;
1202
1203 split_pipe->stream = stream;
1205 return i; 1204 return i;
1206 } 1205 }
1207 } 1206 }
@@ -1647,46 +1646,6 @@ static int acquire_first_free_pipe(
1647 return -1; 1646 return -1;
1648} 1647}
1649 1648
1650static struct stream_encoder *find_first_free_match_stream_enc_for_link(
1651 struct resource_context *res_ctx,
1652 const struct resource_pool *pool,
1653 struct dc_stream_state *stream)
1654{
1655 int i;
1656 int j = -1;
1657 struct dc_link *link = stream->link;
1658
1659 for (i = 0; i < pool->stream_enc_count; i++) {
1660 if (!res_ctx->is_stream_enc_acquired[i] &&
1661 pool->stream_enc[i]) {
1662 /* Store first available for MST second display
1663 * in daisy chain use case */
1664 j = i;
1665 if (pool->stream_enc[i]->id ==
1666 link->link_enc->preferred_engine)
1667 return pool->stream_enc[i];
1668 }
1669 }
1670
1671 /*
1672 * below can happen in cases when stream encoder is acquired:
1673 * 1) for second MST display in chain, so preferred engine already
1674 * acquired;
1675 * 2) for another link, which preferred engine already acquired by any
1676 * MST configuration.
1677 *
1678 * If signal is of DP type and preferred engine not found, return last available
1679 *
1680 * TODO - This is just a patch up and a generic solution is
1681 * required for non DP connectors.
1682 */
1683
1684 if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT)
1685 return pool->stream_enc[j];
1686
1687 return NULL;
1688}
1689
1690static struct audio *find_first_free_audio( 1649static struct audio *find_first_free_audio(
1691 struct resource_context *res_ctx, 1650 struct resource_context *res_ctx,
1692 const struct resource_pool *pool, 1651 const struct resource_pool *pool,
@@ -1998,7 +1957,7 @@ enum dc_status resource_map_pool_resources(
1998 pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx]; 1957 pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
1999 1958
2000 pipe_ctx->stream_res.stream_enc = 1959 pipe_ctx->stream_res.stream_enc =
2001 find_first_free_match_stream_enc_for_link( 1960 dc->res_pool->funcs->find_first_free_match_stream_enc_for_link(
2002 &context->res_ctx, pool, stream); 1961 &context->res_ctx, pool, stream);
2003 1962
2004 if (!pipe_ctx->stream_res.stream_enc) 1963 if (!pipe_ctx->stream_res.stream_enc)
@@ -2354,7 +2313,18 @@ static void set_avi_info_frame(
2354 break; 2313 break;
2355 } 2314 }
2356 } 2315 }
2316 /* If VIC >= 128, the Source shall use AVI InfoFrame Version 3*/
2357 hdmi_info.bits.VIC0_VIC7 = vic; 2317 hdmi_info.bits.VIC0_VIC7 = vic;
2318 if (vic >= 128)
2319 hdmi_info.bits.header.version = 3;
2320 /* If (C1, C0)=(1, 1) and (EC2, EC1, EC0)=(1, 1, 1),
2321 * the Source shall use 20 AVI InfoFrame Version 4
2322 */
2323 if (hdmi_info.bits.C0_C1 == COLORIMETRY_EXTENDED &&
2324 hdmi_info.bits.EC0_EC2 == COLORIMETRYEX_RESERVED) {
2325 hdmi_info.bits.header.version = 4;
2326 hdmi_info.bits.header.length = 14;
2327 }
2358 2328
2359 /* pixel repetition 2329 /* pixel repetition
2360 * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel 2330 * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel
@@ -2373,12 +2343,19 @@ static void set_avi_info_frame(
2373 hdmi_info.bits.bar_right = (stream->timing.h_total 2343 hdmi_info.bits.bar_right = (stream->timing.h_total
2374 - stream->timing.h_border_right + 1); 2344 - stream->timing.h_border_right + 1);
2375 2345
2346 /* Additional Colorimetry Extension
2347 * Used in conduction with C0-C1 and EC0-EC2
2348 * 0 = DCI-P3 RGB (D65)
2349 * 1 = DCI-P3 RGB (theater)
2350 */
2351 hdmi_info.bits.ACE0_ACE3 = 0;
2352
2376 /* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */ 2353 /* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */
2377 check_sum = &hdmi_info.packet_raw_data.sb[0]; 2354 check_sum = &hdmi_info.packet_raw_data.sb[0];
2378 2355
2379 *check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2; 2356 *check_sum = HDMI_INFOFRAME_TYPE_AVI + hdmi_info.bits.header.length + hdmi_info.bits.header.version;
2380 2357
2381 for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++) 2358 for (byte_index = 1; byte_index <= hdmi_info.bits.header.length; byte_index++)
2382 *check_sum += hdmi_info.packet_raw_data.sb[byte_index]; 2359 *check_sum += hdmi_info.packet_raw_data.sb[byte_index];
2383 2360
2384 /* one byte complement */ 2361 /* one byte complement */
@@ -2425,21 +2402,6 @@ static void set_spd_info_packet(
2425 *info_packet = stream->vrr_infopacket; 2402 *info_packet = stream->vrr_infopacket;
2426} 2403}
2427 2404
2428static void set_dp_sdp_info_packet(
2429 struct dc_info_packet *info_packet,
2430 struct dc_stream_state *stream)
2431{
2432 /* SPD info packet for custom sdp message */
2433
2434 /* Return if false. If true,
2435 * set the corresponding bit in the info packet
2436 */
2437 if (!stream->dpsdp_infopacket.valid)
2438 return;
2439
2440 *info_packet = stream->dpsdp_infopacket;
2441}
2442
2443static void set_hdr_static_info_packet( 2405static void set_hdr_static_info_packet(
2444 struct dc_info_packet *info_packet, 2406 struct dc_info_packet *info_packet,
2445 struct dc_stream_state *stream) 2407 struct dc_stream_state *stream)
@@ -2495,7 +2457,6 @@ void dc_resource_state_copy_construct(
2495 2457
2496 if (cur_pipe->bottom_pipe) 2458 if (cur_pipe->bottom_pipe)
2497 cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; 2459 cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
2498
2499 } 2460 }
2500 2461
2501 for (i = 0; i < dst_ctx->stream_count; i++) { 2462 for (i = 0; i < dst_ctx->stream_count; i++) {
@@ -2536,7 +2497,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
2536 info->spd.valid = false; 2497 info->spd.valid = false;
2537 info->hdrsmd.valid = false; 2498 info->hdrsmd.valid = false;
2538 info->vsc.valid = false; 2499 info->vsc.valid = false;
2539 info->dpsdp.valid = false;
2540 2500
2541 signal = pipe_ctx->stream->signal; 2501 signal = pipe_ctx->stream->signal;
2542 2502
@@ -2556,8 +2516,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
2556 set_spd_info_packet(&info->spd, pipe_ctx->stream); 2516 set_spd_info_packet(&info->spd, pipe_ctx->stream);
2557 2517
2558 set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream); 2518 set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream);
2559
2560 set_dp_sdp_info_packet(&info->dpsdp, pipe_ctx->stream);
2561 } 2519 }
2562 2520
2563 patch_gamut_packet_checksum(&info->gamut); 2521 patch_gamut_packet_checksum(&info->gamut);
@@ -2644,6 +2602,10 @@ bool pipe_need_reprogram(
2644 if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream)) 2602 if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream))
2645 return true; 2603 return true;
2646 2604
2605 if (false == pipe_ctx_old->stream->link->link_state_valid &&
2606 false == pipe_ctx_old->stream->dpms_off)
2607 return true;
2608
2647 return false; 2609 return false;
2648} 2610}
2649 2611
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 96e97d25d639..b723ffc8ea25 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -47,8 +47,8 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
47 47
48 if (dc_is_dvi_signal(stream->signal)) { 48 if (dc_is_dvi_signal(stream->signal)) {
49 if (stream->ctx->dc->caps.dual_link_dvi && 49 if (stream->ctx->dc->caps.dual_link_dvi &&
50 (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK && 50 (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK &&
51 sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) 51 sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
52 stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK; 52 stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK;
53 else 53 else
54 stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK; 54 stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
@@ -371,42 +371,12 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
371 return 0; 371 return 0;
372} 372}
373 373
374static void build_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx,
375 const uint8_t *custom_sdp_message,
376 unsigned int sdp_message_size)
377{
378 uint8_t i;
379 struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
380
381 /* set valid info */
382 info->dpsdp.valid = true;
383
384 /* set sdp message header */
385 info->dpsdp.hb0 = custom_sdp_message[0]; /* package id */
386 info->dpsdp.hb1 = custom_sdp_message[1]; /* package type */
387 info->dpsdp.hb2 = custom_sdp_message[2]; /* package specific byte 0 any data */
388 info->dpsdp.hb3 = custom_sdp_message[3]; /* package specific byte 0 any data */
389
390 /* set sdp message data */
391 for (i = 0; i < 32; i++)
392 info->dpsdp.sb[i] = (custom_sdp_message[i+4]);
393
394}
395
396static void invalid_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx)
397{
398 struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
399
400 /* in-valid info */
401 info->dpsdp.valid = false;
402}
403
404bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, 374bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
405 const uint8_t *custom_sdp_message, 375 const uint8_t *custom_sdp_message,
406 unsigned int sdp_message_size) 376 unsigned int sdp_message_size)
407{ 377{
408 int i; 378 int i;
409 struct dc *core_dc; 379 struct dc *dc;
410 struct resource_context *res_ctx; 380 struct resource_context *res_ctx;
411 381
412 if (stream == NULL) { 382 if (stream == NULL) {
@@ -414,8 +384,8 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
414 return false; 384 return false;
415 } 385 }
416 386
417 core_dc = stream->ctx->dc; 387 dc = stream->ctx->dc;
418 res_ctx = &core_dc->current_state->res_ctx; 388 res_ctx = &dc->current_state->res_ctx;
419 389
420 for (i = 0; i < MAX_PIPES; i++) { 390 for (i = 0; i < MAX_PIPES; i++) {
421 struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; 391 struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
@@ -423,11 +393,14 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
423 if (pipe_ctx->stream != stream) 393 if (pipe_ctx->stream != stream)
424 continue; 394 continue;
425 395
426 build_dp_sdp_info_frame(pipe_ctx, custom_sdp_message, sdp_message_size); 396 if (dc->hwss.send_immediate_sdp_message != NULL)
427 397 dc->hwss.send_immediate_sdp_message(pipe_ctx,
428 core_dc->hwss.update_info_frame(pipe_ctx); 398 custom_sdp_message,
399 sdp_message_size);
400 else
401 DC_LOG_WARNING("%s:send_immediate_sdp_message not implemented on this ASIC\n",
402 __func__);
429 403
430 invalid_dp_sdp_info_frame(pipe_ctx);
431 } 404 }
432 405
433 return true; 406 return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 44e4b0465587..566111ff463e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -39,7 +39,7 @@
39#include "inc/hw/dmcu.h" 39#include "inc/hw/dmcu.h"
40#include "dml/display_mode_lib.h" 40#include "dml/display_mode_lib.h"
41 41
42#define DC_VER "3.2.27" 42#define DC_VER "3.2.31"
43 43
44#define MAX_SURFACES 3 44#define MAX_SURFACES 3
45#define MAX_PLANES 6 45#define MAX_PLANES 6
@@ -205,6 +205,7 @@ struct dc_config {
205 bool disable_fractional_pwm; 205 bool disable_fractional_pwm;
206 bool allow_seamless_boot_optimization; 206 bool allow_seamless_boot_optimization;
207 bool power_down_display_on_boot; 207 bool power_down_display_on_boot;
208 bool edp_not_connected;
208}; 209};
209 210
210enum visual_confirm { 211enum visual_confirm {
@@ -540,12 +541,14 @@ struct dc_plane_status {
540union surface_update_flags { 541union surface_update_flags {
541 542
542 struct { 543 struct {
544 uint32_t addr_update:1;
543 /* Medium updates */ 545 /* Medium updates */
544 uint32_t dcc_change:1; 546 uint32_t dcc_change:1;
545 uint32_t color_space_change:1; 547 uint32_t color_space_change:1;
546 uint32_t horizontal_mirror_change:1; 548 uint32_t horizontal_mirror_change:1;
547 uint32_t per_pixel_alpha_change:1; 549 uint32_t per_pixel_alpha_change:1;
548 uint32_t global_alpha_change:1; 550 uint32_t global_alpha_change:1;
551 uint32_t sdr_white_level:1;
549 uint32_t rotation_change:1; 552 uint32_t rotation_change:1;
550 uint32_t swizzle_change:1; 553 uint32_t swizzle_change:1;
551 uint32_t scaling_change:1; 554 uint32_t scaling_change:1;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 5e6c5eff49cf..2d0acf109360 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -297,7 +297,7 @@ void generic_reg_wait(const struct dc_context *ctx,
297 int i; 297 int i;
298 298
299 /* something is terribly wrong if time out is > 200ms. (5Hz) */ 299 /* something is terribly wrong if time out is > 200ms. (5Hz) */
300 ASSERT(delay_between_poll_us * time_out_num_tries <= 200000); 300 ASSERT(delay_between_poll_us * time_out_num_tries <= 3000000);
301 301
302 for (i = 0; i <= time_out_num_tries; i++) { 302 for (i = 0; i <= time_out_num_tries; i++) {
303 if (i) { 303 if (i) {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index da55d623647a..c91b8aad78c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -534,6 +534,7 @@ enum dc_color_space {
534 COLOR_SPACE_DOLBYVISION, 534 COLOR_SPACE_DOLBYVISION,
535 COLOR_SPACE_APPCTRL, 535 COLOR_SPACE_APPCTRL,
536 COLOR_SPACE_CUSTOMPOINTS, 536 COLOR_SPACE_CUSTOMPOINTS,
537 COLOR_SPACE_YCBCR709_BLACK,
537}; 538};
538 539
539enum dc_dither_option { 540enum dc_dither_option {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 7b9429e30d82..094009127e25 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -75,6 +75,7 @@ struct dc_link {
75 enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ 75 enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */
76 bool is_hpd_filter_disabled; 76 bool is_hpd_filter_disabled;
77 bool dp_ss_off; 77 bool dp_ss_off;
78 bool link_state_valid;
78 79
79 /* caps is the same as reported_link_cap. link_traing use 80 /* caps is the same as reported_link_cap. link_traing use
80 * reported_link_cap. Will clean up. TODO 81 * reported_link_cap. Will clean up. TODO
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 6c2a3d9a4c2e..92a670894c05 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -104,7 +104,7 @@ struct dc_context {
104 104
105 105
106#define DC_MAX_EDID_BUFFER_SIZE 1024 106#define DC_MAX_EDID_BUFFER_SIZE 1024
107#define EDID_BLOCK_SIZE 128 107#define DC_EDID_BLOCK_SIZE 128
108#define MAX_SURFACE_NUM 4 108#define MAX_SURFACE_NUM 4
109#define NUM_PIXEL_FORMATS 10 109#define NUM_PIXEL_FORMATS 10
110 110
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index da96229db53a..2959c3c9390b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -473,6 +473,8 @@ void dce_abm_destroy(struct abm **abm)
473{ 473{
474 struct dce_abm *abm_dce = TO_DCE_ABM(*abm); 474 struct dce_abm *abm_dce = TO_DCE_ABM(*abm);
475 475
476 abm_dce->base.funcs->set_abm_immediate_disable(*abm);
477
476 kfree(abm_dce); 478 kfree(abm_dce);
477 *abm = NULL; 479 *abm = NULL;
478} 480}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
index 963686380738..6b2e207777f0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
@@ -241,6 +241,7 @@ static enum dm_pp_clocks_state dce_get_required_clocks_state(
241 return low_req_clk; 241 return low_req_clk;
242} 242}
243 243
244/* TODO: remove use the two broken down functions */
244static int dce_set_clock( 245static int dce_set_clock(
245 struct clk_mgr *clk_mgr, 246 struct clk_mgr *clk_mgr,
246 int requested_clk_khz) 247 int requested_clk_khz)
@@ -336,6 +337,75 @@ int dce112_set_clock(struct clk_mgr *clk_mgr, int requested_clk_khz)
336 return actual_clock; 337 return actual_clock;
337} 338}
338 339
340int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz)
341{
342 struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr);
343 struct bp_set_dce_clock_parameters dce_clk_params;
344 struct dc_bios *bp = clk_mgr->ctx->dc_bios;
345 struct dc *core_dc = clk_mgr->ctx->dc;
346 struct dmcu *dmcu = core_dc->res_pool->dmcu;
347 int actual_clock = requested_clk_khz;
348 /* Prepare to program display clock*/
349 memset(&dce_clk_params, 0, sizeof(dce_clk_params));
350
351 /* Make sure requested clock isn't lower than minimum threshold*/
352 if (requested_clk_khz > 0)
353 requested_clk_khz = max(requested_clk_khz,
354 clk_mgr_dce->dentist_vco_freq_khz / 62);
355
356 dce_clk_params.target_clock_frequency = requested_clk_khz;
357 dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
358 dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK;
359
360 bp->funcs->set_dce_clock(bp, &dce_clk_params);
361 actual_clock = dce_clk_params.target_clock_frequency;
362
363 /*
364 * from power down, we need mark the clock state as ClocksStateNominal
365 * from HWReset, so when resume we will call pplib voltage regulator.
366 */
367 if (requested_clk_khz == 0)
368 clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
369
370
371 if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
372 if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
373 if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock)
374 dmcu->funcs->set_psr_wait_loop(dmcu,
375 actual_clock / 1000 / 7);
376 }
377 }
378
379 clk_mgr_dce->dfs_bypass_disp_clk = actual_clock;
380 return actual_clock;
381
382}
383
384int dce112_set_dprefclk(struct clk_mgr *clk_mgr)
385{
386 struct bp_set_dce_clock_parameters dce_clk_params;
387 struct dc_bios *bp = clk_mgr->ctx->dc_bios;
388
389 memset(&dce_clk_params, 0, sizeof(dce_clk_params));
390
391 /*Program DP ref Clock*/
392 /*VBIOS will determine DPREFCLK frequency, so we don't set it*/
393 dce_clk_params.target_clock_frequency = 0;
394 dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
395 dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK;
396 if (!ASICREV_IS_VEGA20_P(clk_mgr->ctx->asic_id.hw_internal_rev))
397 dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK =
398 (dce_clk_params.pll_id ==
399 CLOCK_SOURCE_COMBO_DISPLAY_PLL0);
400 else
401 dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false;
402
403 bp->funcs->set_dce_clock(bp, &dce_clk_params);
404
405 /* Returns the dp_refclk that was set */
406 return dce_clk_params.target_clock_frequency;
407}
408
339static void dce_clock_read_integrated_info(struct dce_clk_mgr *clk_mgr_dce) 409static void dce_clock_read_integrated_info(struct dce_clk_mgr *clk_mgr_dce)
340{ 410{
341 struct dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug; 411 struct dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug;
@@ -782,22 +852,22 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr,
782 dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); 852 dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context);
783} 853}
784 854
785static const struct clk_mgr_funcs dce120_funcs = { 855static struct clk_mgr_funcs dce120_funcs = {
786 .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, 856 .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
787 .update_clocks = dce12_update_clocks 857 .update_clocks = dce12_update_clocks
788}; 858};
789 859
790static const struct clk_mgr_funcs dce112_funcs = { 860static struct clk_mgr_funcs dce112_funcs = {
791 .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, 861 .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
792 .update_clocks = dce112_update_clocks 862 .update_clocks = dce112_update_clocks
793}; 863};
794 864
795static const struct clk_mgr_funcs dce110_funcs = { 865static struct clk_mgr_funcs dce110_funcs = {
796 .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, 866 .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
797 .update_clocks = dce11_update_clocks, 867 .update_clocks = dce11_update_clocks,
798}; 868};
799 869
800static const struct clk_mgr_funcs dce_funcs = { 870static struct clk_mgr_funcs dce_funcs = {
801 .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, 871 .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
802 .update_clocks = dce_update_clocks 872 .update_clocks = dce_update_clocks
803}; 873};
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h
index c8f8c442142a..cca0c95d8cc8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h
@@ -39,6 +39,11 @@
39#define CLK_COMMON_REG_LIST_DCN_BASE() \ 39#define CLK_COMMON_REG_LIST_DCN_BASE() \
40 SR(DENTIST_DISPCLK_CNTL) 40 SR(DENTIST_DISPCLK_CNTL)
41 41
42#define VBIOS_SMU_MSG_BOX_REG_LIST_RV() \
43 .MP1_SMN_C2PMSG_91 = mmMP1_SMN_C2PMSG_91, \
44 .MP1_SMN_C2PMSG_83 = mmMP1_SMN_C2PMSG_83, \
45 .MP1_SMN_C2PMSG_67 = mmMP1_SMN_C2PMSG_67
46
42#define CLK_SF(reg_name, field_name, post_fix)\ 47#define CLK_SF(reg_name, field_name, post_fix)\
43 .field_name = reg_name ## __ ## field_name ## post_fix 48 .field_name = reg_name ## __ ## field_name ## post_fix
44 49
@@ -50,23 +55,39 @@
50 CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\ 55 CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\
51 CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh) 56 CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh)
52 57
58#define CLK_MASK_SH_LIST_RV1(mask_sh) \
59 CLK_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\
60 CLK_SF(MP1_SMN_C2PMSG_67, CONTENT, mask_sh),\
61 CLK_SF(MP1_SMN_C2PMSG_83, CONTENT, mask_sh),\
62 CLK_SF(MP1_SMN_C2PMSG_91, CONTENT, mask_sh),
63
64
53#define CLK_REG_FIELD_LIST(type) \ 65#define CLK_REG_FIELD_LIST(type) \
54 type DPREFCLK_SRC_SEL; \ 66 type DPREFCLK_SRC_SEL; \
55 type DENTIST_DPREFCLK_WDIVIDER; \ 67 type DENTIST_DPREFCLK_WDIVIDER; \
56 type DENTIST_DISPCLK_WDIVIDER; \ 68 type DENTIST_DISPCLK_WDIVIDER; \
57 type DENTIST_DISPCLK_CHG_DONE; 69 type DENTIST_DISPCLK_CHG_DONE;
58 70
71#define VBIOS_SMU_REG_FIELD_LIST(type) \
72 type CONTENT;
73
59struct clk_mgr_shift { 74struct clk_mgr_shift {
60 CLK_REG_FIELD_LIST(uint8_t) 75 CLK_REG_FIELD_LIST(uint8_t)
76 VBIOS_SMU_REG_FIELD_LIST(uint32_t)
61}; 77};
62 78
63struct clk_mgr_mask { 79struct clk_mgr_mask {
64 CLK_REG_FIELD_LIST(uint32_t) 80 CLK_REG_FIELD_LIST(uint32_t)
81 VBIOS_SMU_REG_FIELD_LIST(uint32_t)
65}; 82};
66 83
67struct clk_mgr_registers { 84struct clk_mgr_registers {
68 uint32_t DPREFCLK_CNTL; 85 uint32_t DPREFCLK_CNTL;
69 uint32_t DENTIST_DISPCLK_CNTL; 86 uint32_t DENTIST_DISPCLK_CNTL;
87
88 uint32_t MP1_SMN_C2PMSG_67;
89 uint32_t MP1_SMN_C2PMSG_83;
90 uint32_t MP1_SMN_C2PMSG_91;
70}; 91};
71 92
72struct state_dependent_clocks { 93struct state_dependent_clocks {
@@ -168,6 +189,8 @@ void dce110_fill_display_configs(
168 struct dm_pp_display_configuration *pp_display_cfg); 189 struct dm_pp_display_configuration *pp_display_cfg);
169 190
170int dce112_set_clock(struct clk_mgr *dccg, int requested_clk_khz); 191int dce112_set_clock(struct clk_mgr *dccg, int requested_clk_khz);
192int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz);
193int dce112_set_dprefclk(struct clk_mgr *clk_mgr);
171 194
172struct clk_mgr *dce_clk_mgr_create( 195struct clk_mgr *dce_clk_mgr_create(
173 struct dc_context *ctx, 196 struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index f70437aae8e0..df422440845b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -183,8 +183,8 @@ static bool calculate_fb_and_fractional_fb_divider(
183*RETURNS: 183*RETURNS:
184* It fills the PLLSettings structure with PLL Dividers values 184* It fills the PLLSettings structure with PLL Dividers values
185* if calculated values are within required tolerance 185* if calculated values are within required tolerance
186* It returns - true if eror is within tolerance 186* It returns - true if error is within tolerance
187* - false if eror is not within tolerance 187* - false if error is not within tolerance
188*/ 188*/
189static bool calc_fb_divider_checking_tolerance( 189static bool calc_fb_divider_checking_tolerance(
190 struct calc_pll_clock_source *calc_pll_cs, 190 struct calc_pll_clock_source *calc_pll_cs,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
index cd26161bcc4d..526aab438374 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
@@ -268,6 +268,8 @@ static bool setup_engine(
268 struct dce_i2c_hw *dce_i2c_hw) 268 struct dce_i2c_hw *dce_i2c_hw)
269{ 269{
270 uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE; 270 uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE;
271 /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/
272 REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);
271 273
272 if (dce_i2c_hw->setup_limit != 0) 274 if (dce_i2c_hw->setup_limit != 0)
273 i2c_setup_limit = dce_i2c_hw->setup_limit; 275 i2c_setup_limit = dce_i2c_hw->setup_limit;
@@ -322,8 +324,6 @@ static void release_engine(
322 324
323 set_speed(dce_i2c_hw, dce_i2c_hw->original_speed); 325 set_speed(dce_i2c_hw, dce_i2c_hw->original_speed);
324 326
325 /* Release I2C */
326 REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1);
327 327
328 /* Reset HW engine */ 328 /* Reset HW engine */
329 { 329 {
@@ -343,6 +343,9 @@ static void release_engine(
343 /* HW I2c engine - clock gating feature */ 343 /* HW I2c engine - clock gating feature */
344 if (!dce_i2c_hw->engine_keep_power_up_count) 344 if (!dce_i2c_hw->engine_keep_power_up_count)
345 REG_UPDATE_N(SETUP, 1, FN(SETUP, DC_I2C_DDC1_ENABLE), 0); 345 REG_UPDATE_N(SETUP, 1, FN(SETUP, DC_I2C_DDC1_ENABLE), 0);
346 /* Release I2C after reset, so HW or DMCU could use it */
347 REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1,
348 DC_I2C_SW_USE_I2C_REG_REQ, 0);
346 349
347} 350}
348 351
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
index 575500755b2e..f718e3d396f2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
@@ -105,6 +105,7 @@ enum {
105 I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL, mask_sh),\ 105 I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL, mask_sh),\
106 I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY, mask_sh),\ 106 I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY, mask_sh),\
107 I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY, mask_sh),\ 107 I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY, mask_sh),\
108 I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, mask_sh),\
108 I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, mask_sh),\ 109 I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, mask_sh),\
109 I2C_SF(DC_I2C_ARBITRATION, DC_I2C_NO_QUEUED_SW_GO, mask_sh),\ 110 I2C_SF(DC_I2C_ARBITRATION, DC_I2C_NO_QUEUED_SW_GO, mask_sh),\
110 I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_PRIORITY, mask_sh),\ 111 I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_PRIORITY, mask_sh),\
@@ -146,6 +147,7 @@ struct dce_i2c_shift {
146 uint8_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY; 147 uint8_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY;
147 uint8_t DC_I2C_DDC1_INTRA_BYTE_DELAY; 148 uint8_t DC_I2C_DDC1_INTRA_BYTE_DELAY;
148 uint8_t DC_I2C_SW_DONE_USING_I2C_REG; 149 uint8_t DC_I2C_SW_DONE_USING_I2C_REG;
150 uint8_t DC_I2C_SW_USE_I2C_REG_REQ;
149 uint8_t DC_I2C_NO_QUEUED_SW_GO; 151 uint8_t DC_I2C_NO_QUEUED_SW_GO;
150 uint8_t DC_I2C_SW_PRIORITY; 152 uint8_t DC_I2C_SW_PRIORITY;
151 uint8_t DC_I2C_SOFT_RESET; 153 uint8_t DC_I2C_SOFT_RESET;
@@ -184,6 +186,7 @@ struct dce_i2c_mask {
184 uint32_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY; 186 uint32_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY;
185 uint32_t DC_I2C_DDC1_INTRA_BYTE_DELAY; 187 uint32_t DC_I2C_DDC1_INTRA_BYTE_DELAY;
186 uint32_t DC_I2C_SW_DONE_USING_I2C_REG; 188 uint32_t DC_I2C_SW_DONE_USING_I2C_REG;
189 uint32_t DC_I2C_SW_USE_I2C_REG_REQ;
187 uint32_t DC_I2C_NO_QUEUED_SW_GO; 190 uint32_t DC_I2C_NO_QUEUED_SW_GO;
188 uint32_t DC_I2C_SW_PRIORITY; 191 uint32_t DC_I2C_SW_PRIORITY;
189 uint32_t DC_I2C_SOFT_RESET; 192 uint32_t DC_I2C_SOFT_RESET;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 14309fe6f2e6..61fe2596fdb3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -418,6 +418,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
418 break; 418 break;
419 case COLOR_SPACE_YCBCR709: 419 case COLOR_SPACE_YCBCR709:
420 case COLOR_SPACE_YCBCR709_LIMITED: 420 case COLOR_SPACE_YCBCR709_LIMITED:
421 case COLOR_SPACE_YCBCR709_BLACK:
421 misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ 422 misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */
422 misc1 = misc1 & ~0x80; /* bit7 = 0*/ 423 misc1 = misc1 & ~0x80; /* bit7 = 0*/
423 dynamic_range_ycbcr = 1; /*bt709*/ 424 dynamic_range_ycbcr = 1; /*bt709*/
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index e938bf9986d3..d7a531e9700f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -867,13 +867,55 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s
867 return DC_FAIL_SURFACE_VALIDATE; 867 return DC_FAIL_SURFACE_VALIDATE;
868} 868}
869 869
870struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
871 struct resource_context *res_ctx,
872 const struct resource_pool *pool,
873 struct dc_stream_state *stream)
874{
875 int i;
876 int j = -1;
877 struct dc_link *link = stream->link;
878
879 for (i = 0; i < pool->stream_enc_count; i++) {
880 if (!res_ctx->is_stream_enc_acquired[i] &&
881 pool->stream_enc[i]) {
882 /* Store first available for MST second display
883 * in daisy chain use case
884 */
885 j = i;
886 if (pool->stream_enc[i]->id ==
887 link->link_enc->preferred_engine)
888 return pool->stream_enc[i];
889 }
890 }
891
892 /*
893 * below can happen in cases when stream encoder is acquired:
894 * 1) for second MST display in chain, so preferred engine already
895 * acquired;
896 * 2) for another link, which preferred engine already acquired by any
897 * MST configuration.
898 *
899 * If signal is of DP type and preferred engine not found, return last available
900 *
901 * TODO - This is just a patch up and a generic solution is
902 * required for non DP connectors.
903 */
904
905 if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT)
906 return pool->stream_enc[j];
907
908 return NULL;
909}
910
870static const struct resource_funcs dce100_res_pool_funcs = { 911static const struct resource_funcs dce100_res_pool_funcs = {
871 .destroy = dce100_destroy_resource_pool, 912 .destroy = dce100_destroy_resource_pool,
872 .link_enc_create = dce100_link_encoder_create, 913 .link_enc_create = dce100_link_encoder_create,
873 .validate_bandwidth = dce100_validate_bandwidth, 914 .validate_bandwidth = dce100_validate_bandwidth,
874 .validate_plane = dce100_validate_plane, 915 .validate_plane = dce100_validate_plane,
875 .add_stream_to_ctx = dce100_add_stream_to_ctx, 916 .add_stream_to_ctx = dce100_add_stream_to_ctx,
876 .validate_global = dce100_validate_global 917 .validate_global = dce100_validate_global,
918 .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
877}; 919};
878 920
879static bool construct( 921static bool construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
index 2f366d66635d..fecab7c560f5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
@@ -46,4 +46,9 @@ enum dc_status dce100_add_stream_to_ctx(
46 struct dc_state *new_ctx, 46 struct dc_state *new_ctx,
47 struct dc_stream_state *dc_stream); 47 struct dc_stream_state *dc_stream);
48 48
49struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
50 struct resource_context *res_ctx,
51 const struct resource_pool *pool,
52 struct dc_stream_state *stream);
53
49#endif /* DCE100_RESOURCE_H_ */ 54#endif /* DCE100_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 7ac50ab1b762..69f215967af3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -242,6 +242,9 @@ static void build_prescale_params(struct ipp_prescale_params *prescale_params,
242 prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED; 242 prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED;
243 243
244 switch (plane_state->format) { 244 switch (plane_state->format) {
245 case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
246 prescale_params->scale = 0x2082;
247 break;
245 case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: 248 case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
246 case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: 249 case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
247 prescale_params->scale = 0x2020; 250 prescale_params->scale = 0x2020;
@@ -1296,6 +1299,11 @@ static enum dc_status dce110_enable_stream_timing(
1296 pipe_ctx->stream_res.tg->funcs->program_timing( 1299 pipe_ctx->stream_res.tg->funcs->program_timing(
1297 pipe_ctx->stream_res.tg, 1300 pipe_ctx->stream_res.tg,
1298 &stream->timing, 1301 &stream->timing,
1302 0,
1303 0,
1304 0,
1305 0,
1306 pipe_ctx->stream->signal,
1299 true); 1307 true);
1300 } 1308 }
1301 1309
@@ -1488,10 +1496,11 @@ static void disable_vga_and_power_gate_all_controllers(
1488 } 1496 }
1489} 1497}
1490 1498
1491static struct dc_link *get_link_for_edp(struct dc *dc) 1499static struct dc_link *get_edp_link(struct dc *dc)
1492{ 1500{
1493 int i; 1501 int i;
1494 1502
1503 // report any eDP links, even unconnected DDI's
1495 for (i = 0; i < dc->link_count; i++) { 1504 for (i = 0; i < dc->link_count; i++) {
1496 if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) 1505 if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP)
1497 return dc->links[i]; 1506 return dc->links[i];
@@ -1499,23 +1508,13 @@ static struct dc_link *get_link_for_edp(struct dc *dc)
1499 return NULL; 1508 return NULL;
1500} 1509}
1501 1510
1502static struct dc_link *get_link_for_edp_to_turn_off( 1511static struct dc_link *get_edp_link_with_sink(
1503 struct dc *dc, 1512 struct dc *dc,
1504 struct dc_state *context) 1513 struct dc_state *context)
1505{ 1514{
1506 int i; 1515 int i;
1507 struct dc_link *link = NULL; 1516 struct dc_link *link = NULL;
1508 1517
1509 /* check if eDP panel is suppose to be set mode, if yes, no need to disable */
1510 for (i = 0; i < context->stream_count; i++) {
1511 if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
1512 if (context->streams[i]->dpms_off == true)
1513 return context->streams[i]->sink->link;
1514 else
1515 return NULL;
1516 }
1517 }
1518
1519 /* check if there is an eDP panel not in use */ 1518 /* check if there is an eDP panel not in use */
1520 for (i = 0; i < dc->link_count; i++) { 1519 for (i = 0; i < dc->link_count; i++) {
1521 if (dc->links[i]->local_sink && 1520 if (dc->links[i]->local_sink &&
@@ -1538,59 +1537,53 @@ static struct dc_link *get_link_for_edp_to_turn_off(
1538void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) 1537void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
1539{ 1538{
1540 int i; 1539 int i;
1541 struct dc_link *edp_link_to_turnoff = NULL; 1540 struct dc_link *edp_link_with_sink = get_edp_link_with_sink(dc, context);
1542 struct dc_link *edp_link = get_link_for_edp(dc); 1541 struct dc_link *edp_link = get_edp_link(dc);
1543 bool can_edp_fast_boot_optimize = false; 1542 bool can_apply_edp_fast_boot = false;
1544 bool apply_edp_fast_boot_optimization = false;
1545 bool can_apply_seamless_boot = false; 1543 bool can_apply_seamless_boot = false;
1546 1544
1547 for (i = 0; i < context->stream_count; i++) {
1548 if (context->streams[i]->apply_seamless_boot_optimization) {
1549 can_apply_seamless_boot = true;
1550 break;
1551 }
1552 }
1553
1554 if (dc->hwss.init_pipes) 1545 if (dc->hwss.init_pipes)
1555 dc->hwss.init_pipes(dc, context); 1546 dc->hwss.init_pipes(dc, context);
1556 1547
1557 if (edp_link) { 1548 // Check fastboot support, disable on DCE8 because of blank screens
1558 /* this seems to cause blank screens on DCE8 */ 1549 if (edp_link && dc->ctx->dce_version != DCE_VERSION_8_0 &&
1559 if ((dc->ctx->dce_version == DCE_VERSION_8_0) || 1550 dc->ctx->dce_version != DCE_VERSION_8_1 &&
1560 (dc->ctx->dce_version == DCE_VERSION_8_1) || 1551 dc->ctx->dce_version != DCE_VERSION_8_3) {
1561 (dc->ctx->dce_version == DCE_VERSION_8_3)) 1552
1562 can_edp_fast_boot_optimize = false; 1553 // enable fastboot if backend is enabled on eDP
1563 else 1554 if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) {
1564 can_edp_fast_boot_optimize = 1555 /* Find eDP stream and set optimization flag */
1565 edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc); 1556 for (i = 0; i < context->stream_count; i++) {
1557 if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
1558 context->streams[i]->apply_edp_fast_boot_optimization = true;
1559 can_apply_edp_fast_boot = true;
1560 break;
1561 }
1562 }
1563 }
1566 } 1564 }
1567 1565
1568 if (can_edp_fast_boot_optimize) 1566 // Check seamless boot support
1569 edp_link_to_turnoff = get_link_for_edp_to_turn_off(dc, context); 1567 for (i = 0; i < context->stream_count; i++) {
1570 1568 if (context->streams[i]->apply_seamless_boot_optimization) {
1571 /* if OS doesn't light up eDP and eDP link is available, we want to disable 1569 can_apply_seamless_boot = true;
1572 * If resume from S4/S5, should optimization. 1570 break;
1573 */
1574 if (can_edp_fast_boot_optimize && !edp_link_to_turnoff) {
1575 /* Find eDP stream and set optimization flag */
1576 for (i = 0; i < context->stream_count; i++) {
1577 if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
1578 context->streams[i]->apply_edp_fast_boot_optimization = true;
1579 apply_edp_fast_boot_optimization = true;
1580 }
1581 } 1571 }
1582 } 1572 }
1583 1573
1584 if (!apply_edp_fast_boot_optimization && !can_apply_seamless_boot) { 1574 /* eDP should not have stream in resume from S4 and so even with VBios post
1585 if (edp_link_to_turnoff) { 1575 * it should get turned off
1576 */
1577 if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) {
1578 if (edp_link_with_sink) {
1586 /*turn off backlight before DP_blank and encoder powered down*/ 1579 /*turn off backlight before DP_blank and encoder powered down*/
1587 dc->hwss.edp_backlight_control(edp_link_to_turnoff, false); 1580 dc->hwss.edp_backlight_control(edp_link_with_sink, false);
1588 } 1581 }
1589 /*resume from S3, no vbios posting, no need to power down again*/ 1582 /*resume from S3, no vbios posting, no need to power down again*/
1590 power_down_all_hw_blocks(dc); 1583 power_down_all_hw_blocks(dc);
1591 disable_vga_and_power_gate_all_controllers(dc); 1584 disable_vga_and_power_gate_all_controllers(dc);
1592 if (edp_link_to_turnoff) 1585 if (edp_link_with_sink)
1593 dc->hwss.edp_power_control(edp_link_to_turnoff, false); 1586 dc->hwss.edp_power_control(edp_link_with_sink, false);
1594 } 1587 }
1595 bios_set_scratch_acc_mode_change(dc->ctx->dc_bios); 1588 bios_set_scratch_acc_mode_change(dc->ctx->dc_bios);
1596} 1589}
@@ -2030,8 +2023,10 @@ enum dc_status dce110_apply_ctx_to_hw(
2030 if (pipe_ctx->stream == NULL) 2023 if (pipe_ctx->stream == NULL)
2031 continue; 2024 continue;
2032 2025
2033 if (pipe_ctx->stream == pipe_ctx_old->stream) 2026 if (pipe_ctx->stream == pipe_ctx_old->stream &&
2027 pipe_ctx->stream->link->link_state_valid) {
2034 continue; 2028 continue;
2029 }
2035 2030
2036 if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) 2031 if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
2037 continue; 2032 continue;
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index dcd04e9ea76b..f982c8b196cf 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -1097,6 +1097,11 @@ static struct pipe_ctx *dce110_acquire_underlay(
1097 1097
1098 pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg, 1098 pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg,
1099 &stream->timing, 1099 &stream->timing,
1100 0,
1101 0,
1102 0,
1103 0,
1104 pipe_ctx->stream->signal,
1100 false); 1105 false);
1101 1106
1102 pipe_ctx->stream_res.tg->funcs->enable_advanced_request( 1107 pipe_ctx->stream_res.tg->funcs->enable_advanced_request(
@@ -1129,6 +1134,38 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool)
1129 *pool = NULL; 1134 *pool = NULL;
1130} 1135}
1131 1136
1137struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link(
1138 struct resource_context *res_ctx,
1139 const struct resource_pool *pool,
1140 struct dc_stream_state *stream)
1141{
1142 int i;
1143 int j = -1;
1144 struct dc_link *link = stream->link;
1145
1146 for (i = 0; i < pool->stream_enc_count; i++) {
1147 if (!res_ctx->is_stream_enc_acquired[i] &&
1148 pool->stream_enc[i]) {
1149 /* Store first available for MST second display
1150 * in daisy chain use case
1151 */
1152 j = i;
1153 if (pool->stream_enc[i]->id ==
1154 link->link_enc->preferred_engine)
1155 return pool->stream_enc[i];
1156 }
1157 }
1158
1159 /*
1160 * For CZ and later, we can allow DIG FE and BE to differ for all display types
1161 */
1162
1163 if (j >= 0)
1164 return pool->stream_enc[j];
1165
1166 return NULL;
1167}
1168
1132 1169
1133static const struct resource_funcs dce110_res_pool_funcs = { 1170static const struct resource_funcs dce110_res_pool_funcs = {
1134 .destroy = dce110_destroy_resource_pool, 1171 .destroy = dce110_destroy_resource_pool,
@@ -1137,7 +1174,8 @@ static const struct resource_funcs dce110_res_pool_funcs = {
1137 .validate_plane = dce110_validate_plane, 1174 .validate_plane = dce110_validate_plane,
1138 .acquire_idle_pipe_for_layer = dce110_acquire_underlay, 1175 .acquire_idle_pipe_for_layer = dce110_acquire_underlay,
1139 .add_stream_to_ctx = dce110_add_stream_to_ctx, 1176 .add_stream_to_ctx = dce110_add_stream_to_ctx,
1140 .validate_global = dce110_validate_global 1177 .validate_global = dce110_validate_global,
1178 .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
1141}; 1179};
1142 1180
1143static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) 1181static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
index e5f168c1f8c8..aa4531e0800e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
@@ -45,5 +45,10 @@ struct resource_pool *dce110_create_resource_pool(
45 struct dc *dc, 45 struct dc *dc,
46 struct hw_asic_id asic_id); 46 struct hw_asic_id asic_id);
47 47
48struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link(
49 struct resource_context *res_ctx,
50 const struct resource_pool *pool,
51 struct dc_stream_state *stream);
52
48#endif /* __DC_RESOURCE_DCE110_H__ */ 53#endif /* __DC_RESOURCE_DCE110_H__ */
49 54
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index 1b2fe0df347f..5f7c2c5641c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -1952,6 +1952,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg,
1952 1952
1953void dce110_tg_program_timing(struct timing_generator *tg, 1953void dce110_tg_program_timing(struct timing_generator *tg,
1954 const struct dc_crtc_timing *timing, 1954 const struct dc_crtc_timing *timing,
1955 int vready_offset,
1956 int vstartup_start,
1957 int vupdate_offset,
1958 int vupdate_width,
1959 const enum signal_type signal,
1955 bool use_vbios) 1960 bool use_vbios)
1956{ 1961{
1957 if (use_vbios) 1962 if (use_vbios)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
index 734d4965dab1..768ccf27ada9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
@@ -256,6 +256,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg,
256 256
257void dce110_tg_program_timing(struct timing_generator *tg, 257void dce110_tg_program_timing(struct timing_generator *tg,
258 const struct dc_crtc_timing *timing, 258 const struct dc_crtc_timing *timing,
259 int vready_offset,
260 int vstartup_start,
261 int vupdate_offset,
262 int vupdate_width,
263 const enum signal_type signal,
259 bool use_vbios); 264 bool use_vbios);
260 265
261bool dce110_tg_is_blanked(struct timing_generator *tg); 266bool dce110_tg_is_blanked(struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
index a3cef60380ed..a13a2f58944e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
@@ -435,6 +435,11 @@ static void dce110_timing_generator_v_set_blank(struct timing_generator *tg,
435 435
436static void dce110_timing_generator_v_program_timing(struct timing_generator *tg, 436static void dce110_timing_generator_v_program_timing(struct timing_generator *tg,
437 const struct dc_crtc_timing *timing, 437 const struct dc_crtc_timing *timing,
438 int vready_offset,
439 int vstartup_start,
440 int vupdate_offset,
441 int vupdate_width,
442 const enum signal_type signal,
438 bool use_vbios) 443 bool use_vbios)
439{ 444{
440 if (use_vbios) 445 if (use_vbios)
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index a480b15f6885..cdf759b0f5f9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -993,7 +993,8 @@ static const struct resource_funcs dce112_res_pool_funcs = {
993 .validate_bandwidth = dce112_validate_bandwidth, 993 .validate_bandwidth = dce112_validate_bandwidth,
994 .validate_plane = dce100_validate_plane, 994 .validate_plane = dce100_validate_plane,
995 .add_stream_to_ctx = dce112_add_stream_to_ctx, 995 .add_stream_to_ctx = dce112_add_stream_to_ctx,
996 .validate_global = dce112_validate_global 996 .validate_global = dce112_validate_global,
997 .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
997}; 998};
998 999
999static void bw_calcs_data_update_from_pplib(struct dc *dc) 1000static void bw_calcs_data_update_from_pplib(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 6d49c7143c67..9e6a5d84b0a1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -480,7 +480,7 @@ static const struct dc_debug_options debug_defaults = {
480 .disable_clock_gate = true, 480 .disable_clock_gate = true,
481}; 481};
482 482
483struct clock_source *dce120_clock_source_create( 483static struct clock_source *dce120_clock_source_create(
484 struct dc_context *ctx, 484 struct dc_context *ctx,
485 struct dc_bios *bios, 485 struct dc_bios *bios,
486 enum clock_source_id id, 486 enum clock_source_id id,
@@ -503,14 +503,14 @@ struct clock_source *dce120_clock_source_create(
503 return NULL; 503 return NULL;
504} 504}
505 505
506void dce120_clock_source_destroy(struct clock_source **clk_src) 506static void dce120_clock_source_destroy(struct clock_source **clk_src)
507{ 507{
508 kfree(TO_DCE110_CLK_SRC(*clk_src)); 508 kfree(TO_DCE110_CLK_SRC(*clk_src));
509 *clk_src = NULL; 509 *clk_src = NULL;
510} 510}
511 511
512 512
513bool dce120_hw_sequencer_create(struct dc *dc) 513static bool dce120_hw_sequencer_create(struct dc *dc)
514{ 514{
515 /* All registers used by dce11.2 match those in dce11 in offset and 515 /* All registers used by dce11.2 match those in dce11 in offset and
516 * structure 516 * structure
@@ -837,7 +837,8 @@ static const struct resource_funcs dce120_res_pool_funcs = {
837 .link_enc_create = dce120_link_encoder_create, 837 .link_enc_create = dce120_link_encoder_create,
838 .validate_bandwidth = dce112_validate_bandwidth, 838 .validate_bandwidth = dce112_validate_bandwidth,
839 .validate_plane = dce100_validate_plane, 839 .validate_plane = dce100_validate_plane,
840 .add_stream_to_ctx = dce112_add_stream_to_ctx 840 .add_stream_to_ctx = dce112_add_stream_to_ctx,
841 .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
841}; 842};
842 843
843static void bw_calcs_data_update_from_pplib(struct dc *dc) 844static void bw_calcs_data_update_from_pplib(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 04b866f0fa1f..098e56962f2a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -734,8 +734,13 @@ void dce120_tg_set_overscan_color(struct timing_generator *tg,
734 CRTC_OVERSCAN_COLOR_RED, overscan_color->color_r_cr); 734 CRTC_OVERSCAN_COLOR_RED, overscan_color->color_r_cr);
735} 735}
736 736
737void dce120_tg_program_timing(struct timing_generator *tg, 737static void dce120_tg_program_timing(struct timing_generator *tg,
738 const struct dc_crtc_timing *timing, 738 const struct dc_crtc_timing *timing,
739 int vready_offset,
740 int vstartup_start,
741 int vupdate_offset,
742 int vupdate_width,
743 const enum signal_type signal,
739 bool use_vbios) 744 bool use_vbios)
740{ 745{
741 if (use_vbios) 746 if (use_vbios)
@@ -1109,6 +1114,92 @@ static bool dce120_arm_vert_intr(
1109 return true; 1114 return true;
1110} 1115}
1111 1116
1117
1118static bool dce120_is_tg_enabled(struct timing_generator *tg)
1119{
1120 struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
1121 uint32_t value, field;
1122
1123 value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CONTROL,
1124 tg110->offsets.crtc);
1125 field = get_reg_field_value(value, CRTC0_CRTC_CONTROL,
1126 CRTC_CURRENT_MASTER_EN_STATE);
1127
1128 return field == 1;
1129}
1130
1131static bool dce120_configure_crc(struct timing_generator *tg,
1132 const struct crc_params *params)
1133{
1134 struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
1135
1136 /* Cannot configure crc on a CRTC that is disabled */
1137 if (!dce120_is_tg_enabled(tg))
1138 return false;
1139
1140 /* First, disable CRC before we configure it. */
1141 dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
1142 tg110->offsets.crtc, 0);
1143
1144 if (!params->enable)
1145 return true;
1146
1147 /* Program frame boundaries */
1148 /* Window A x axis start and end. */
1149 CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
1150 CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
1151 CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
1152
1153 /* Window A y axis start and end. */
1154 CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
1155 CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
1156 CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
1157
1158 /* Window B x axis start and end. */
1159 CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
1160 CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
1161 CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
1162
1163 /* Window B y axis start and end. */
1164 CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
1165 CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
1166 CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
1167
1168 /* Set crc mode and selection, and enable. Only using CRC0*/
1169 CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
1170 CRTC_CRC_EN, params->continuous_mode ? 1 : 0,
1171 CRTC_CRC0_SELECT, params->selection,
1172 CRTC_CRC_EN, 1);
1173
1174 return true;
1175}
1176
1177static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
1178 uint32_t *g_y, uint32_t *b_cb)
1179{
1180 struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
1181 uint32_t value, field;
1182
1183 value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
1184 tg110->offsets.crtc);
1185 field = get_reg_field_value(value, CRTC0_CRTC_CRC_CNTL, CRTC_CRC_EN);
1186
1187 /* Early return if CRC is not enabled for this CRTC */
1188 if (!field)
1189 return false;
1190
1191 value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
1192 tg110->offsets.crtc);
1193 *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
1194 *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
1195
1196 value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
1197 tg110->offsets.crtc);
1198 *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
1199
1200 return true;
1201}
1202
1112static const struct timing_generator_funcs dce120_tg_funcs = { 1203static const struct timing_generator_funcs dce120_tg_funcs = {
1113 .validate_timing = dce120_tg_validate_timing, 1204 .validate_timing = dce120_tg_validate_timing,
1114 .program_timing = dce120_tg_program_timing, 1205 .program_timing = dce120_tg_program_timing,
@@ -1140,6 +1231,9 @@ static const struct timing_generator_funcs dce120_tg_funcs = {
1140 .set_static_screen_control = dce120_timing_generator_set_static_screen_control, 1231 .set_static_screen_control = dce120_timing_generator_set_static_screen_control,
1141 .set_test_pattern = dce120_timing_generator_set_test_pattern, 1232 .set_test_pattern = dce120_timing_generator_set_test_pattern,
1142 .arm_vert_intr = dce120_arm_vert_intr, 1233 .arm_vert_intr = dce120_arm_vert_intr,
1234 .is_tg_enabled = dce120_is_tg_enabled,
1235 .configure_crc = dce120_configure_crc,
1236 .get_crc = dce120_get_crc,
1143}; 1237};
1144 1238
1145 1239
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 27d0cc394963..2c21135a8510 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -880,7 +880,8 @@ static const struct resource_funcs dce80_res_pool_funcs = {
880 .validate_bandwidth = dce80_validate_bandwidth, 880 .validate_bandwidth = dce80_validate_bandwidth,
881 .validate_plane = dce100_validate_plane, 881 .validate_plane = dce100_validate_plane,
882 .add_stream_to_ctx = dce100_add_stream_to_ctx, 882 .add_stream_to_ctx = dce100_add_stream_to_ctx,
883 .validate_global = dce80_validate_global 883 .validate_global = dce80_validate_global,
884 .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
884}; 885};
885 886
886static bool dce80_construct( 887static bool dce80_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
index 8b5ce557ee71..397e7f94e1e8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
@@ -107,12 +107,17 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz)
107 107
108static void program_timing(struct timing_generator *tg, 108static void program_timing(struct timing_generator *tg,
109 const struct dc_crtc_timing *timing, 109 const struct dc_crtc_timing *timing,
110 int vready_offset,
111 int vstartup_start,
112 int vupdate_offset,
113 int vupdate_width,
114 const enum signal_type signal,
110 bool use_vbios) 115 bool use_vbios)
111{ 116{
112 if (!use_vbios) 117 if (!use_vbios)
113 program_pix_dur(tg, timing->pix_clk_100hz); 118 program_pix_dur(tg, timing->pix_clk_100hz);
114 119
115 dce110_tg_program_timing(tg, timing, use_vbios); 120 dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios);
116} 121}
117 122
118static void dce80_timing_generator_enable_advanced_request( 123static void dce80_timing_generator_enable_advanced_request(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
index 2b2de1d913c9..9f2ffce10e12 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
@@ -27,6 +27,7 @@
27 27
28#include "reg_helper.h" 28#include "reg_helper.h"
29#include "core_types.h" 29#include "core_types.h"
30#include "dal_asic_id.h"
30 31
31#define TO_DCE_CLK_MGR(clocks)\ 32#define TO_DCE_CLK_MGR(clocks)\
32 container_of(clocks, struct dce_clk_mgr, base) 33 container_of(clocks, struct dce_clk_mgr, base)
@@ -91,13 +92,18 @@ static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_cl
91 92
92static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks) 93static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks)
93{ 94{
95 int i;
94 struct dc *dc = clk_mgr->ctx->dc; 96 struct dc *dc = clk_mgr->ctx->dc;
95 int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(clk_mgr, new_clocks); 97 int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(clk_mgr, new_clocks);
96 bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; 98 bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz;
97 int i;
98 99
99 /* set disp clk to dpp clk threshold */ 100 /* set disp clk to dpp clk threshold */
100 dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold); 101
102 if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) {
103 clk_mgr->funcs->set_dispclk(clk_mgr, dispclk_to_dpp_threshold);
104 clk_mgr->funcs->set_dprefclk(clk_mgr);
105 } else
106 dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold);
101 107
102 /* update request dpp clk division option */ 108 /* update request dpp clk division option */
103 for (i = 0; i < dc->res_pool->pipe_count; i++) { 109 for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -113,8 +119,13 @@ static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clo
113 } 119 }
114 120
115 /* If target clk not same as dppclk threshold, set to target clock */ 121 /* If target clk not same as dppclk threshold, set to target clock */
116 if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) 122 if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) {
117 dce112_set_clock(clk_mgr, new_clocks->dispclk_khz); 123 if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) {
124 clk_mgr->funcs->set_dispclk(clk_mgr, new_clocks->dispclk_khz);
125 clk_mgr->funcs->set_dprefclk(clk_mgr);
126 } else
127 dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold);
128 }
118 129
119 clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; 130 clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
120 clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz; 131 clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz;
@@ -242,7 +253,62 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
242 } 253 }
243 } 254 }
244} 255}
245static const struct clk_mgr_funcs dcn1_funcs = { 256
257#define VBIOSSMC_MSG_SetDispclkFreq 0x4
258#define VBIOSSMC_MSG_SetDprefclkFreq 0x5
259
260int dcn10_set_dispclk(struct clk_mgr *clk_mgr_base, int requested_dispclk_khz)
261{
262 int actual_dispclk_set_khz = -1;
263 struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base);
264
265 /* First clear response register */
266 //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_91, 0);
267 REG_WRITE(MP1_SMN_C2PMSG_91, 0);
268
269 /* Set the parameter register for the SMU message, unit is Mhz */
270 //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000);
271 REG_WRITE(MP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000);
272
273 /* Trigger the message transaction by writing the message ID */
274 //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq);
275 REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq);
276
277 REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
278
279 /* Actual dispclk set is returned in the parameter register */
280 actual_dispclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
281
282 return actual_dispclk_set_khz;
283
284}
285
286int dcn10_set_dprefclk(struct clk_mgr *clk_mgr_base)
287{
288 int actual_dprefclk_set_khz = -1;
289 struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base);
290
291 REG_WRITE(MP1_SMN_C2PMSG_91, 0);
292
293 /* Set the parameter register for the SMU message */
294 REG_WRITE(MP1_SMN_C2PMSG_83, clk_mgr_dce->dprefclk_khz / 1000);
295
296 /* Trigger the message transaction by writing the message ID */
297 REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDprefclkFreq);
298
299 /* Wait for SMU response */
300 REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
301
302 actual_dprefclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
303
304 return actual_dprefclk_set_khz;
305}
306
307int (*set_dispclk)(struct pp_smu *pp_smu, int dispclk);
308
309int (*set_dprefclk)(struct pp_smu *pp_smu);
310
311static struct clk_mgr_funcs dcn1_funcs = {
246 .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, 312 .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
247 .update_clocks = dcn1_update_clocks 313 .update_clocks = dcn1_update_clocks
248}; 314};
@@ -266,8 +332,8 @@ struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx)
266 clk_mgr_dce->dprefclk_ss_percentage = 0; 332 clk_mgr_dce->dprefclk_ss_percentage = 0;
267 clk_mgr_dce->dprefclk_ss_divider = 1000; 333 clk_mgr_dce->dprefclk_ss_divider = 1000;
268 clk_mgr_dce->ss_on_dprefclk = false; 334 clk_mgr_dce->ss_on_dprefclk = false;
269
270 clk_mgr_dce->dprefclk_khz = 600000; 335 clk_mgr_dce->dprefclk_khz = 600000;
336
271 if (bp->integrated_info) 337 if (bp->integrated_info)
272 clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; 338 clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
273 if (clk_mgr_dce->dentist_vco_freq_khz == 0) { 339 if (clk_mgr_dce->dentist_vco_freq_khz == 0) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 0db2a6e96fc0..bf978831bb0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -263,20 +263,15 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub)
263 DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 1); 263 DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 1);
264} 264}
265 265
266void hubbub1_program_watermarks( 266void hubbub1_program_urgent_watermarks(
267 struct hubbub *hubbub, 267 struct hubbub *hubbub,
268 struct dcn_watermark_set *watermarks, 268 struct dcn_watermark_set *watermarks,
269 unsigned int refclk_mhz, 269 unsigned int refclk_mhz,
270 bool safe_to_lower) 270 bool safe_to_lower)
271{ 271{
272 struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); 272 struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
273 /*
274 * Need to clamp to max of the register values (i.e. no wrap)
275 * for dcn1, all wm registers are 21-bit wide
276 */
277 uint32_t prog_wm_value; 273 uint32_t prog_wm_value;
278 274
279
280 /* Repeat for water mark set A, B, C and D. */ 275 /* Repeat for water mark set A, B, C and D. */
281 /* clock state A */ 276 /* clock state A */
282 if (safe_to_lower || watermarks->a.urgent_ns > hubbub1->watermarks.a.urgent_ns) { 277 if (safe_to_lower || watermarks->a.urgent_ns > hubbub1->watermarks.a.urgent_ns) {
@@ -291,60 +286,14 @@ void hubbub1_program_watermarks(
291 watermarks->a.urgent_ns, prog_wm_value); 286 watermarks->a.urgent_ns, prog_wm_value);
292 } 287 }
293 288
294 if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A)) { 289 if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) {
295 if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) { 290 hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns;
296 hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; 291 prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns,
297 prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns,
298 refclk_mhz, 0x1fffff);
299 REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value);
300 DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n"
301 "HW register value = 0x%x\n",
302 watermarks->a.pte_meta_urgent_ns, prog_wm_value);
303 }
304 }
305
306 if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A)) {
307 if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns
308 > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) {
309 hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
310 watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
311 prog_wm_value = convert_and_clamp(
312 watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
313 refclk_mhz, 0x1fffff);
314 REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
315 DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
316 DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
317 "HW register value = 0x%x\n",
318 watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
319 }
320
321 if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns
322 > hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) {
323 hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns =
324 watermarks->a.cstate_pstate.cstate_exit_ns;
325 prog_wm_value = convert_and_clamp(
326 watermarks->a.cstate_pstate.cstate_exit_ns,
327 refclk_mhz, 0x1fffff);
328 REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
329 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
330 DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
331 "HW register value = 0x%x\n",
332 watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value);
333 }
334 }
335
336 if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns
337 > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) {
338 hubbub1->watermarks.a.cstate_pstate.pstate_change_ns =
339 watermarks->a.cstate_pstate.pstate_change_ns;
340 prog_wm_value = convert_and_clamp(
341 watermarks->a.cstate_pstate.pstate_change_ns,
342 refclk_mhz, 0x1fffff); 292 refclk_mhz, 0x1fffff);
343 REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, 293 REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value);
344 DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); 294 DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n"
345 DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" 295 "HW register value = 0x%x\n",
346 "HW register value = 0x%x\n\n", 296 watermarks->a.pte_meta_urgent_ns, prog_wm_value);
347 watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value);
348 } 297 }
349 298
350 /* clock state B */ 299 /* clock state B */
@@ -360,60 +309,14 @@ void hubbub1_program_watermarks(
360 watermarks->b.urgent_ns, prog_wm_value); 309 watermarks->b.urgent_ns, prog_wm_value);
361 } 310 }
362 311
363 if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B)) { 312 if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) {
364 if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) { 313 hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns;
365 hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; 314 prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns,
366 prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns,
367 refclk_mhz, 0x1fffff);
368 REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value);
369 DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n"
370 "HW register value = 0x%x\n",
371 watermarks->b.pte_meta_urgent_ns, prog_wm_value);
372 }
373 }
374
375 if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B)) {
376 if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns
377 > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) {
378 hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
379 watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
380 prog_wm_value = convert_and_clamp(
381 watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
382 refclk_mhz, 0x1fffff);
383 REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
384 DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
385 DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
386 "HW register value = 0x%x\n",
387 watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
388 }
389
390 if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns
391 > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) {
392 hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns =
393 watermarks->b.cstate_pstate.cstate_exit_ns;
394 prog_wm_value = convert_and_clamp(
395 watermarks->b.cstate_pstate.cstate_exit_ns,
396 refclk_mhz, 0x1fffff);
397 REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
398 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
399 DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
400 "HW register value = 0x%x\n",
401 watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value);
402 }
403 }
404
405 if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns
406 > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) {
407 hubbub1->watermarks.b.cstate_pstate.pstate_change_ns =
408 watermarks->b.cstate_pstate.pstate_change_ns;
409 prog_wm_value = convert_and_clamp(
410 watermarks->b.cstate_pstate.pstate_change_ns,
411 refclk_mhz, 0x1fffff); 315 refclk_mhz, 0x1fffff);
412 REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, 316 REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value);
413 DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); 317 DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n"
414 DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" 318 "HW register value = 0x%x\n",
415 "HW register value = 0x%x\n\n", 319 watermarks->b.pte_meta_urgent_ns, prog_wm_value);
416 watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value);
417 } 320 }
418 321
419 /* clock state C */ 322 /* clock state C */
@@ -429,60 +332,14 @@ void hubbub1_program_watermarks(
429 watermarks->c.urgent_ns, prog_wm_value); 332 watermarks->c.urgent_ns, prog_wm_value);
430 } 333 }
431 334
432 if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C)) { 335 if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) {
433 if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) { 336 hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns;
434 hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; 337 prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns,
435 prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns,
436 refclk_mhz, 0x1fffff);
437 REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value);
438 DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n"
439 "HW register value = 0x%x\n",
440 watermarks->c.pte_meta_urgent_ns, prog_wm_value);
441 }
442 }
443
444 if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C)) {
445 if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns
446 > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) {
447 hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
448 watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
449 prog_wm_value = convert_and_clamp(
450 watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
451 refclk_mhz, 0x1fffff);
452 REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
453 DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
454 DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
455 "HW register value = 0x%x\n",
456 watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
457 }
458
459 if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns
460 > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) {
461 hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns =
462 watermarks->c.cstate_pstate.cstate_exit_ns;
463 prog_wm_value = convert_and_clamp(
464 watermarks->c.cstate_pstate.cstate_exit_ns,
465 refclk_mhz, 0x1fffff);
466 REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
467 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
468 DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
469 "HW register value = 0x%x\n",
470 watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value);
471 }
472 }
473
474 if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns
475 > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) {
476 hubbub1->watermarks.c.cstate_pstate.pstate_change_ns =
477 watermarks->c.cstate_pstate.pstate_change_ns;
478 prog_wm_value = convert_and_clamp(
479 watermarks->c.cstate_pstate.pstate_change_ns,
480 refclk_mhz, 0x1fffff); 338 refclk_mhz, 0x1fffff);
481 REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, 339 REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value);
482 DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); 340 DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n"
483 DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" 341 "HW register value = 0x%x\n",
484 "HW register value = 0x%x\n\n", 342 watermarks->c.pte_meta_urgent_ns, prog_wm_value);
485 watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value);
486 } 343 }
487 344
488 /* clock state D */ 345 /* clock state D */
@@ -498,48 +355,199 @@ void hubbub1_program_watermarks(
498 watermarks->d.urgent_ns, prog_wm_value); 355 watermarks->d.urgent_ns, prog_wm_value);
499 } 356 }
500 357
501 if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)) { 358 if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) {
502 if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) { 359 hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns;
503 hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; 360 prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns,
504 prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, 361 refclk_mhz, 0x1fffff);
505 refclk_mhz, 0x1fffff); 362 REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value);
506 REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); 363 DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n"
507 DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" 364 "HW register value = 0x%x\n",
508 "HW register value = 0x%x\n", 365 watermarks->d.pte_meta_urgent_ns, prog_wm_value);
509 watermarks->d.pte_meta_urgent_ns, prog_wm_value);
510 }
511 } 366 }
367}
512 368
513 if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D)) { 369void hubbub1_program_stutter_watermarks(
514 if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns 370 struct hubbub *hubbub,
515 > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { 371 struct dcn_watermark_set *watermarks,
516 hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = 372 unsigned int refclk_mhz,
517 watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; 373 bool safe_to_lower)
518 prog_wm_value = convert_and_clamp( 374{
519 watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, 375 struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
520 refclk_mhz, 0x1fffff); 376 uint32_t prog_wm_value;
521 REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
522 DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
523 DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
524 "HW register value = 0x%x\n",
525 watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
526 }
527 377
528 if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns 378 /* clock state A */
529 > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) { 379 if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns
530 hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns = 380 > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) {
531 watermarks->d.cstate_pstate.cstate_exit_ns; 381 hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
532 prog_wm_value = convert_and_clamp( 382 watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
533 watermarks->d.cstate_pstate.cstate_exit_ns, 383 prog_wm_value = convert_and_clamp(
534 refclk_mhz, 0x1fffff); 384 watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
535 REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, 385 refclk_mhz, 0x1fffff);
536 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); 386 REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
537 DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" 387 DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
538 "HW register value = 0x%x\n", 388 DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
539 watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value); 389 "HW register value = 0x%x\n",
540 } 390 watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
391 }
392
393 if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns
394 > hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) {
395 hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns =
396 watermarks->a.cstate_pstate.cstate_exit_ns;
397 prog_wm_value = convert_and_clamp(
398 watermarks->a.cstate_pstate.cstate_exit_ns,
399 refclk_mhz, 0x1fffff);
400 REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
401 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
402 DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
403 "HW register value = 0x%x\n",
404 watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value);
405 }
406
407 /* clock state B */
408 if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns
409 > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) {
410 hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
411 watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
412 prog_wm_value = convert_and_clamp(
413 watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
414 refclk_mhz, 0x1fffff);
415 REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
416 DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
417 DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
418 "HW register value = 0x%x\n",
419 watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
541 } 420 }
542 421
422 if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns
423 > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) {
424 hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns =
425 watermarks->b.cstate_pstate.cstate_exit_ns;
426 prog_wm_value = convert_and_clamp(
427 watermarks->b.cstate_pstate.cstate_exit_ns,
428 refclk_mhz, 0x1fffff);
429 REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
430 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
431 DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
432 "HW register value = 0x%x\n",
433 watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value);
434 }
435
436 /* clock state C */
437 if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns
438 > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) {
439 hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
440 watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
441 prog_wm_value = convert_and_clamp(
442 watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
443 refclk_mhz, 0x1fffff);
444 REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
445 DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
446 DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
447 "HW register value = 0x%x\n",
448 watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
449 }
450
451 if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns
452 > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) {
453 hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns =
454 watermarks->c.cstate_pstate.cstate_exit_ns;
455 prog_wm_value = convert_and_clamp(
456 watermarks->c.cstate_pstate.cstate_exit_ns,
457 refclk_mhz, 0x1fffff);
458 REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
459 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
460 DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
461 "HW register value = 0x%x\n",
462 watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value);
463 }
464
465 /* clock state D */
466 if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns
467 > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) {
468 hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns =
469 watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns;
470 prog_wm_value = convert_and_clamp(
471 watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
472 refclk_mhz, 0x1fffff);
473 REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
474 DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
475 DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
476 "HW register value = 0x%x\n",
477 watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
478 }
479
480 if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns
481 > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) {
482 hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns =
483 watermarks->d.cstate_pstate.cstate_exit_ns;
484 prog_wm_value = convert_and_clamp(
485 watermarks->d.cstate_pstate.cstate_exit_ns,
486 refclk_mhz, 0x1fffff);
487 REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
488 DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
489 DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
490 "HW register value = 0x%x\n",
491 watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value);
492 }
493
494}
495
496void hubbub1_program_pstate_watermarks(
497 struct hubbub *hubbub,
498 struct dcn_watermark_set *watermarks,
499 unsigned int refclk_mhz,
500 bool safe_to_lower)
501{
502 struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
503 uint32_t prog_wm_value;
504
505 /* clock state A */
506 if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns
507 > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) {
508 hubbub1->watermarks.a.cstate_pstate.pstate_change_ns =
509 watermarks->a.cstate_pstate.pstate_change_ns;
510 prog_wm_value = convert_and_clamp(
511 watermarks->a.cstate_pstate.pstate_change_ns,
512 refclk_mhz, 0x1fffff);
513 REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0,
514 DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
515 DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
516 "HW register value = 0x%x\n\n",
517 watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value);
518 }
519
520 /* clock state B */
521 if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns
522 > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) {
523 hubbub1->watermarks.b.cstate_pstate.pstate_change_ns =
524 watermarks->b.cstate_pstate.pstate_change_ns;
525 prog_wm_value = convert_and_clamp(
526 watermarks->b.cstate_pstate.pstate_change_ns,
527 refclk_mhz, 0x1fffff);
528 REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0,
529 DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
530 DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
531 "HW register value = 0x%x\n\n",
532 watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value);
533 }
534
535 /* clock state C */
536 if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns
537 > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) {
538 hubbub1->watermarks.c.cstate_pstate.pstate_change_ns =
539 watermarks->c.cstate_pstate.pstate_change_ns;
540 prog_wm_value = convert_and_clamp(
541 watermarks->c.cstate_pstate.pstate_change_ns,
542 refclk_mhz, 0x1fffff);
543 REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0,
544 DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
545 DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n"
546 "HW register value = 0x%x\n\n",
547 watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value);
548 }
549
550 /* clock state D */
543 if (safe_to_lower || watermarks->d.cstate_pstate.pstate_change_ns 551 if (safe_to_lower || watermarks->d.cstate_pstate.pstate_change_ns
544 > hubbub1->watermarks.d.cstate_pstate.pstate_change_ns) { 552 > hubbub1->watermarks.d.cstate_pstate.pstate_change_ns) {
545 hubbub1->watermarks.d.cstate_pstate.pstate_change_ns = 553 hubbub1->watermarks.d.cstate_pstate.pstate_change_ns =
@@ -553,6 +561,22 @@ void hubbub1_program_watermarks(
553 "HW register value = 0x%x\n\n", 561 "HW register value = 0x%x\n\n",
554 watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value); 562 watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value);
555 } 563 }
564}
565
566void hubbub1_program_watermarks(
567 struct hubbub *hubbub,
568 struct dcn_watermark_set *watermarks,
569 unsigned int refclk_mhz,
570 bool safe_to_lower)
571{
572 struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
573 /*
574 * Need to clamp to max of the register values (i.e. no wrap)
575 * for dcn1, all wm registers are 21-bit wide
576 */
577 hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
578 hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
579 hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
556 580
557 REG_UPDATE(DCHUBBUB_ARB_SAT_LEVEL, 581 REG_UPDATE(DCHUBBUB_ARB_SAT_LEVEL,
558 DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz); 582 DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
@@ -903,9 +927,7 @@ void hubbub1_construct(struct hubbub *hubbub,
903 hubbub1->masks = hubbub_mask; 927 hubbub1->masks = hubbub_mask;
904 928
905 hubbub1->debug_test_index_pstate = 0x7; 929 hubbub1->debug_test_index_pstate = 0x7;
906#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
907 if (ctx->dce_version == DCN_VERSION_1_01) 930 if (ctx->dce_version == DCN_VERSION_1_01)
908 hubbub1->debug_test_index_pstate = 0xB; 931 hubbub1->debug_test_index_pstate = 0xB;
909#endif
910} 932}
911 933
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index 85811b24a497..7c2559c9ae23 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -262,4 +262,20 @@ void hubbub1_construct(struct hubbub *hubbub,
262 const struct dcn_hubbub_shift *hubbub_shift, 262 const struct dcn_hubbub_shift *hubbub_shift,
263 const struct dcn_hubbub_mask *hubbub_mask); 263 const struct dcn_hubbub_mask *hubbub_mask);
264 264
265void hubbub1_program_urgent_watermarks(
266 struct hubbub *hubbub,
267 struct dcn_watermark_set *watermarks,
268 unsigned int refclk_mhz,
269 bool safe_to_lower);
270void hubbub1_program_stutter_watermarks(
271 struct hubbub *hubbub,
272 struct dcn_watermark_set *watermarks,
273 unsigned int refclk_mhz,
274 bool safe_to_lower);
275void hubbub1_program_pstate_watermarks(
276 struct hubbub *hubbub,
277 struct dcn_watermark_set *watermarks,
278 unsigned int refclk_mhz,
279 bool safe_to_lower);
280
265#endif 281#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 33d311cea28c..66bb0e7db25c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -658,16 +658,15 @@ static enum dc_status dcn10_enable_stream_timing(
658 BREAK_TO_DEBUGGER(); 658 BREAK_TO_DEBUGGER();
659 return DC_ERROR_UNEXPECTED; 659 return DC_ERROR_UNEXPECTED;
660 } 660 }
661 pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset;
662 pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start;
663 pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset;
664 pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width;
665
666 pipe_ctx->stream_res.tg->dlg_otg_param.signal = pipe_ctx->stream->signal;
667 661
668 pipe_ctx->stream_res.tg->funcs->program_timing( 662 pipe_ctx->stream_res.tg->funcs->program_timing(
669 pipe_ctx->stream_res.tg, 663 pipe_ctx->stream_res.tg,
670 &stream->timing, 664 &stream->timing,
665 pipe_ctx->pipe_dlg_param.vready_offset,
666 pipe_ctx->pipe_dlg_param.vstartup_start,
667 pipe_ctx->pipe_dlg_param.vupdate_offset,
668 pipe_ctx->pipe_dlg_param.vupdate_width,
669 pipe_ctx->stream->signal,
671 true); 670 true);
672 671
673#if 0 /* move to after enable_crtc */ 672#if 0 /* move to after enable_crtc */
@@ -1756,7 +1755,7 @@ static void dcn10_program_output_csc(struct dc *dc,
1756 1755
1757bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) 1756bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
1758{ 1757{
1759 if (pipe_ctx->plane_state->visible) 1758 if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
1760 return true; 1759 return true;
1761 if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe)) 1760 if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe))
1762 return true; 1761 return true;
@@ -1765,7 +1764,7 @@ bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
1765 1764
1766bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx) 1765bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
1767{ 1766{
1768 if (pipe_ctx->plane_state->visible) 1767 if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
1769 return true; 1768 return true;
1770 if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe)) 1769 if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
1771 return true; 1770 return true;
@@ -1774,7 +1773,7 @@ bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
1774 1773
1775bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx) 1774bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
1776{ 1775{
1777 if (pipe_ctx->plane_state->visible) 1776 if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
1778 return true; 1777 return true;
1779 if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe)) 1778 if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
1780 return true; 1779 return true;
@@ -1920,7 +1919,7 @@ static uint16_t fixed_point_to_int_frac(
1920 return result; 1919 return result;
1921} 1920}
1922 1921
1923void build_prescale_params(struct dc_bias_and_scale *bias_and_scale, 1922void dcn10_build_prescale_params(struct dc_bias_and_scale *bias_and_scale,
1924 const struct dc_plane_state *plane_state) 1923 const struct dc_plane_state *plane_state)
1925{ 1924{
1926 if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN 1925 if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN
@@ -1953,7 +1952,7 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)
1953 plane_state->color_space); 1952 plane_state->color_space);
1954 1953
1955 //set scale and bias registers 1954 //set scale and bias registers
1956 build_prescale_params(&bns_params, plane_state); 1955 dcn10_build_prescale_params(&bns_params, plane_state);
1957 if (dpp->funcs->dpp_program_bias_and_scale) 1956 if (dpp->funcs->dpp_program_bias_and_scale)
1958 dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); 1957 dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
1959} 1958}
@@ -2279,14 +2278,15 @@ static void program_all_pipe_in_tree(
2279 if (pipe_ctx->top_pipe == NULL) { 2278 if (pipe_ctx->top_pipe == NULL) {
2280 bool blank = !is_pipe_tree_visible(pipe_ctx); 2279 bool blank = !is_pipe_tree_visible(pipe_ctx);
2281 2280
2282 pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset;
2283 pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start;
2284 pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset;
2285 pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width;
2286 pipe_ctx->stream_res.tg->dlg_otg_param.signal = pipe_ctx->stream->signal;
2287
2288 pipe_ctx->stream_res.tg->funcs->program_global_sync( 2281 pipe_ctx->stream_res.tg->funcs->program_global_sync(
2289 pipe_ctx->stream_res.tg); 2282 pipe_ctx->stream_res.tg,
2283 pipe_ctx->pipe_dlg_param.vready_offset,
2284 pipe_ctx->pipe_dlg_param.vstartup_start,
2285 pipe_ctx->pipe_dlg_param.vupdate_offset,
2286 pipe_ctx->pipe_dlg_param.vupdate_width);
2287
2288 pipe_ctx->stream_res.tg->funcs->set_vtg_params(
2289 pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
2290 2290
2291 dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); 2291 dc->hwss.blank_pixel_data(dc, pipe_ctx, blank);
2292 2292
@@ -2644,9 +2644,6 @@ static void dcn10_wait_for_mpcc_disconnect(
2644 res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst); 2644 res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);
2645 pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; 2645 pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false;
2646 hubp->funcs->set_blank(hubp, true); 2646 hubp->funcs->set_blank(hubp, true);
2647 /*DC_LOG_ERROR(dc->ctx->logger,
2648 "[debug_mpo: wait_for_mpcc finished waiting on mpcc %d]\n",
2649 i);*/
2650 } 2647 }
2651 } 2648 }
2652 2649
@@ -2790,7 +2787,6 @@ static void apply_front_porch_workaround(
2790 2787
2791int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) 2788int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
2792{ 2789{
2793 struct timing_generator *optc = pipe_ctx->stream_res.tg;
2794 const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; 2790 const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
2795 struct dc_crtc_timing patched_crtc_timing; 2791 struct dc_crtc_timing patched_crtc_timing;
2796 int vesa_sync_start; 2792 int vesa_sync_start;
@@ -2813,7 +2809,7 @@ int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
2813 * interlace_factor; 2809 * interlace_factor;
2814 2810
2815 vertical_line_start = asic_blank_end - 2811 vertical_line_start = asic_blank_end -
2816 optc->dlg_otg_param.vstartup_start + 1; 2812 pipe_ctx->pipe_dlg_param.vstartup_start + 1;
2817 2813
2818 return vertical_line_start; 2814 return vertical_line_start;
2819} 2815}
@@ -2961,6 +2957,18 @@ static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
2961 } 2957 }
2962} 2958}
2963 2959
2960static void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
2961 const uint8_t *custom_sdp_message,
2962 unsigned int sdp_message_size)
2963{
2964 if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
2965 pipe_ctx->stream_res.stream_enc->funcs->send_immediate_sdp_message(
2966 pipe_ctx->stream_res.stream_enc,
2967 custom_sdp_message,
2968 sdp_message_size);
2969 }
2970}
2971
2964static const struct hw_sequencer_funcs dcn10_funcs = { 2972static const struct hw_sequencer_funcs dcn10_funcs = {
2965 .program_gamut_remap = program_gamut_remap, 2973 .program_gamut_remap = program_gamut_remap,
2966 .init_hw = dcn10_init_hw, 2974 .init_hw = dcn10_init_hw,
@@ -2980,6 +2988,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
2980 .enable_timing_synchronization = dcn10_enable_timing_synchronization, 2988 .enable_timing_synchronization = dcn10_enable_timing_synchronization,
2981 .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, 2989 .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
2982 .update_info_frame = dce110_update_info_frame, 2990 .update_info_frame = dce110_update_info_frame,
2991 .send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
2983 .enable_stream = dce110_enable_stream, 2992 .enable_stream = dce110_enable_stream,
2984 .disable_stream = dce110_disable_stream, 2993 .disable_stream = dce110_disable_stream,
2985 .unblank_stream = dcn10_unblank_stream, 2994 .unblank_stream = dcn10_unblank_stream,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index 4b3b27a5d23b..ef94d6b15843 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -83,6 +83,8 @@ struct pipe_ctx *find_top_pipe_for_stream(
83 83
84int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx); 84int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);
85 85
86void dcn10_build_prescale_params(struct dc_bias_and_scale *bias_and_scale,
87 const struct dc_plane_state *plane_state);
86void lock_all_pipes(struct dc *dc, 88void lock_all_pipes(struct dc *dc,
87 struct dc_state *context, 89 struct dc_state *context,
88 bool lock); 90 bool lock);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
index 0126a44ba012..e25ae43f8d32 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
@@ -726,6 +726,8 @@ void dcn10_link_encoder_construct(
726 enc10->base.features.flags.bits.IS_HBR3_CAPABLE = 726 enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
727 bp_cap_info.DP_HBR3_EN; 727 bp_cap_info.DP_HBR3_EN;
728 enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; 728 enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
729 enc10->base.features.flags.bits.DP_IS_USB_C =
730 bp_cap_info.DP_IS_USB_C;
729 } else { 731 } else {
730 DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", 732 DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
731 __func__, 733 __func__,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 0345d51e9d6f..533b0f3cf6c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -46,9 +46,7 @@
46* This is a workaround for a bug that has existed since R5xx and has not been 46* This is a workaround for a bug that has existed since R5xx and has not been
47* fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive. 47* fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive.
48*/ 48*/
49static void optc1_apply_front_porch_workaround( 49static void apply_front_porch_workaround(struct dc_crtc_timing *timing)
50 struct timing_generator *optc,
51 struct dc_crtc_timing *timing)
52{ 50{
53 if (timing->flags.INTERLACE == 1) { 51 if (timing->flags.INTERLACE == 1) {
54 if (timing->v_front_porch < 2) 52 if (timing->v_front_porch < 2)
@@ -60,24 +58,33 @@ static void optc1_apply_front_porch_workaround(
60} 58}
61 59
62void optc1_program_global_sync( 60void optc1_program_global_sync(
63 struct timing_generator *optc) 61 struct timing_generator *optc,
62 int vready_offset,
63 int vstartup_start,
64 int vupdate_offset,
65 int vupdate_width)
64{ 66{
65 struct optc *optc1 = DCN10TG_FROM_TG(optc); 67 struct optc *optc1 = DCN10TG_FROM_TG(optc);
66 68
67 if (optc->dlg_otg_param.vstartup_start == 0) { 69 optc1->vready_offset = vready_offset;
70 optc1->vstartup_start = vstartup_start;
71 optc1->vupdate_offset = vupdate_offset;
72 optc1->vupdate_width = vupdate_width;
73
74 if (optc1->vstartup_start == 0) {
68 BREAK_TO_DEBUGGER(); 75 BREAK_TO_DEBUGGER();
69 return; 76 return;
70 } 77 }
71 78
72 REG_SET(OTG_VSTARTUP_PARAM, 0, 79 REG_SET(OTG_VSTARTUP_PARAM, 0,
73 VSTARTUP_START, optc->dlg_otg_param.vstartup_start); 80 VSTARTUP_START, optc1->vstartup_start);
74 81
75 REG_SET_2(OTG_VUPDATE_PARAM, 0, 82 REG_SET_2(OTG_VUPDATE_PARAM, 0,
76 VUPDATE_OFFSET, optc->dlg_otg_param.vupdate_offset, 83 VUPDATE_OFFSET, optc1->vupdate_offset,
77 VUPDATE_WIDTH, optc->dlg_otg_param.vupdate_width); 84 VUPDATE_WIDTH, optc1->vupdate_width);
78 85
79 REG_SET(OTG_VREADY_PARAM, 0, 86 REG_SET(OTG_VREADY_PARAM, 0,
80 VREADY_OFFSET, optc->dlg_otg_param.vready_offset); 87 VREADY_OFFSET, optc1->vready_offset);
81} 88}
82 89
83static void optc1_disable_stereo(struct timing_generator *optc) 90static void optc1_disable_stereo(struct timing_generator *optc)
@@ -132,25 +139,32 @@ void optc1_setup_vertical_interrupt2(
132void optc1_program_timing( 139void optc1_program_timing(
133 struct timing_generator *optc, 140 struct timing_generator *optc,
134 const struct dc_crtc_timing *dc_crtc_timing, 141 const struct dc_crtc_timing *dc_crtc_timing,
142 int vready_offset,
143 int vstartup_start,
144 int vupdate_offset,
145 int vupdate_width,
146 const enum signal_type signal,
135 bool use_vbios) 147 bool use_vbios)
136{ 148{
137 struct dc_crtc_timing patched_crtc_timing; 149 struct dc_crtc_timing patched_crtc_timing;
138 uint32_t vesa_sync_start;
139 uint32_t asic_blank_end; 150 uint32_t asic_blank_end;
140 uint32_t asic_blank_start; 151 uint32_t asic_blank_start;
141 uint32_t v_total; 152 uint32_t v_total;
142 uint32_t v_sync_end; 153 uint32_t v_sync_end;
143 uint32_t v_init, v_fp2;
144 uint32_t h_sync_polarity, v_sync_polarity; 154 uint32_t h_sync_polarity, v_sync_polarity;
145 uint32_t start_point = 0; 155 uint32_t start_point = 0;
146 uint32_t field_num = 0; 156 uint32_t field_num = 0;
147 uint32_t h_div_2; 157 uint32_t h_div_2;
148 int32_t vertical_line_start;
149 158
150 struct optc *optc1 = DCN10TG_FROM_TG(optc); 159 struct optc *optc1 = DCN10TG_FROM_TG(optc);
151 160
161 optc1->signal = signal;
162 optc1->vready_offset = vready_offset;
163 optc1->vstartup_start = vstartup_start;
164 optc1->vupdate_offset = vupdate_offset;
165 optc1->vupdate_width = vupdate_width;
152 patched_crtc_timing = *dc_crtc_timing; 166 patched_crtc_timing = *dc_crtc_timing;
153 optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); 167 apply_front_porch_workaround(&patched_crtc_timing);
154 168
155 /* Load horizontal timing */ 169 /* Load horizontal timing */
156 170
@@ -163,24 +177,16 @@ void optc1_program_timing(
163 OTG_H_SYNC_A_START, 0, 177 OTG_H_SYNC_A_START, 0,
164 OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width); 178 OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width);
165 179
166 /* asic_h_blank_end = HsyncWidth + HbackPorch = 180 /* blank_start = line end - front porch */
167 * vesa. usHorizontalTotal - vesa. usHorizontalSyncStart - 181 asic_blank_start = patched_crtc_timing.h_total -
168 * vesa.h_left_border
169 */
170 vesa_sync_start = patched_crtc_timing.h_addressable +
171 patched_crtc_timing.h_border_right +
172 patched_crtc_timing.h_front_porch; 182 patched_crtc_timing.h_front_porch;
173 183
174 asic_blank_end = patched_crtc_timing.h_total - 184 /* blank_end = blank_start - active */
175 vesa_sync_start - 185 asic_blank_end = asic_blank_start -
186 patched_crtc_timing.h_border_right -
187 patched_crtc_timing.h_addressable -
176 patched_crtc_timing.h_border_left; 188 patched_crtc_timing.h_border_left;
177 189
178 /* h_blank_start = v_blank_end + v_active */
179 asic_blank_start = asic_blank_end +
180 patched_crtc_timing.h_border_left +
181 patched_crtc_timing.h_addressable +
182 patched_crtc_timing.h_border_right;
183
184 REG_UPDATE_2(OTG_H_BLANK_START_END, 190 REG_UPDATE_2(OTG_H_BLANK_START_END,
185 OTG_H_BLANK_START, asic_blank_start, 191 OTG_H_BLANK_START, asic_blank_start,
186 OTG_H_BLANK_END, asic_blank_end); 192 OTG_H_BLANK_END, asic_blank_end);
@@ -212,24 +218,15 @@ void optc1_program_timing(
212 OTG_V_SYNC_A_START, 0, 218 OTG_V_SYNC_A_START, 0,
213 OTG_V_SYNC_A_END, v_sync_end); 219 OTG_V_SYNC_A_END, v_sync_end);
214 220
215 vesa_sync_start = patched_crtc_timing.v_addressable + 221 /* blank_start = frame end - front porch */
216 patched_crtc_timing.v_border_bottom + 222 asic_blank_start = patched_crtc_timing.v_total -
217 patched_crtc_timing.v_front_porch; 223 patched_crtc_timing.v_front_porch;
218 224
219 asic_blank_end = (patched_crtc_timing.v_total - 225 /* blank_end = blank_start - active */
220 vesa_sync_start - 226 asic_blank_end = asic_blank_start -
221 patched_crtc_timing.v_border_top); 227 patched_crtc_timing.v_border_bottom -
222 228 patched_crtc_timing.v_addressable -
223 /* v_blank_start = v_blank_end + v_active */ 229 patched_crtc_timing.v_border_top;
224 asic_blank_start = asic_blank_end +
225 (patched_crtc_timing.v_border_top +
226 patched_crtc_timing.v_addressable +
227 patched_crtc_timing.v_border_bottom);
228
229 vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1;
230 v_fp2 = 0;
231 if (vertical_line_start < 0)
232 v_fp2 = -vertical_line_start;
233 230
234 REG_UPDATE_2(OTG_V_BLANK_START_END, 231 REG_UPDATE_2(OTG_V_BLANK_START_END,
235 OTG_V_BLANK_START, asic_blank_start, 232 OTG_V_BLANK_START, asic_blank_start,
@@ -242,10 +239,9 @@ void optc1_program_timing(
242 REG_UPDATE(OTG_V_SYNC_A_CNTL, 239 REG_UPDATE(OTG_V_SYNC_A_CNTL,
243 OTG_V_SYNC_A_POL, v_sync_polarity); 240 OTG_V_SYNC_A_POL, v_sync_polarity);
244 241
245 v_init = asic_blank_start; 242 if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT ||
246 if (optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT || 243 optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
247 optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST || 244 optc1->signal == SIGNAL_TYPE_EDP) {
248 optc->dlg_otg_param.signal == SIGNAL_TYPE_EDP) {
249 start_point = 1; 245 start_point = 1;
250 if (patched_crtc_timing.flags.INTERLACE == 1) 246 if (patched_crtc_timing.flags.INTERLACE == 1)
251 field_num = 1; 247 field_num = 1;
@@ -253,13 +249,10 @@ void optc1_program_timing(
253 249
254 /* Interlace */ 250 /* Interlace */
255 if (REG(OTG_INTERLACE_CONTROL)) { 251 if (REG(OTG_INTERLACE_CONTROL)) {
256 if (patched_crtc_timing.flags.INTERLACE == 1) { 252 if (patched_crtc_timing.flags.INTERLACE == 1)
257 REG_UPDATE(OTG_INTERLACE_CONTROL, 253 REG_UPDATE(OTG_INTERLACE_CONTROL,
258 OTG_INTERLACE_ENABLE, 1); 254 OTG_INTERLACE_ENABLE, 1);
259 v_init = v_init / 2; 255 else
260 if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end)
261 v_fp2 = v_fp2 / 2;
262 } else
263 REG_UPDATE(OTG_INTERLACE_CONTROL, 256 REG_UPDATE(OTG_INTERLACE_CONTROL,
264 OTG_INTERLACE_ENABLE, 0); 257 OTG_INTERLACE_ENABLE, 0);
265 } 258 }
@@ -268,16 +261,18 @@ void optc1_program_timing(
268 REG_UPDATE(CONTROL, 261 REG_UPDATE(CONTROL,
269 VTG0_ENABLE, 0); 262 VTG0_ENABLE, 0);
270 263
271 REG_UPDATE_2(CONTROL,
272 VTG0_FP2, v_fp2,
273 VTG0_VCOUNT_INIT, v_init);
274
275 /* original code is using VTG offset to address OTG reg, seems wrong */ 264 /* original code is using VTG offset to address OTG reg, seems wrong */
276 REG_UPDATE_2(OTG_CONTROL, 265 REG_UPDATE_2(OTG_CONTROL,
277 OTG_START_POINT_CNTL, start_point, 266 OTG_START_POINT_CNTL, start_point,
278 OTG_FIELD_NUMBER_CNTL, field_num); 267 OTG_FIELD_NUMBER_CNTL, field_num);
279 268
280 optc1_program_global_sync(optc); 269 optc->funcs->program_global_sync(optc,
270 vready_offset,
271 vstartup_start,
272 vupdate_offset,
273 vupdate_width);
274
275 optc->funcs->set_vtg_params(optc, dc_crtc_timing);
281 276
282 /* TODO 277 /* TODO
283 * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1 278 * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1
@@ -296,6 +291,48 @@ void optc1_program_timing(
296 291
297} 292}
298 293
294void optc1_set_vtg_params(struct timing_generator *optc,
295 const struct dc_crtc_timing *dc_crtc_timing)
296{
297 struct dc_crtc_timing patched_crtc_timing;
298 uint32_t asic_blank_end;
299 uint32_t v_init;
300 uint32_t v_fp2 = 0;
301 int32_t vertical_line_start;
302
303 struct optc *optc1 = DCN10TG_FROM_TG(optc);
304
305 patched_crtc_timing = *dc_crtc_timing;
306 apply_front_porch_workaround(&patched_crtc_timing);
307
308 /* VCOUNT_INIT is the start of blank */
309 v_init = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch;
310
311 /* end of blank = v_init - active */
312 asic_blank_end = v_init -
313 patched_crtc_timing.v_border_bottom -
314 patched_crtc_timing.v_addressable -
315 patched_crtc_timing.v_border_top;
316
317 /* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */
318 vertical_line_start = asic_blank_end - optc1->vstartup_start + 1;
319 if (vertical_line_start < 0)
320 v_fp2 = -vertical_line_start;
321
322 /* Interlace */
323 if (REG(OTG_INTERLACE_CONTROL)) {
324 if (patched_crtc_timing.flags.INTERLACE == 1) {
325 v_init = v_init / 2;
326 if ((optc1->vstartup_start/2)*2 > asic_blank_end)
327 v_fp2 = v_fp2 / 2;
328 }
329 }
330
331 REG_UPDATE_2(CONTROL,
332 VTG0_FP2, v_fp2,
333 VTG0_VCOUNT_INIT, v_init);
334}
335
299void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable) 336void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
300{ 337{
301 struct optc *optc1 = DCN10TG_FROM_TG(optc); 338 struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -1420,6 +1457,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = {
1420 .clear_optc_underflow = optc1_clear_optc_underflow, 1457 .clear_optc_underflow = optc1_clear_optc_underflow,
1421 .get_crc = optc1_get_crc, 1458 .get_crc = optc1_get_crc,
1422 .configure_crc = optc1_configure_crc, 1459 .configure_crc = optc1_configure_crc,
1460 .set_vtg_params = optc1_set_vtg_params,
1423}; 1461};
1424 1462
1425void dcn10_timing_generator_init(struct optc *optc1) 1463void dcn10_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index 4eb9a898c237..651b8caa4b9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -446,6 +446,12 @@ struct optc {
446 uint32_t min_v_sync_width; 446 uint32_t min_v_sync_width;
447 uint32_t min_v_blank; 447 uint32_t min_v_blank;
448 uint32_t min_v_blank_interlace; 448 uint32_t min_v_blank_interlace;
449
450 int vstartup_start;
451 int vupdate_offset;
452 int vupdate_width;
453 int vready_offset;
454 enum signal_type signal;
449}; 455};
450 456
451void dcn10_timing_generator_init(struct optc *optc); 457void dcn10_timing_generator_init(struct optc *optc);
@@ -481,6 +487,11 @@ bool optc1_validate_timing(
481void optc1_program_timing( 487void optc1_program_timing(
482 struct timing_generator *optc, 488 struct timing_generator *optc,
483 const struct dc_crtc_timing *dc_crtc_timing, 489 const struct dc_crtc_timing *dc_crtc_timing,
490 int vready_offset,
491 int vstartup_start,
492 int vupdate_offset,
493 int vupdate_width,
494 const enum signal_type signal,
484 bool use_vbios); 495 bool use_vbios);
485 496
486void optc1_setup_vertical_interrupt0( 497void optc1_setup_vertical_interrupt0(
@@ -495,7 +506,11 @@ void optc1_setup_vertical_interrupt2(
495 uint32_t start_line); 506 uint32_t start_line);
496 507
497void optc1_program_global_sync( 508void optc1_program_global_sync(
498 struct timing_generator *optc); 509 struct timing_generator *optc,
510 int vready_offset,
511 int vstartup_start,
512 int vupdate_offset,
513 int vupdate_width);
499 514
500bool optc1_disable_crtc(struct timing_generator *optc); 515bool optc1_disable_crtc(struct timing_generator *optc);
501 516
@@ -582,4 +597,7 @@ bool optc1_get_crc(struct timing_generator *optc,
582 597
583bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); 598bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
584 599
600void optc1_set_vtg_params(struct timing_generator *optc,
601 const struct dc_crtc_timing *dc_crtc_timing);
602
585#endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ 603#endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 7eccb54c421d..bfddd51294a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -29,7 +29,6 @@
29#include "resource.h" 29#include "resource.h"
30#include "include/irq_service_interface.h" 30#include "include/irq_service_interface.h"
31#include "dcn10_resource.h" 31#include "dcn10_resource.h"
32
33#include "dcn10_ipp.h" 32#include "dcn10_ipp.h"
34#include "dcn10_mpc.h" 33#include "dcn10_mpc.h"
35#include "irq/dcn10/irq_service_dcn10.h" 34#include "irq/dcn10/irq_service_dcn10.h"
@@ -153,9 +152,7 @@ enum dcn10_clk_src_array_id {
153 DCN10_CLK_SRC_PLL2, 152 DCN10_CLK_SRC_PLL2,
154 DCN10_CLK_SRC_PLL3, 153 DCN10_CLK_SRC_PLL3,
155 DCN10_CLK_SRC_TOTAL, 154 DCN10_CLK_SRC_TOTAL,
156#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
157 DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3 155 DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3
158#endif
159}; 156};
160 157
161/* begin ********************* 158/* begin *********************
@@ -445,7 +442,6 @@ static const struct bios_registers bios_regs = {
445 HUBP_REG_LIST_DCN10(id)\ 442 HUBP_REG_LIST_DCN10(id)\
446} 443}
447 444
448
449static const struct dcn_mi_registers hubp_regs[] = { 445static const struct dcn_mi_registers hubp_regs[] = {
450 hubp_regs(0), 446 hubp_regs(0),
451 hubp_regs(1), 447 hubp_regs(1),
@@ -461,7 +457,6 @@ static const struct dcn_mi_mask hubp_mask = {
461 HUBP_MASK_SH_LIST_DCN10(_MASK) 457 HUBP_MASK_SH_LIST_DCN10(_MASK)
462}; 458};
463 459
464
465static const struct dcn_hubbub_registers hubbub_reg = { 460static const struct dcn_hubbub_registers hubbub_reg = {
466 HUBBUB_REG_LIST_DCN10(0) 461 HUBBUB_REG_LIST_DCN10(0)
467}; 462};
@@ -494,6 +489,27 @@ static const struct dce110_clk_src_mask cs_mask = {
494 CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK) 489 CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK)
495}; 490};
496 491
492
493#define mmMP1_SMN_C2PMSG_91 0x1629B
494#define mmMP1_SMN_C2PMSG_83 0x16293
495#define mmMP1_SMN_C2PMSG_67 0x16283
496
497#define MP1_SMN_C2PMSG_91__CONTENT_MASK 0xffffffffL
498#define MP1_SMN_C2PMSG_83__CONTENT_MASK 0xffffffffL
499#define MP1_SMN_C2PMSG_67__CONTENT_MASK 0xffffffffL
500#define MP1_SMN_C2PMSG_91__CONTENT__SHIFT 0x00000000
501#define MP1_SMN_C2PMSG_83__CONTENT__SHIFT 0x00000000
502#define MP1_SMN_C2PMSG_67__CONTENT__SHIFT 0x00000000
503
504
505static const struct clk_mgr_shift clk_mgr_shift = {
506 CLK_MASK_SH_LIST_RV1(__SHIFT)
507};
508
509static const struct clk_mgr_mask clk_mgr_mask = {
510 CLK_MASK_SH_LIST_RV1(_MASK)
511};
512
497static const struct resource_caps res_cap = { 513static const struct resource_caps res_cap = {
498 .num_timing_generator = 4, 514 .num_timing_generator = 4,
499 .num_opp = 4, 515 .num_opp = 4,
@@ -504,7 +520,6 @@ static const struct resource_caps res_cap = {
504 .num_ddc = 4, 520 .num_ddc = 4,
505}; 521};
506 522
507#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
508static const struct resource_caps rv2_res_cap = { 523static const struct resource_caps rv2_res_cap = {
509 .num_timing_generator = 3, 524 .num_timing_generator = 3,
510 .num_opp = 3, 525 .num_opp = 3,
@@ -514,7 +529,6 @@ static const struct resource_caps rv2_res_cap = {
514 .num_pll = 3, 529 .num_pll = 3,
515 .num_ddc = 3, 530 .num_ddc = 3,
516}; 531};
517#endif
518 532
519static const struct dc_plane_cap plane_cap = { 533static const struct dc_plane_cap plane_cap = {
520 .type = DC_PLANE_TYPE_DCN_UNIVERSAL, 534 .type = DC_PLANE_TYPE_DCN_UNIVERSAL,
@@ -1217,6 +1231,38 @@ static enum dc_status dcn10_get_default_swizzle_mode(struct dc_plane_state *plan
1217 return result; 1231 return result;
1218} 1232}
1219 1233
1234struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
1235 struct resource_context *res_ctx,
1236 const struct resource_pool *pool,
1237 struct dc_stream_state *stream)
1238{
1239 int i;
1240 int j = -1;
1241 struct dc_link *link = stream->link;
1242
1243 for (i = 0; i < pool->stream_enc_count; i++) {
1244 if (!res_ctx->is_stream_enc_acquired[i] &&
1245 pool->stream_enc[i]) {
1246 /* Store first available for MST second display
1247 * in daisy chain use case
1248 */
1249 j = i;
1250 if (pool->stream_enc[i]->id ==
1251 link->link_enc->preferred_engine)
1252 return pool->stream_enc[i];
1253 }
1254 }
1255
1256 /*
1257 * For CZ and later, we can allow DIG FE and BE to differ for all display types
1258 */
1259
1260 if (j >= 0)
1261 return pool->stream_enc[j];
1262
1263 return NULL;
1264}
1265
1220static const struct dc_cap_funcs cap_funcs = { 1266static const struct dc_cap_funcs cap_funcs = {
1221 .get_dcc_compression_cap = dcn10_get_dcc_compression_cap 1267 .get_dcc_compression_cap = dcn10_get_dcc_compression_cap
1222}; 1268};
@@ -1229,7 +1275,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = {
1229 .validate_plane = dcn10_validate_plane, 1275 .validate_plane = dcn10_validate_plane,
1230 .validate_global = dcn10_validate_global, 1276 .validate_global = dcn10_validate_global,
1231 .add_stream_to_ctx = dcn10_add_stream_to_ctx, 1277 .add_stream_to_ctx = dcn10_add_stream_to_ctx,
1232 .get_default_swizzle_mode = dcn10_get_default_swizzle_mode 1278 .get_default_swizzle_mode = dcn10_get_default_swizzle_mode,
1279 .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
1233}; 1280};
1234 1281
1235static uint32_t read_pipe_fuses(struct dc_context *ctx) 1282static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -1252,11 +1299,9 @@ static bool construct(
1252 1299
1253 ctx->dc_bios->regs = &bios_regs; 1300 ctx->dc_bios->regs = &bios_regs;
1254 1301
1255#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
1256 if (ctx->dce_version == DCN_VERSION_1_01) 1302 if (ctx->dce_version == DCN_VERSION_1_01)
1257 pool->base.res_cap = &rv2_res_cap; 1303 pool->base.res_cap = &rv2_res_cap;
1258 else 1304 else
1259#endif
1260 pool->base.res_cap = &res_cap; 1305 pool->base.res_cap = &res_cap;
1261 pool->base.funcs = &dcn10_res_pool_funcs; 1306 pool->base.funcs = &dcn10_res_pool_funcs;
1262 1307
@@ -1273,10 +1318,8 @@ static bool construct(
1273 /* max pipe num for ASIC before check pipe fuses */ 1318 /* max pipe num for ASIC before check pipe fuses */
1274 pool->base.pipe_count = pool->base.res_cap->num_timing_generator; 1319 pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
1275 1320
1276#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
1277 if (dc->ctx->dce_version == DCN_VERSION_1_01) 1321 if (dc->ctx->dce_version == DCN_VERSION_1_01)
1278 pool->base.pipe_count = 3; 1322 pool->base.pipe_count = 3;
1279#endif
1280 dc->caps.max_video_width = 3840; 1323 dc->caps.max_video_width = 3840;
1281 dc->caps.max_downscale_ratio = 200; 1324 dc->caps.max_downscale_ratio = 200;
1282 dc->caps.i2c_speed_in_khz = 100; 1325 dc->caps.i2c_speed_in_khz = 100;
@@ -1309,26 +1352,17 @@ static bool construct(
1309 CLOCK_SOURCE_COMBO_PHY_PLL2, 1352 CLOCK_SOURCE_COMBO_PHY_PLL2,
1310 &clk_src_regs[2], false); 1353 &clk_src_regs[2], false);
1311 1354
1312#ifdef CONFIG_DRM_AMD_DC_DCN1_01
1313 if (dc->ctx->dce_version == DCN_VERSION_1_0) { 1355 if (dc->ctx->dce_version == DCN_VERSION_1_0) {
1314 pool->base.clock_sources[DCN10_CLK_SRC_PLL3] = 1356 pool->base.clock_sources[DCN10_CLK_SRC_PLL3] =
1315 dcn10_clock_source_create(ctx, ctx->dc_bios, 1357 dcn10_clock_source_create(ctx, ctx->dc_bios,
1316 CLOCK_SOURCE_COMBO_PHY_PLL3, 1358 CLOCK_SOURCE_COMBO_PHY_PLL3,
1317 &clk_src_regs[3], false); 1359 &clk_src_regs[3], false);
1318 } 1360 }
1319#else
1320 pool->base.clock_sources[DCN10_CLK_SRC_PLL3] =
1321 dcn10_clock_source_create(ctx, ctx->dc_bios,
1322 CLOCK_SOURCE_COMBO_PHY_PLL3,
1323 &clk_src_regs[3], false);
1324#endif
1325 1361
1326 pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL; 1362 pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL;
1327 1363
1328#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
1329 if (dc->ctx->dce_version == DCN_VERSION_1_01) 1364 if (dc->ctx->dce_version == DCN_VERSION_1_01)
1330 pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL; 1365 pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL;
1331#endif
1332 1366
1333 pool->base.dp_clock_source = 1367 pool->base.dp_clock_source =
1334 dcn10_clock_source_create(ctx, ctx->dc_bios, 1368 dcn10_clock_source_create(ctx, ctx->dc_bios,
@@ -1343,12 +1377,6 @@ static bool construct(
1343 goto fail; 1377 goto fail;
1344 } 1378 }
1345 } 1379 }
1346 pool->base.clk_mgr = dcn1_clk_mgr_create(ctx);
1347 if (pool->base.clk_mgr == NULL) {
1348 dm_error("DC: failed to create display clock!\n");
1349 BREAK_TO_DEBUGGER();
1350 goto fail;
1351 }
1352 1380
1353 pool->base.dmcu = dcn10_dmcu_create(ctx, 1381 pool->base.dmcu = dcn10_dmcu_create(ctx,
1354 &dmcu_regs, 1382 &dmcu_regs,
@@ -1374,7 +1402,6 @@ static bool construct(
1374 memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); 1402 memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
1375 memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); 1403 memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
1376 1404
1377#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
1378 if (dc->ctx->dce_version == DCN_VERSION_1_01) { 1405 if (dc->ctx->dce_version == DCN_VERSION_1_01) {
1379 struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc; 1406 struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
1380 struct dcn_ip_params *dcn_ip = dc->dcn_ip; 1407 struct dcn_ip_params *dcn_ip = dc->dcn_ip;
@@ -1385,7 +1412,6 @@ static bool construct(
1385 dcn_soc->dram_clock_change_latency = 23; 1412 dcn_soc->dram_clock_change_latency = 23;
1386 dcn_ip->max_num_dpp = 3; 1413 dcn_ip->max_num_dpp = 3;
1387 } 1414 }
1388#endif
1389 if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { 1415 if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
1390 dc->dcn_soc->urgent_latency = 3; 1416 dc->dcn_soc->urgent_latency = 3;
1391 dc->debug.disable_dmcu = true; 1417 dc->debug.disable_dmcu = true;
@@ -1410,6 +1436,13 @@ static bool construct(
1410 1436
1411 pool->base.pp_smu = dcn10_pp_smu_create(ctx); 1437 pool->base.pp_smu = dcn10_pp_smu_create(ctx);
1412 1438
1439 pool->base.clk_mgr = dcn1_clk_mgr_create(ctx);
1440 if (pool->base.clk_mgr == NULL) {
1441 dm_error("DC: failed to create display clock!\n");
1442 BREAK_TO_DEBUGGER();
1443 goto fail;
1444 }
1445
1413 if (!dc->debug.disable_pplib_clock_request) 1446 if (!dc->debug.disable_pplib_clock_request)
1414 dcn_bw_update_from_pplib(dc); 1447 dcn_bw_update_from_pplib(dc);
1415 dcn_bw_sync_calcs_and_dml(dc); 1448 dcn_bw_sync_calcs_and_dml(dc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
index 999c684a0b36..633025ccb870 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
@@ -42,6 +42,11 @@ struct resource_pool *dcn10_create_resource_pool(
42 const struct dc_init_data *init_data, 42 const struct dc_init_data *init_data,
43 struct dc *dc); 43 struct dc *dc);
44 44
45struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
46 struct resource_context *res_ctx,
47 const struct resource_pool *pool,
48 struct dc_stream_state *stream);
49
45 50
46#endif /* __DC_RESOURCE_DCN10_H__ */ 51#endif /* __DC_RESOURCE_DCN10_H__ */
47 52
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index 8ee9f6dc1d62..ba71b5224e7f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -415,6 +415,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
415 case COLOR_SPACE_APPCTRL: 415 case COLOR_SPACE_APPCTRL:
416 case COLOR_SPACE_CUSTOMPOINTS: 416 case COLOR_SPACE_CUSTOMPOINTS:
417 case COLOR_SPACE_UNKNOWN: 417 case COLOR_SPACE_UNKNOWN:
418 case COLOR_SPACE_YCBCR709_BLACK:
418 /* do nothing */ 419 /* do nothing */
419 break; 420 break;
420 } 421 }
@@ -726,11 +727,9 @@ void enc1_stream_encoder_update_dp_info_packets(
726 3, /* packetIndex */ 727 3, /* packetIndex */
727 &info_frame->hdrsmd); 728 &info_frame->hdrsmd);
728 729
729 if (info_frame->dpsdp.valid) 730 /* packetIndex 4 is used for send immediate sdp message, and please
730 enc1_update_generic_info_packet( 731 * use other packetIndex (such as 5,6) for other info packet
731 enc1, 732 */
732 4,/* packetIndex */
733 &info_frame->dpsdp);
734 733
735 /* enable/disable transmission of packet(s). 734 /* enable/disable transmission of packet(s).
736 * If enabled, packet transmission begins on the next frame 735 * If enabled, packet transmission begins on the next frame
@@ -738,7 +737,101 @@ void enc1_stream_encoder_update_dp_info_packets(
738 REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid); 737 REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);
739 REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid); 738 REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);
740 REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid); 739 REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid);
741 REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, info_frame->dpsdp.valid); 740
741
742 /* This bit is the master enable bit.
743 * When enabling secondary stream engine,
744 * this master bit must also be set.
745 * This register shared with audio info frame.
746 * Therefore we need to enable master bit
747 * if at least on of the fields is not 0
748 */
749 value = REG_READ(DP_SEC_CNTL);
750 if (value)
751 REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
752}
753
754void enc1_stream_encoder_send_immediate_sdp_message(
755 struct stream_encoder *enc,
756 const uint8_t *custom_sdp_message,
757 unsigned int sdp_message_size)
758{
759 struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
760 uint32_t value = 0;
761
762 /* TODOFPGA Figure out a proper number for max_retries polling for lock
763 * use 50 for now.
764 */
765 uint32_t max_retries = 50;
766
767 /* check if GSP4 is transmitted */
768 REG_WAIT(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING,
769 0, 10, max_retries);
770
771 /* disable GSP4 transmitting */
772 REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 0);
773
774 /* transmit GSP4 at the earliest time in a frame */
775 REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, 1);
776
777 /*we need turn on clock before programming AFMT block*/
778 REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1);
779
780 /* check if HW reading GSP memory */
781 REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT,
782 0, 10, max_retries);
783
784 /* HW does is not reading GSP memory not reading too long ->
785 * something wrong. clear GPS memory access and notify?
786 * hw SW is writing to GSP memory
787 */
788 REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1);
789
790 /* use generic packet 4 for immediate sdp message */
791 REG_UPDATE(AFMT_VBI_PACKET_CONTROL,
792 AFMT_GENERIC_INDEX, 4);
793
794 /* write generic packet header
795 * (4th byte is for GENERIC0 only)
796 */
797 REG_SET_4(AFMT_GENERIC_HDR, 0,
798 AFMT_GENERIC_HB0, custom_sdp_message[0],
799 AFMT_GENERIC_HB1, custom_sdp_message[1],
800 AFMT_GENERIC_HB2, custom_sdp_message[2],
801 AFMT_GENERIC_HB3, custom_sdp_message[3]);
802
803 /* write generic packet contents
804 * (we never use last 4 bytes)
805 * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers
806 */
807 {
808 const uint32_t *content =
809 (const uint32_t *) &custom_sdp_message[4];
810
811 REG_WRITE(AFMT_GENERIC_0, *content++);
812 REG_WRITE(AFMT_GENERIC_1, *content++);
813 REG_WRITE(AFMT_GENERIC_2, *content++);
814 REG_WRITE(AFMT_GENERIC_3, *content++);
815 REG_WRITE(AFMT_GENERIC_4, *content++);
816 REG_WRITE(AFMT_GENERIC_5, *content++);
817 REG_WRITE(AFMT_GENERIC_6, *content++);
818 REG_WRITE(AFMT_GENERIC_7, *content);
819 }
820
821 /* check whether GENERIC4 registers double buffer update in immediate mode
822 * is pending
823 */
824 REG_WAIT(AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING,
825 0, 10, max_retries);
826
827 /* atomically update double-buffered GENERIC4 registers in immediate mode
828 * (update immediately)
829 */
830 REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
831 AFMT_GENERIC4_IMMEDIATE_UPDATE, 1);
832
833 /* enable GSP4 transmitting */
834 REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 1);
742 835
743 /* This bit is the master enable bit. 836 /* This bit is the master enable bit.
744 * When enabling secondary stream engine, 837 * When enabling secondary stream engine,
@@ -1462,6 +1555,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
1462 enc1_stream_encoder_stop_hdmi_info_packets, 1555 enc1_stream_encoder_stop_hdmi_info_packets,
1463 .update_dp_info_packets = 1556 .update_dp_info_packets =
1464 enc1_stream_encoder_update_dp_info_packets, 1557 enc1_stream_encoder_update_dp_info_packets,
1558 .send_immediate_sdp_message =
1559 enc1_stream_encoder_send_immediate_sdp_message,
1465 .stop_dp_info_packets = 1560 .stop_dp_info_packets =
1466 enc1_stream_encoder_stop_dp_info_packets, 1561 enc1_stream_encoder_stop_dp_info_packets,
1467 .dp_blank = 1562 .dp_blank =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
index e654c2f55971..a292b106a8b1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -81,6 +81,7 @@
81 SRI(DP_MSE_RATE_UPDATE, DP, id), \ 81 SRI(DP_MSE_RATE_UPDATE, DP, id), \
82 SRI(DP_PIXEL_FORMAT, DP, id), \ 82 SRI(DP_PIXEL_FORMAT, DP, id), \
83 SRI(DP_SEC_CNTL, DP, id), \ 83 SRI(DP_SEC_CNTL, DP, id), \
84 SRI(DP_SEC_CNTL2, DP, id), \
84 SRI(DP_STEER_FIFO, DP, id), \ 85 SRI(DP_STEER_FIFO, DP, id), \
85 SRI(DP_VID_M, DP, id), \ 86 SRI(DP_VID_M, DP, id), \
86 SRI(DP_VID_N, DP, id), \ 87 SRI(DP_VID_N, DP, id), \
@@ -118,10 +119,12 @@ struct dcn10_stream_enc_registers {
118 uint32_t AFMT_60958_1; 119 uint32_t AFMT_60958_1;
119 uint32_t AFMT_60958_2; 120 uint32_t AFMT_60958_2;
120 uint32_t DIG_FE_CNTL; 121 uint32_t DIG_FE_CNTL;
122 uint32_t DIG_FE_CNTL2;
121 uint32_t DP_MSE_RATE_CNTL; 123 uint32_t DP_MSE_RATE_CNTL;
122 uint32_t DP_MSE_RATE_UPDATE; 124 uint32_t DP_MSE_RATE_UPDATE;
123 uint32_t DP_PIXEL_FORMAT; 125 uint32_t DP_PIXEL_FORMAT;
124 uint32_t DP_SEC_CNTL; 126 uint32_t DP_SEC_CNTL;
127 uint32_t DP_SEC_CNTL2;
125 uint32_t DP_STEER_FIFO; 128 uint32_t DP_STEER_FIFO;
126 uint32_t DP_VID_M; 129 uint32_t DP_VID_M;
127 uint32_t DP_VID_N; 130 uint32_t DP_VID_N;
@@ -191,6 +194,10 @@ struct dcn10_stream_enc_registers {
191 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\ 194 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
192 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\ 195 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
193 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\ 196 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
197 SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
198 SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
199 SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
200 SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
194 SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\ 201 SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
195 SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\ 202 SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
196 SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\ 203 SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
@@ -245,6 +252,7 @@ struct dcn10_stream_enc_registers {
245 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\ 252 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\
246 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\ 253 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\
247 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\ 254 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\
255 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, mask_sh),\
248 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\ 256 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\
249 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\ 257 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\
250 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\ 258 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\
@@ -253,6 +261,7 @@ struct dcn10_stream_enc_registers {
253 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\ 261 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\
254 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\ 262 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\
255 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\ 263 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\
264 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE, mask_sh),\
256 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\ 265 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\
257 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\ 266 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\
258 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\ 267 SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\
@@ -260,6 +269,7 @@ struct dcn10_stream_enc_registers {
260 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\ 269 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
261 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\ 270 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
262 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\ 271 SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
272 SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_PPS, mask_sh),\
263 SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\ 273 SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
264 SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\ 274 SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
265 SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\ 275 SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
@@ -304,6 +314,7 @@ struct dcn10_stream_enc_registers {
304 type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\ 314 type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\
305 type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\ 315 type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\
306 type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\ 316 type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\
317 type AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING;\
307 type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\ 318 type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\
308 type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\ 319 type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\
309 type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\ 320 type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\
@@ -312,6 +323,7 @@ struct dcn10_stream_enc_registers {
312 type AFMT_GENERIC2_FRAME_UPDATE;\ 323 type AFMT_GENERIC2_FRAME_UPDATE;\
313 type AFMT_GENERIC3_FRAME_UPDATE;\ 324 type AFMT_GENERIC3_FRAME_UPDATE;\
314 type AFMT_GENERIC4_FRAME_UPDATE;\ 325 type AFMT_GENERIC4_FRAME_UPDATE;\
326 type AFMT_GENERIC4_IMMEDIATE_UPDATE;\
315 type AFMT_GENERIC5_FRAME_UPDATE;\ 327 type AFMT_GENERIC5_FRAME_UPDATE;\
316 type AFMT_GENERIC6_FRAME_UPDATE;\ 328 type AFMT_GENERIC6_FRAME_UPDATE;\
317 type AFMT_GENERIC7_FRAME_UPDATE;\ 329 type AFMT_GENERIC7_FRAME_UPDATE;\
@@ -366,7 +378,12 @@ struct dcn10_stream_enc_registers {
366 type DP_SEC_GSP5_ENABLE;\ 378 type DP_SEC_GSP5_ENABLE;\
367 type DP_SEC_GSP6_ENABLE;\ 379 type DP_SEC_GSP6_ENABLE;\
368 type DP_SEC_GSP7_ENABLE;\ 380 type DP_SEC_GSP7_ENABLE;\
381 type DP_SEC_GSP7_PPS;\
369 type DP_SEC_GSP7_SEND;\ 382 type DP_SEC_GSP7_SEND;\
383 type DP_SEC_GSP4_SEND;\
384 type DP_SEC_GSP4_SEND_PENDING;\
385 type DP_SEC_GSP4_LINE_NUM;\
386 type DP_SEC_GSP4_SEND_ANY_LINE;\
370 type DP_SEC_MPG_ENABLE;\ 387 type DP_SEC_MPG_ENABLE;\
371 type DP_VID_STREAM_DIS_DEFER;\ 388 type DP_VID_STREAM_DIS_DEFER;\
372 type DP_VID_STREAM_ENABLE;\ 389 type DP_VID_STREAM_ENABLE;\
@@ -484,6 +501,11 @@ void enc1_stream_encoder_update_dp_info_packets(
484 struct stream_encoder *enc, 501 struct stream_encoder *enc,
485 const struct encoder_info_frame *info_frame); 502 const struct encoder_info_frame *info_frame);
486 503
504void enc1_stream_encoder_send_immediate_sdp_message(
505 struct stream_encoder *enc,
506 const uint8_t *custom_sdp_message,
507 unsigned int sdp_message_size);
508
487void enc1_stream_encoder_stop_dp_info_packets( 509void enc1_stream_encoder_stop_dp_info_packets(
488 struct stream_encoder *enc); 510 struct stream_encoder *enc);
489 511
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 4fc4208d1472..9f7ebf6a4e40 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -80,6 +80,7 @@ struct pp_smu_funcs_rv {
80 /* PPSMC_MSG_SetDisplayCount 80 /* PPSMC_MSG_SetDisplayCount
81 * 0 triggers S0i2 optimization 81 * 0 triggers S0i2 optimization
82 */ 82 */
83
83 void (*set_display_count)(struct pp_smu *pp, int count); 84 void (*set_display_count)(struct pp_smu *pp, int count);
84 85
85 /* reader and writer WM's are sent together as part of one table*/ 86 /* reader and writer WM's are sent together as part of one table*/
@@ -115,7 +116,6 @@ struct pp_smu_funcs_rv {
115 116
116 /* PME w/a */ 117 /* PME w/a */
117 void (*set_pme_wa_enable)(struct pp_smu *pp); 118 void (*set_pme_wa_enable)(struct pp_smu *pp);
118
119}; 119};
120 120
121struct pp_smu_funcs { 121struct pp_smu_funcs {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index c5b791d158a7..6cc59f138095 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -219,6 +219,9 @@ struct _vcs_dpi_display_pipe_source_params_st {
219 unsigned char xfc_enable; 219 unsigned char xfc_enable;
220 unsigned char xfc_slave; 220 unsigned char xfc_slave;
221 struct _vcs_dpi_display_xfc_params_st xfc_params; 221 struct _vcs_dpi_display_xfc_params_st xfc_params;
222 //for vstartuplines calculation freesync
223 unsigned char v_total_min;
224 unsigned char v_total_max;
222}; 225};
223struct writeback_st { 226struct writeback_st {
224 int wb_src_height; 227 int wb_src_height;
@@ -289,6 +292,8 @@ struct _vcs_dpi_display_pipe_dest_params_st {
289 unsigned char otg_inst; 292 unsigned char otg_inst;
290 unsigned char odm_combine; 293 unsigned char odm_combine;
291 unsigned char use_maximum_vstartup; 294 unsigned char use_maximum_vstartup;
295 unsigned int vtotal_max;
296 unsigned int vtotal_min;
292}; 297};
293 298
294struct _vcs_dpi_display_pipe_params_st { 299struct _vcs_dpi_display_pipe_params_st {
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
index c2028c4744a6..a610fae16280 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
@@ -84,10 +84,6 @@ bool dal_hw_factory_init(
84 return true; 84 return true;
85#if defined(CONFIG_DRM_AMD_DC_DCN1_0) 85#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
86 case DCN_VERSION_1_0: 86 case DCN_VERSION_1_0:
87 dal_hw_factory_dcn10_init(factory);
88 return true;
89#endif
90#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
91 case DCN_VERSION_1_01: 87 case DCN_VERSION_1_01:
92 dal_hw_factory_dcn10_init(factory); 88 dal_hw_factory_dcn10_init(factory);
93 return true; 89 return true;
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
index 236ca28784a9..77615146b96e 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
@@ -84,11 +84,6 @@ bool dal_hw_translate_init(
84 dal_hw_translate_dcn10_init(translate); 84 dal_hw_translate_dcn10_init(translate);
85 return true; 85 return true;
86#endif 86#endif
87#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
88 case DCN_VERSION_1_01:
89 dal_hw_translate_dcn10_init(translate);
90 return true;
91#endif
92 87
93 default: 88 default:
94 BREAK_TO_DEBUGGER(); 89 BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 6f5ab05d6467..539d34d3439c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -123,6 +123,11 @@ struct resource_funcs {
123 enum dc_status (*get_default_swizzle_mode)( 123 enum dc_status (*get_default_swizzle_mode)(
124 struct dc_plane_state *plane_state); 124 struct dc_plane_state *plane_state);
125 125
126 struct stream_encoder *(*find_first_free_match_stream_enc_for_link)(
127 struct resource_context *res_ctx,
128 const struct resource_pool *pool,
129 struct dc_stream_state *stream);
130
126}; 131};
127 132
128struct audio_support{ 133struct audio_support{
@@ -212,6 +217,25 @@ struct plane_resource {
212 struct dcn_fe_bandwidth bw; 217 struct dcn_fe_bandwidth bw;
213}; 218};
214 219
220union pipe_update_flags {
221 struct {
222 uint32_t enable : 1;
223 uint32_t disable : 1;
224 uint32_t odm : 1;
225 uint32_t global_sync : 1;
226 uint32_t opp_changed : 1;
227 uint32_t tg_changed : 1;
228 uint32_t mpcc : 1;
229 uint32_t dppclk : 1;
230 uint32_t hubp_interdependent : 1;
231 uint32_t hubp_rq_dlg_ttu : 1;
232 uint32_t gamut_remap : 1;
233 uint32_t scaler : 1;
234 uint32_t viewport : 1;
235 } bits;
236 uint32_t raw;
237};
238
215struct pipe_ctx { 239struct pipe_ctx {
216 struct dc_plane_state *plane_state; 240 struct dc_plane_state *plane_state;
217 struct dc_stream_state *stream; 241 struct dc_stream_state *stream;
@@ -234,6 +258,7 @@ struct pipe_ctx {
234 struct _vcs_dpi_display_rq_regs_st rq_regs; 258 struct _vcs_dpi_display_rq_regs_st rq_regs;
235 struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param; 259 struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param;
236#endif 260#endif
261 union pipe_update_flags update_flags;
237}; 262};
238 263
239struct resource_context { 264struct resource_context {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index 31bd6d5183ab..f3fd3f8cac26 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -31,7 +31,7 @@
31 31
32struct clk_mgr { 32struct clk_mgr {
33 struct dc_context *ctx; 33 struct dc_context *ctx;
34 const struct clk_mgr_funcs *funcs; 34 struct clk_mgr_funcs *funcs;
35 35
36 struct dc_clocks clks; 36 struct dc_clocks clks;
37}; 37};
@@ -44,6 +44,12 @@ struct clk_mgr_funcs {
44 int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr); 44 int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr);
45 45
46 void (*init_clocks)(struct clk_mgr *clk_mgr); 46 void (*init_clocks)(struct clk_mgr *clk_mgr);
47
48 /* Returns actual clk that's set */
49 int (*set_dispclk)(struct clk_mgr *clk_mgr, int requested_dispclk_khz);
50 int (*set_dprefclk)(struct clk_mgr *clk_mgr);
47}; 51};
48 52
53
54
49#endif /* __DAL_CLK_MGR_H__ */ 55#endif /* __DAL_CLK_MGR_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index c9d3e37e9531..ca162079a41b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -59,6 +59,7 @@ struct encoder_feature_support {
59 uint32_t IS_TPS3_CAPABLE:1; 59 uint32_t IS_TPS3_CAPABLE:1;
60 uint32_t IS_TPS4_CAPABLE:1; 60 uint32_t IS_TPS4_CAPABLE:1;
61 uint32_t HDMI_6GB_EN:1; 61 uint32_t HDMI_6GB_EN:1;
62 uint32_t DP_IS_USB_C:1;
62 } bits; 63 } bits;
63 uint32_t raw; 64 uint32_t raw;
64 } flags; 65 } flags;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index 49854eb73d1d..537563888f87 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -63,8 +63,6 @@ struct encoder_info_frame {
63 struct dc_info_packet vsc; 63 struct dc_info_packet vsc;
64 /* HDR Static MetaData */ 64 /* HDR Static MetaData */
65 struct dc_info_packet hdrsmd; 65 struct dc_info_packet hdrsmd;
66 /* custom sdp message */
67 struct dc_info_packet dpsdp;
68}; 66};
69 67
70struct encoder_unblank_param { 68struct encoder_unblank_param {
@@ -123,6 +121,11 @@ struct stream_encoder_funcs {
123 struct stream_encoder *enc, 121 struct stream_encoder *enc,
124 const struct encoder_info_frame *info_frame); 122 const struct encoder_info_frame *info_frame);
125 123
124 void (*send_immediate_sdp_message)(
125 struct stream_encoder *enc,
126 const uint8_t *custom_sdp_message,
127 unsigned int sdp_message_size);
128
126 void (*stop_dp_info_packets)( 129 void (*stop_dp_info_packets)(
127 struct stream_encoder *enc); 130 struct stream_encoder *enc);
128 131
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 067d53caf28a..0b8c6896581f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -70,14 +70,6 @@ enum crtc_state {
70 CRTC_STATE_VACTIVE 70 CRTC_STATE_VACTIVE
71}; 71};
72 72
73struct _dlg_otg_param {
74 int vstartup_start;
75 int vupdate_offset;
76 int vupdate_width;
77 int vready_offset;
78 enum signal_type signal;
79};
80
81struct vupdate_keepout_params { 73struct vupdate_keepout_params {
82 int start_offset; 74 int start_offset;
83 int end_offset; 75 int end_offset;
@@ -126,7 +118,6 @@ struct timing_generator {
126 const struct timing_generator_funcs *funcs; 118 const struct timing_generator_funcs *funcs;
127 struct dc_bios *bp; 119 struct dc_bios *bp;
128 struct dc_context *ctx; 120 struct dc_context *ctx;
129 struct _dlg_otg_param dlg_otg_param;
130 int inst; 121 int inst;
131}; 122};
132 123
@@ -140,7 +131,13 @@ struct timing_generator_funcs {
140 const struct dc_crtc_timing *timing); 131 const struct dc_crtc_timing *timing);
141 void (*program_timing)(struct timing_generator *tg, 132 void (*program_timing)(struct timing_generator *tg,
142 const struct dc_crtc_timing *timing, 133 const struct dc_crtc_timing *timing,
143 bool use_vbios); 134 int vready_offset,
135 int vstartup_start,
136 int vupdate_offset,
137 int vupdate_width,
138 const enum signal_type signal,
139 bool use_vbios
140 );
144 void (*setup_vertical_interrupt0)( 141 void (*setup_vertical_interrupt0)(
145 struct timing_generator *optc, 142 struct timing_generator *optc,
146 uint32_t start_line, 143 uint32_t start_line,
@@ -210,7 +207,11 @@ struct timing_generator_funcs {
210 207
211 bool (*arm_vert_intr)(struct timing_generator *tg, uint8_t width); 208 bool (*arm_vert_intr)(struct timing_generator *tg, uint8_t width);
212 209
213 void (*program_global_sync)(struct timing_generator *tg); 210 void (*program_global_sync)(struct timing_generator *tg,
211 int vready_offset,
212 int vstartup_start,
213 int vupdate_offset,
214 int vupdate_width);
214 void (*enable_optc_clock)(struct timing_generator *tg, bool enable); 215 void (*enable_optc_clock)(struct timing_generator *tg, bool enable);
215 void (*program_stereo)(struct timing_generator *tg, 216 void (*program_stereo)(struct timing_generator *tg,
216 const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); 217 const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
@@ -237,6 +238,8 @@ struct timing_generator_funcs {
237 bool (*get_crc)(struct timing_generator *tg, 238 bool (*get_crc)(struct timing_generator *tg,
238 uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); 239 uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
239 240
241 void (*set_vtg_params)(struct timing_generator *optc,
242 const struct dc_crtc_timing *dc_crtc_timing);
240}; 243};
241 244
242#endif 245#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index 33905468e2b9..eb1c12ed026a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -158,6 +158,11 @@ struct hw_sequencer_funcs {
158 158
159 void (*update_info_frame)(struct pipe_ctx *pipe_ctx); 159 void (*update_info_frame)(struct pipe_ctx *pipe_ctx);
160 160
161 void (*send_immediate_sdp_message)(
162 struct pipe_ctx *pipe_ctx,
163 const uint8_t *custom_sdp_message,
164 unsigned int sdp_message_size);
165
161 void (*enable_stream)(struct pipe_ctx *pipe_ctx); 166 void (*enable_stream)(struct pipe_ctx *pipe_ctx);
162 167
163 void (*disable_stream)(struct pipe_ctx *pipe_ctx, 168 void (*disable_stream)(struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
index 01bf01a34a08..c30437ae8395 100644
--- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h
+++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
@@ -307,7 +307,8 @@ struct bp_encoder_cap_info {
307 uint32_t DP_HBR2_EN:1; 307 uint32_t DP_HBR2_EN:1;
308 uint32_t DP_HBR3_EN:1; 308 uint32_t DP_HBR3_EN:1;
309 uint32_t HDMI_6GB_EN:1; 309 uint32_t HDMI_6GB_EN:1;
310 uint32_t RESERVED:30; 310 uint32_t DP_IS_USB_C:1;
311 uint32_t RESERVED:27;
311}; 312};
312 313
313#endif /*__DAL_BIOS_PARSER_TYPES_H__ */ 314#endif /*__DAL_BIOS_PARSER_TYPES_H__ */
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 4c8ce7938f01..63c3e77159d9 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -131,21 +131,18 @@
131#define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */ 131#define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */
132#define RAVEN_A0 0x01 132#define RAVEN_A0 0x01
133#define RAVEN_B0 0x21 133#define RAVEN_B0 0x21
134#define PICASSO_A0 0x41
135#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
136/* DCN1_01 */ 134/* DCN1_01 */
135#define PICASSO_A0 0x41
137#define RAVEN2_A0 0x81 136#define RAVEN2_A0 0x81
138#endif 137#define RAVEN1_F0 0xF0
139#define RAVEN_UNKNOWN 0xFF 138#define RAVEN_UNKNOWN 0xFF
140 139
141#define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN) 140#define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN)
142#define RAVEN1_F0 0xF0 141#define RAVEN1_F0 0xF0
143#define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) 142#define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN))
144 143
145#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
146#define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0)) 144#define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0))
147#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) 145#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0))
148#endif /* DCN1_01 */
149 146
150#define FAMILY_RV 142 /* DCN 1*/ 147#define FAMILY_RV 142 /* DCN 1*/
151 148
diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h
index f5bd869d4320..dabdbc0999d4 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -45,9 +45,7 @@ enum dce_version {
45 DCE_VERSION_12_1, 45 DCE_VERSION_12_1,
46 DCE_VERSION_MAX, 46 DCE_VERSION_MAX,
47 DCN_VERSION_1_0, 47 DCN_VERSION_1_0,
48#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
49 DCN_VERSION_1_01, 48 DCN_VERSION_1_01,
50#endif /* DCN1_01 */
51 DCN_VERSION_MAX 49 DCN_VERSION_MAX
52}; 50};
53 51
diff --git a/drivers/gpu/drm/amd/display/include/set_mode_types.h b/drivers/gpu/drm/amd/display/include/set_mode_types.h
index 2b836e582c08..845fea8a387f 100644
--- a/drivers/gpu/drm/amd/display/include/set_mode_types.h
+++ b/drivers/gpu/drm/amd/display/include/set_mode_types.h
@@ -84,7 +84,10 @@ union hdmi_info_packet {
84 uint16_t bar_left; 84 uint16_t bar_left;
85 uint16_t bar_right; 85 uint16_t bar_right;
86 86
87 uint8_t reserved[14]; 87 uint8_t F140_F143:4;
88 uint8_t ACE0_ACE3:4;
89
90 uint8_t reserved[13];
88 } bits; 91 } bits;
89 92
90 struct info_packet_raw_data packet_raw_data; 93 struct info_packet_raw_data packet_raw_data;
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index a1055413bade..8601d371776e 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -240,16 +240,27 @@ struct dividers {
240 struct fixed31_32 divider3; 240 struct fixed31_32 divider3;
241}; 241};
242 242
243static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4) 243enum gamma_type_index {
244 gamma_type_index_2_4,
245 gamma_type_index_2_2,
246 gamma_type_index_2_2_flat
247};
248
249static void build_coefficients(struct gamma_coefficients *coefficients, enum gamma_type_index type)
244{ 250{
245 static const int32_t numerator01[] = { 31308, 180000}; 251 static const int32_t numerator01[] = { 31308, 180000, 0};
246 static const int32_t numerator02[] = { 12920, 4500}; 252 static const int32_t numerator02[] = { 12920, 4500, 0};
247 static const int32_t numerator03[] = { 55, 99}; 253 static const int32_t numerator03[] = { 55, 99, 0};
248 static const int32_t numerator04[] = { 55, 99}; 254 static const int32_t numerator04[] = { 55, 99, 0};
249 static const int32_t numerator05[] = { 2400, 2200}; 255 static const int32_t numerator05[] = { 2400, 2200, 2200};
250 256
251 uint32_t i = 0; 257 uint32_t i = 0;
252 uint32_t index = is_2_4 == true ? 0:1; 258 uint32_t index = 0;
259
260 if (type == gamma_type_index_2_2)
261 index = 1;
262 else if (type == gamma_type_index_2_2_flat)
263 index = 2;
253 264
254 do { 265 do {
255 coefficients->a0[i] = dc_fixpt_from_fraction( 266 coefficients->a0[i] = dc_fixpt_from_fraction(
@@ -697,7 +708,7 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq,
697 708
698static void build_regamma(struct pwl_float_data_ex *rgb_regamma, 709static void build_regamma(struct pwl_float_data_ex *rgb_regamma,
699 uint32_t hw_points_num, 710 uint32_t hw_points_num,
700 const struct hw_x_point *coordinate_x, bool is_2_4) 711 const struct hw_x_point *coordinate_x, enum gamma_type_index type)
701{ 712{
702 uint32_t i; 713 uint32_t i;
703 714
@@ -705,7 +716,7 @@ static void build_regamma(struct pwl_float_data_ex *rgb_regamma,
705 struct pwl_float_data_ex *rgb = rgb_regamma; 716 struct pwl_float_data_ex *rgb = rgb_regamma;
706 const struct hw_x_point *coord_x = coordinate_x; 717 const struct hw_x_point *coord_x = coordinate_x;
707 718
708 build_coefficients(&coeff, is_2_4); 719 build_coefficients(&coeff, type);
709 720
710 i = 0; 721 i = 0;
711 722
@@ -892,13 +903,13 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
892 903
893static void build_degamma(struct pwl_float_data_ex *curve, 904static void build_degamma(struct pwl_float_data_ex *curve,
894 uint32_t hw_points_num, 905 uint32_t hw_points_num,
895 const struct hw_x_point *coordinate_x, bool is_2_4) 906 const struct hw_x_point *coordinate_x, enum gamma_type_index type)
896{ 907{
897 uint32_t i; 908 uint32_t i;
898 struct gamma_coefficients coeff; 909 struct gamma_coefficients coeff;
899 uint32_t begin_index, end_index; 910 uint32_t begin_index, end_index;
900 911
901 build_coefficients(&coeff, is_2_4); 912 build_coefficients(&coeff, type);
902 i = 0; 913 i = 0;
903 914
904 /* X points is 2^-25 to 2^7 915 /* X points is 2^-25 to 2^7
@@ -1614,7 +1625,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
1614 coordinates_x, 1625 coordinates_x,
1615 output_tf->sdr_ref_white_level); 1626 output_tf->sdr_ref_white_level);
1616 } else if (tf == TRANSFER_FUNCTION_GAMMA22 && 1627 } else if (tf == TRANSFER_FUNCTION_GAMMA22 &&
1617 fs_params != NULL) { 1628 fs_params != NULL && fs_params->skip_tm == 0) {
1618 build_freesync_hdr(rgb_regamma, 1629 build_freesync_hdr(rgb_regamma,
1619 MAX_HW_POINTS, 1630 MAX_HW_POINTS,
1620 coordinates_x, 1631 coordinates_x,
@@ -1627,7 +1638,9 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
1627 1638
1628 build_regamma(rgb_regamma, 1639 build_regamma(rgb_regamma,
1629 MAX_HW_POINTS, 1640 MAX_HW_POINTS,
1630 coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? true:false); 1641 coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? gamma_type_index_2_4 :
1642 tf == TRANSFER_FUNCTION_GAMMA22 ?
1643 gamma_type_index_2_2_flat : gamma_type_index_2_2);
1631 } 1644 }
1632 map_regamma_hw_to_x_user(ramp, coeff, rgb_user, 1645 map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
1633 coordinates_x, axis_x, rgb_regamma, 1646 coordinates_x, axis_x, rgb_regamma,
@@ -1832,7 +1845,9 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
1832 build_degamma(curve, 1845 build_degamma(curve,
1833 MAX_HW_POINTS, 1846 MAX_HW_POINTS,
1834 coordinates_x, 1847 coordinates_x,
1835 tf == TRANSFER_FUNCTION_SRGB ? true : false); 1848 tf == TRANSFER_FUNCTION_SRGB ?
1849 gamma_type_index_2_4 : tf == TRANSFER_FUNCTION_GAMMA22 ?
1850 gamma_type_index_2_2_flat : gamma_type_index_2_2);
1836 else if (tf == TRANSFER_FUNCTION_LINEAR) { 1851 else if (tf == TRANSFER_FUNCTION_LINEAR) {
1837 // just copy coordinates_x into curve 1852 // just copy coordinates_x into curve
1838 i = 0; 1853 i = 0;
@@ -1932,7 +1947,10 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
1932 1947
1933 build_regamma(rgb_regamma, 1948 build_regamma(rgb_regamma,
1934 MAX_HW_POINTS, 1949 MAX_HW_POINTS,
1935 coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); 1950 coordinates_x,
1951 trans == TRANSFER_FUNCTION_SRGB ?
1952 gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ?
1953 gamma_type_index_2_2_flat : gamma_type_index_2_2);
1936 for (i = 0; i <= MAX_HW_POINTS ; i++) { 1954 for (i = 0; i <= MAX_HW_POINTS ; i++) {
1937 points->red[i] = rgb_regamma[i].r; 1955 points->red[i] = rgb_regamma[i].r;
1938 points->green[i] = rgb_regamma[i].g; 1956 points->green[i] = rgb_regamma[i].g;
@@ -2002,7 +2020,8 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
2002 2020
2003 kvfree(rgb_degamma); 2021 kvfree(rgb_degamma);
2004 } else if (trans == TRANSFER_FUNCTION_SRGB || 2022 } else if (trans == TRANSFER_FUNCTION_SRGB ||
2005 trans == TRANSFER_FUNCTION_BT709) { 2023 trans == TRANSFER_FUNCTION_BT709 ||
2024 trans == TRANSFER_FUNCTION_GAMMA22) {
2006 rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, 2025 rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2007 sizeof(*rgb_degamma), 2026 sizeof(*rgb_degamma),
2008 GFP_KERNEL); 2027 GFP_KERNEL);
@@ -2011,7 +2030,10 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
2011 2030
2012 build_degamma(rgb_degamma, 2031 build_degamma(rgb_degamma,
2013 MAX_HW_POINTS, 2032 MAX_HW_POINTS,
2014 coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); 2033 coordinates_x,
2034 trans == TRANSFER_FUNCTION_SRGB ?
2035 gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ?
2036 gamma_type_index_2_2_flat : gamma_type_index_2_2);
2015 for (i = 0; i <= MAX_HW_POINTS ; i++) { 2037 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2016 points->red[i] = rgb_degamma[i].r; 2038 points->red[i] = rgb_degamma[i].r;
2017 points->green[i] = rgb_degamma[i].g; 2039 points->green[i] = rgb_degamma[i].g;
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index a6e164df090a..369953fafadf 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -79,6 +79,7 @@ struct freesync_hdr_tf_params {
79 unsigned int max_content; // luminance in nits 79 unsigned int max_content; // luminance in nits
80 unsigned int min_display; // luminance in 1/10000 nits 80 unsigned int min_display; // luminance in 1/10000 nits
81 unsigned int max_display; // luminance in nits 81 unsigned int max_display; // luminance in nits
82 unsigned int skip_tm; // skip tm
82}; 83};
83 84
84void setup_x_points_distribution(void); 85void setup_x_points_distribution(void);
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index db06fab2ad5c..bc13c552797f 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -63,7 +63,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
63 if (stream->psr_version != 0) 63 if (stream->psr_version != 0)
64 vscPacketRevision = 2; 64 vscPacketRevision = 2;
65 65
66 if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) 66 /* Update to revision 5 for extended colorimetry support for DPCD 1.4+ */
67 if (stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
68 stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
67 vscPacketRevision = 5; 69 vscPacketRevision = 5;
68 70
69 /* VSC packet not needed based on the features 71 /* VSC packet not needed based on the features
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
index a9575db8d7aa..6efcaa93e17b 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -30,4 +30,22 @@
30#define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044 30#define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044
31#define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0 31#define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0
32 32
33#define smnPerfMonCtlLo0 0x01d440UL
34#define smnPerfMonCtlHi0 0x01d444UL
35#define smnPerfMonCtlLo1 0x01d450UL
36#define smnPerfMonCtlHi1 0x01d454UL
37#define smnPerfMonCtlLo2 0x01d460UL
38#define smnPerfMonCtlHi2 0x01d464UL
39#define smnPerfMonCtlLo3 0x01d470UL
40#define smnPerfMonCtlHi3 0x01d474UL
41
42#define smnPerfMonCtrLo0 0x01d448UL
43#define smnPerfMonCtrHi0 0x01d44cUL
44#define smnPerfMonCtrLo1 0x01d458UL
45#define smnPerfMonCtrHi1 0x01d45cUL
46#define smnPerfMonCtrLo2 0x01d468UL
47#define smnPerfMonCtrHi2 0x01d46cUL
48#define smnPerfMonCtrLo3 0x01d478UL
49#define smnPerfMonCtrHi3 0x01d47cUL
50
33#endif 51#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
index 529b37db274c..f1d048e0ed2c 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
@@ -829,6 +829,8 @@
829#define mmTD_CNTL_BASE_IDX 0 829#define mmTD_CNTL_BASE_IDX 0
830#define mmTD_STATUS 0x0526 830#define mmTD_STATUS 0x0526
831#define mmTD_STATUS_BASE_IDX 0 831#define mmTD_STATUS_BASE_IDX 0
832#define mmTD_EDC_CNT 0x052e
833#define mmTD_EDC_CNT_BASE_IDX 0
832#define mmTD_DSM_CNTL 0x052f 834#define mmTD_DSM_CNTL 0x052f
833#define mmTD_DSM_CNTL_BASE_IDX 0 835#define mmTD_DSM_CNTL_BASE_IDX 0
834#define mmTD_DSM_CNTL2 0x0530 836#define mmTD_DSM_CNTL2 0x0530
@@ -845,6 +847,8 @@
845#define mmTA_STATUS_BASE_IDX 0 847#define mmTA_STATUS_BASE_IDX 0
846#define mmTA_SCRATCH 0x0564 848#define mmTA_SCRATCH 0x0564
847#define mmTA_SCRATCH_BASE_IDX 0 849#define mmTA_SCRATCH_BASE_IDX 0
850#define mmTA_EDC_CNT 0x0586
851#define mmTA_EDC_CNT_BASE_IDX 0
848 852
849 853
850// addressBlock: gc_gdsdec 854// addressBlock: gc_gdsdec
@@ -1051,6 +1055,13 @@
1051#define mmGC_USER_RB_BACKEND_DISABLE_BASE_IDX 0 1055#define mmGC_USER_RB_BACKEND_DISABLE_BASE_IDX 0
1052 1056
1053 1057
1058// addressBlock: gc_ea_gceadec2
1059// base address: 0x9c00
1060#define mmGCEA_EDC_CNT 0x0706
1061#define mmGCEA_EDC_CNT_BASE_IDX 0
1062#define mmGCEA_EDC_CNT2 0x0707
1063#define mmGCEA_EDC_CNT2_BASE_IDX 0
1064
1054// addressBlock: gc_rmi_rmidec 1065// addressBlock: gc_rmi_rmidec
1055// base address: 0x9e00 1066// base address: 0x9e00
1056#define mmRMI_GENERAL_CNTL 0x0780 1067#define mmRMI_GENERAL_CNTL 0x0780
@@ -1709,6 +1720,8 @@
1709#define mmTC_CFG_L1_VOLATILE_BASE_IDX 0 1720#define mmTC_CFG_L1_VOLATILE_BASE_IDX 0
1710#define mmTC_CFG_L2_VOLATILE 0x0b23 1721#define mmTC_CFG_L2_VOLATILE 0x0b23
1711#define mmTC_CFG_L2_VOLATILE_BASE_IDX 0 1722#define mmTC_CFG_L2_VOLATILE_BASE_IDX 0
1723#define mmTCI_EDC_CNT 0x0b60
1724#define mmTCI_EDC_CNT_BASE_IDX 0
1712#define mmTCI_STATUS 0x0b61 1725#define mmTCI_STATUS 0x0b61
1713#define mmTCI_STATUS_BASE_IDX 0 1726#define mmTCI_STATUS_BASE_IDX 0
1714#define mmTCI_CNTL_1 0x0b62 1727#define mmTCI_CNTL_1 0x0b62
@@ -2594,6 +2607,24 @@
2594#define mmCP_RB_DOORBELL_CONTROL_SCH_7_BASE_IDX 0 2607#define mmCP_RB_DOORBELL_CONTROL_SCH_7_BASE_IDX 0
2595#define mmCP_RB_DOORBELL_CLEAR 0x1188 2608#define mmCP_RB_DOORBELL_CLEAR 0x1188
2596#define mmCP_RB_DOORBELL_CLEAR_BASE_IDX 0 2609#define mmCP_RB_DOORBELL_CLEAR_BASE_IDX 0
2610#define mmCPF_EDC_TAG_CNT 0x1189
2611#define mmCPF_EDC_TAG_CNT_BASE_IDX 0
2612#define mmCPF_EDC_ROQ_CNT 0x118a
2613#define mmCPF_EDC_ROQ_CNT_BASE_IDX 0
2614#define mmCPG_EDC_TAG_CNT 0x118b
2615#define mmCPG_EDC_TAG_CNT_BASE_IDX 0
2616#define mmCPG_EDC_DMA_CNT 0x118d
2617#define mmCPG_EDC_DMA_CNT_BASE_IDX 0
2618#define mmCPC_EDC_SCRATCH_CNT 0x118e
2619#define mmCPC_EDC_SCRATCH_CNT_BASE_IDX 0
2620#define mmCPC_EDC_UCODE_CNT 0x118f
2621#define mmCPC_EDC_UCODE_CNT_BASE_IDX 0
2622#define mmDC_EDC_STATE_CNT 0x1191
2623#define mmDC_EDC_STATE_CNT_BASE_IDX 0
2624#define mmDC_EDC_CSINVOC_CNT 0x1192
2625#define mmDC_EDC_CSINVOC_CNT_BASE_IDX 0
2626#define mmDC_EDC_RESTORE_CNT 0x1193
2627#define mmDC_EDC_RESTORE_CNT_BASE_IDX 0
2597#define mmCP_GFX_MQD_CONTROL 0x11a0 2628#define mmCP_GFX_MQD_CONTROL 0x11a0
2598#define mmCP_GFX_MQD_CONTROL_BASE_IDX 0 2629#define mmCP_GFX_MQD_CONTROL_BASE_IDX 0
2599#define mmCP_GFX_MQD_BASE_ADDR 0x11a1 2630#define mmCP_GFX_MQD_BASE_ADDR 0x11a1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
index 8c75669eb500..9470ec5e0f42 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
@@ -54,5 +54,8 @@
54#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258 54#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258
55#define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c 55#define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c
56 56
57#define smnPCIE_RX_NUM_NAK 0x11180038
58#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c
59
57#endif // _nbio_6_1_SMN_HEADER 60#endif // _nbio_6_1_SMN_HEADER
58 61
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
index 5563f0715896..caf5ffdc130a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
@@ -51,4 +51,7 @@
51#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258 51#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258
52#define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c 52#define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c
53 53
54#define smnPCIE_RX_NUM_NAK 0x11180038
55#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c
56
54#endif // _nbio_7_0_SMN_HEADER 57#endif // _nbio_7_0_SMN_HEADER
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
index c1457d880c4d..4bcacf529852 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
@@ -50,4 +50,7 @@
50#define smnPCIE_PERF_CNTL_EVENT_LC_PORT_SEL 0x1118024c 50#define smnPCIE_PERF_CNTL_EVENT_LC_PORT_SEL 0x1118024c
51#define smnPCIE_PERF_CNTL_EVENT_CI_PORT_SEL 0x11180250 51#define smnPCIE_PERF_CNTL_EVENT_CI_PORT_SEL 0x11180250
52 52
53#define smnPCIE_RX_NUM_NAK 0x11180038
54#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c
55
53#endif // _nbio_7_4_0_SMN_HEADER 56#endif // _nbio_7_4_0_SMN_HEADER
diff --git a/drivers/gpu/drm/amd/include/cik_structs.h b/drivers/gpu/drm/amd/include/cik_structs.h
index 749eab94e335..699e658c3cec 100644
--- a/drivers/gpu/drm/amd/include/cik_structs.h
+++ b/drivers/gpu/drm/amd/include/cik_structs.h
@@ -282,8 +282,7 @@ struct cik_sdma_rlc_registers {
282 uint32_t reserved_123; 282 uint32_t reserved_123;
283 uint32_t reserved_124; 283 uint32_t reserved_124;
284 uint32_t reserved_125; 284 uint32_t reserved_125;
285 uint32_t reserved_126; 285 /* reserved_126,127: repurposed for driver-internal use */
286 uint32_t reserved_127;
287 uint32_t sdma_engine_id; 286 uint32_t sdma_engine_id;
288 uint32_t sdma_queue_id; 287 uint32_t sdma_queue_id;
289}; 288};
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b897aca9b4c9..98b9533e672b 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -174,6 +174,7 @@ struct tile_config {
174#define ALLOC_MEM_FLAGS_GTT (1 << 1) 174#define ALLOC_MEM_FLAGS_GTT (1 << 1)
175#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) 175#define ALLOC_MEM_FLAGS_USERPTR (1 << 2)
176#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) 176#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
177#define ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4)
177 178
178/* 179/*
179 * Allocation flags attributes/access options. 180 * Allocation flags attributes/access options.
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2b579ba9b685..9f661bf96ed0 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -109,8 +109,12 @@ enum amd_pp_sensors {
109 AMDGPU_PP_SENSOR_UVD_DCLK, 109 AMDGPU_PP_SENSOR_UVD_DCLK,
110 AMDGPU_PP_SENSOR_VCE_ECCLK, 110 AMDGPU_PP_SENSOR_VCE_ECCLK,
111 AMDGPU_PP_SENSOR_GPU_LOAD, 111 AMDGPU_PP_SENSOR_GPU_LOAD,
112 AMDGPU_PP_SENSOR_MEM_LOAD,
112 AMDGPU_PP_SENSOR_GFX_MCLK, 113 AMDGPU_PP_SENSOR_GFX_MCLK,
113 AMDGPU_PP_SENSOR_GPU_TEMP, 114 AMDGPU_PP_SENSOR_GPU_TEMP,
115 AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP,
116 AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
117 AMDGPU_PP_SENSOR_MEM_TEMP,
114 AMDGPU_PP_SENSOR_VCE_POWER, 118 AMDGPU_PP_SENSOR_VCE_POWER,
115 AMDGPU_PP_SENSOR_UVD_POWER, 119 AMDGPU_PP_SENSOR_UVD_POWER,
116 AMDGPU_PP_SENSOR_GPU_POWER, 120 AMDGPU_PP_SENSOR_GPU_POWER,
@@ -159,6 +163,13 @@ struct pp_states_info {
159 uint32_t states[16]; 163 uint32_t states[16];
160}; 164};
161 165
166enum PP_HWMON_TEMP {
167 PP_TEMP_EDGE = 0,
168 PP_TEMP_JUNCTION,
169 PP_TEMP_MEM,
170 PP_TEMP_MAX
171};
172
162#define PP_GROUP_MASK 0xF0000000 173#define PP_GROUP_MASK 0xF0000000
163#define PP_GROUP_SHIFT 28 174#define PP_GROUP_SHIFT 28
164 175
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
index ceaf4932258d..8b383dbe1cda 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -151,8 +151,7 @@ struct v9_sdma_mqd {
151 uint32_t reserved_123; 151 uint32_t reserved_123;
152 uint32_t reserved_124; 152 uint32_t reserved_124;
153 uint32_t reserved_125; 153 uint32_t reserved_125;
154 uint32_t reserved_126; 154 /* reserved_126,127: repurposed for driver-internal use */
155 uint32_t reserved_127;
156 uint32_t sdma_engine_id; 155 uint32_t sdma_engine_id;
157 uint32_t sdma_queue_id; 156 uint32_t sdma_queue_id;
158}; 157};
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
index 717fbae1d362..c17613287cd0 100644
--- a/drivers/gpu/drm/amd/include/vi_structs.h
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -151,8 +151,7 @@ struct vi_sdma_mqd {
151 uint32_t reserved_123; 151 uint32_t reserved_123;
152 uint32_t reserved_124; 152 uint32_t reserved_124;
153 uint32_t reserved_125; 153 uint32_t reserved_125;
154 uint32_t reserved_126; 154 /* reserved_126,127: repurposed for driver-internal use */
155 uint32_t reserved_127;
156 uint32_t sdma_engine_id; 155 uint32_t sdma_engine_id;
157 uint32_t sdma_queue_id; 156 uint32_t sdma_queue_id;
158}; 157};
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index eec329ab6037..3026c7e2d3ea 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -30,6 +30,36 @@
30#include "atom.h" 30#include "atom.h"
31#include "amd_pcie.h" 31#include "amd_pcie.h"
32 32
33int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version)
34{
35 int ret = 0;
36
37 if (!if_version && !smu_version)
38 return -EINVAL;
39
40 if (if_version) {
41 ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion);
42 if (ret)
43 return ret;
44
45 ret = smu_read_smc_arg(smu, if_version);
46 if (ret)
47 return ret;
48 }
49
50 if (smu_version) {
51 ret = smu_send_smc_msg(smu, SMU_MSG_GetSmuVersion);
52 if (ret)
53 return ret;
54
55 ret = smu_read_smc_arg(smu, smu_version);
56 if (ret)
57 return ret;
58 }
59
60 return ret;
61}
62
33int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, 63int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
34 bool gate) 64 bool gate)
35{ 65{
@@ -168,6 +198,8 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size)
168 ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf; 198 ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf;
169 int ret = 0; 199 int ret = 0;
170 200
201 if (!smu->pm_enabled)
202 return -EINVAL;
171 if (header->usStructureSize != size) { 203 if (header->usStructureSize != size) {
172 pr_err("pp table size not matched !\n"); 204 pr_err("pp table size not matched !\n");
173 return -EIO; 205 return -EIO;
@@ -203,6 +235,8 @@ int smu_feature_init_dpm(struct smu_context *smu)
203 int ret = 0; 235 int ret = 0;
204 uint32_t unallowed_feature_mask[SMU_FEATURE_MAX/32]; 236 uint32_t unallowed_feature_mask[SMU_FEATURE_MAX/32];
205 237
238 if (!smu->pm_enabled)
239 return ret;
206 mutex_lock(&feature->mutex); 240 mutex_lock(&feature->mutex);
207 bitmap_fill(feature->allowed, SMU_FEATURE_MAX); 241 bitmap_fill(feature->allowed, SMU_FEATURE_MAX);
208 mutex_unlock(&feature->mutex); 242 mutex_unlock(&feature->mutex);
@@ -314,6 +348,7 @@ static int smu_early_init(void *handle)
314 struct smu_context *smu = &adev->smu; 348 struct smu_context *smu = &adev->smu;
315 349
316 smu->adev = adev; 350 smu->adev = adev;
351 smu->pm_enabled = !!amdgpu_dpm;
317 mutex_init(&smu->mutex); 352 mutex_init(&smu->mutex);
318 353
319 return smu_set_funcs(adev); 354 return smu_set_funcs(adev);
@@ -323,6 +358,9 @@ static int smu_late_init(void *handle)
323{ 358{
324 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 359 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
325 struct smu_context *smu = &adev->smu; 360 struct smu_context *smu = &adev->smu;
361
362 if (!smu->pm_enabled)
363 return 0;
326 mutex_lock(&smu->mutex); 364 mutex_lock(&smu->mutex);
327 smu_handle_task(&adev->smu, 365 smu_handle_task(&adev->smu,
328 smu->smu_dpm.dpm_level, 366 smu->smu_dpm.dpm_level,
@@ -406,9 +444,6 @@ static int smu_sw_init(void *handle)
406 struct smu_context *smu = &adev->smu; 444 struct smu_context *smu = &adev->smu;
407 int ret; 445 int ret;
408 446
409 if (!is_support_sw_smu(adev))
410 return -EINVAL;
411
412 smu->pool_size = adev->pm.smu_prv_buffer_size; 447 smu->pool_size = adev->pm.smu_prv_buffer_size;
413 smu->smu_feature.feature_num = SMU_FEATURE_MAX; 448 smu->smu_feature.feature_num = SMU_FEATURE_MAX;
414 mutex_init(&smu->smu_feature.mutex); 449 mutex_init(&smu->smu_feature.mutex);
@@ -460,9 +495,6 @@ static int smu_sw_fini(void *handle)
460 struct smu_context *smu = &adev->smu; 495 struct smu_context *smu = &adev->smu;
461 int ret; 496 int ret;
462 497
463 if (!is_support_sw_smu(adev))
464 return -EINVAL;
465
466 ret = smu_smc_table_sw_fini(smu); 498 ret = smu_smc_table_sw_fini(smu);
467 if (ret) { 499 if (ret) {
468 pr_err("Failed to sw fini smc table!\n"); 500 pr_err("Failed to sw fini smc table!\n");
@@ -612,10 +644,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
612 * check if the format_revision in vbios is up to pptable header 644 * check if the format_revision in vbios is up to pptable header
613 * version, and the structure size is not 0. 645 * version, and the structure size is not 0.
614 */ 646 */
615 ret = smu_get_clk_info_from_vbios(smu);
616 if (ret)
617 return ret;
618
619 ret = smu_check_pptable(smu); 647 ret = smu_check_pptable(smu);
620 if (ret) 648 if (ret)
621 return ret; 649 return ret;
@@ -716,6 +744,9 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
716 */ 744 */
717 ret = smu_set_tool_table_location(smu); 745 ret = smu_set_tool_table_location(smu);
718 746
747 if (!smu_is_dpm_running(smu))
748 pr_info("dpm has been disabled\n");
749
719 return ret; 750 return ret;
720} 751}
721 752
@@ -788,9 +819,6 @@ static int smu_hw_init(void *handle)
788 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 819 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
789 struct smu_context *smu = &adev->smu; 820 struct smu_context *smu = &adev->smu;
790 821
791 if (!is_support_sw_smu(adev))
792 return -EINVAL;
793
794 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 822 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
795 ret = smu_load_microcode(smu); 823 ret = smu_load_microcode(smu);
796 if (ret) 824 if (ret)
@@ -831,7 +859,10 @@ static int smu_hw_init(void *handle)
831 859
832 mutex_unlock(&smu->mutex); 860 mutex_unlock(&smu->mutex);
833 861
834 adev->pm.dpm_enabled = true; 862 if (!smu->pm_enabled)
863 adev->pm.dpm_enabled = false;
864 else
865 adev->pm.dpm_enabled = true;
835 866
836 pr_info("SMU is initialized successfully!\n"); 867 pr_info("SMU is initialized successfully!\n");
837 868
@@ -849,9 +880,6 @@ static int smu_hw_fini(void *handle)
849 struct smu_table_context *table_context = &smu->smu_table; 880 struct smu_table_context *table_context = &smu->smu_table;
850 int ret = 0; 881 int ret = 0;
851 882
852 if (!is_support_sw_smu(adev))
853 return -EINVAL;
854
855 kfree(table_context->driver_pptable); 883 kfree(table_context->driver_pptable);
856 table_context->driver_pptable = NULL; 884 table_context->driver_pptable = NULL;
857 885
@@ -906,9 +934,6 @@ static int smu_suspend(void *handle)
906 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 934 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
907 struct smu_context *smu = &adev->smu; 935 struct smu_context *smu = &adev->smu;
908 936
909 if (!is_support_sw_smu(adev))
910 return -EINVAL;
911
912 ret = smu_system_features_control(smu, false); 937 ret = smu_system_features_control(smu, false);
913 if (ret) 938 if (ret)
914 return ret; 939 return ret;
@@ -924,9 +949,6 @@ static int smu_resume(void *handle)
924 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 949 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
925 struct smu_context *smu = &adev->smu; 950 struct smu_context *smu = &adev->smu;
926 951
927 if (!is_support_sw_smu(adev))
928 return -EINVAL;
929
930 pr_info("SMU is resuming...\n"); 952 pr_info("SMU is resuming...\n");
931 953
932 mutex_lock(&smu->mutex); 954 mutex_lock(&smu->mutex);
@@ -955,7 +977,7 @@ int smu_display_configuration_change(struct smu_context *smu,
955 int index = 0; 977 int index = 0;
956 int num_of_active_display = 0; 978 int num_of_active_display = 0;
957 979
958 if (!is_support_sw_smu(smu->adev)) 980 if (!smu->pm_enabled || !is_support_sw_smu(smu->adev))
959 return -EINVAL; 981 return -EINVAL;
960 982
961 if (!display_config) 983 if (!display_config)
@@ -1083,7 +1105,7 @@ static int smu_enable_umd_pstate(void *handle,
1083 1105
1084 struct smu_context *smu = (struct smu_context*)(handle); 1106 struct smu_context *smu = (struct smu_context*)(handle);
1085 struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); 1107 struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
1086 if (!smu_dpm_ctx->dpm_context) 1108 if (!smu->pm_enabled || !smu_dpm_ctx->dpm_context)
1087 return -EINVAL; 1109 return -EINVAL;
1088 1110
1089 if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) { 1111 if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) {
@@ -1126,6 +1148,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
1126 long workload; 1148 long workload;
1127 struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); 1149 struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
1128 1150
1151 if (!smu->pm_enabled)
1152 return -EINVAL;
1129 if (!skip_display_settings) { 1153 if (!skip_display_settings) {
1130 ret = smu_display_config_changed(smu); 1154 ret = smu_display_config_changed(smu);
1131 if (ret) { 1155 if (ret) {
@@ -1134,6 +1158,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
1134 } 1158 }
1135 } 1159 }
1136 1160
1161 if (!smu->pm_enabled)
1162 return -EINVAL;
1137 ret = smu_apply_clocks_adjust_rules(smu); 1163 ret = smu_apply_clocks_adjust_rules(smu);
1138 if (ret) { 1164 if (ret) {
1139 pr_err("Failed to apply clocks adjust rules!"); 1165 pr_err("Failed to apply clocks adjust rules!");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 70f7f47a2fcf..cc57fb953e62 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -225,7 +225,16 @@ int phm_register_irq_handlers(struct pp_hwmgr *hwmgr)
225int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) 225int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
226{ 226{
227 int ret = 0; 227 int ret = 0;
228 struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX}; 228 struct PP_TemperatureRange range = {
229 TEMP_RANGE_MIN,
230 TEMP_RANGE_MAX,
231 TEMP_RANGE_MAX,
232 TEMP_RANGE_MIN,
233 TEMP_RANGE_MAX,
234 TEMP_RANGE_MAX,
235 TEMP_RANGE_MIN,
236 TEMP_RANGE_MAX,
237 TEMP_RANGE_MAX};
229 struct amdgpu_device *adev = hwmgr->adev; 238 struct amdgpu_device *adev = hwmgr->adev;
230 239
231 if (hwmgr->hwmgr_func->get_thermal_temperature_range) 240 if (hwmgr->hwmgr_func->get_thermal_temperature_range)
@@ -239,6 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
239 248
240 adev->pm.dpm.thermal.min_temp = range.min; 249 adev->pm.dpm.thermal.min_temp = range.min;
241 adev->pm.dpm.thermal.max_temp = range.max; 250 adev->pm.dpm.thermal.max_temp = range.max;
251 adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
252 adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
253 adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
254 adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
255 adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
256 adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
257 adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
242 258
243 return ret; 259 return ret;
244} 260}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 048757e8f494..16591be8b0ca 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3532,9 +3532,12 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
3532 *size = 4; 3532 *size = 4;
3533 return 0; 3533 return 0;
3534 case AMDGPU_PP_SENSOR_GPU_LOAD: 3534 case AMDGPU_PP_SENSOR_GPU_LOAD:
3535 case AMDGPU_PP_SENSOR_MEM_LOAD:
3535 offset = data->soft_regs_start + smum_get_offsetof(hwmgr, 3536 offset = data->soft_regs_start + smum_get_offsetof(hwmgr,
3536 SMU_SoftRegisters, 3537 SMU_SoftRegisters,
3537 AverageGraphicsActivity); 3538 (idx == AMDGPU_PP_SENSOR_GPU_LOAD) ?
3539 AverageGraphicsActivity:
3540 AverageMemoryActivity);
3538 3541
3539 activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset); 3542 activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset);
3540 activity_percent += 0x80; 3543 activity_percent += 0x80;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 384c37875cd0..1d9bb29adaef 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -356,6 +356,7 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
356 struct vega10_hwmgr *data = hwmgr->backend; 356 struct vega10_hwmgr *data = hwmgr->backend;
357 int i; 357 int i;
358 uint32_t sub_vendor_id, hw_revision; 358 uint32_t sub_vendor_id, hw_revision;
359 uint32_t top32, bottom32;
359 struct amdgpu_device *adev = hwmgr->adev; 360 struct amdgpu_device *adev = hwmgr->adev;
360 361
361 vega10_initialize_power_tune_defaults(hwmgr); 362 vega10_initialize_power_tune_defaults(hwmgr);
@@ -499,6 +500,14 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
499 (hw_revision == 0) && 500 (hw_revision == 0) &&
500 (sub_vendor_id != 0x1002)) 501 (sub_vendor_id != 0x1002))
501 data->smu_features[GNLD_PCC_LIMIT].supported = true; 502 data->smu_features[GNLD_PCC_LIMIT].supported = true;
503
504 /* Get the SN to turn into a Unique ID */
505 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
506 top32 = smum_get_argument(hwmgr);
507 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
508 bottom32 = smum_get_argument(hwmgr);
509
510 adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
502} 511}
503 512
504#ifdef PPLIB_VEGA10_EVV_SUPPORT 513#ifdef PPLIB_VEGA10_EVV_SUPPORT
@@ -2267,8 +2276,8 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
2267 pp_table->AcgAvfsGb.m1 = avfs_params.ulAcgGbFuseTableM1; 2276 pp_table->AcgAvfsGb.m1 = avfs_params.ulAcgGbFuseTableM1;
2268 pp_table->AcgAvfsGb.m2 = avfs_params.ulAcgGbFuseTableM2; 2277 pp_table->AcgAvfsGb.m2 = avfs_params.ulAcgGbFuseTableM2;
2269 pp_table->AcgAvfsGb.b = avfs_params.ulAcgGbFuseTableB; 2278 pp_table->AcgAvfsGb.b = avfs_params.ulAcgGbFuseTableB;
2270 pp_table->AcgAvfsGb.m1_shift = 0; 2279 pp_table->AcgAvfsGb.m1_shift = 24;
2271 pp_table->AcgAvfsGb.m2_shift = 0; 2280 pp_table->AcgAvfsGb.m2_shift = 12;
2272 pp_table->AcgAvfsGb.b_shift = 0; 2281 pp_table->AcgAvfsGb.b_shift = 0;
2273 2282
2274 } else { 2283 } else {
@@ -2364,6 +2373,10 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable)
2364 struct vega10_hwmgr *data = hwmgr->backend; 2373 struct vega10_hwmgr *data = hwmgr->backend;
2365 2374
2366 if (data->smu_features[GNLD_AVFS].supported) { 2375 if (data->smu_features[GNLD_AVFS].supported) {
2376 /* Already enabled or disabled */
2377 if (!(enable ^ data->smu_features[GNLD_AVFS].enabled))
2378 return 0;
2379
2367 if (enable) { 2380 if (enable) {
2368 PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, 2381 PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
2369 true, 2382 true,
@@ -2466,11 +2479,6 @@ static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
2466 return; 2479 return;
2467 } 2480 }
2468 } 2481 }
2469
2470 if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
2471 data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
2472 data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK;
2473 }
2474} 2482}
2475 2483
2476/** 2484/**
@@ -3683,6 +3691,10 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr,
3683 3691
3684 vega10_update_avfs(hwmgr); 3692 vega10_update_avfs(hwmgr);
3685 3693
3694 /*
3695 * Clear all OD flags except DPMTABLE_OD_UPDATE_VDDC.
3696 * That will help to keep AVFS disabled.
3697 */
3686 data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC; 3698 data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC;
3687 3699
3688 return 0; 3700 return 0;
@@ -3785,6 +3797,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
3785 *((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr); 3797 *((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr);
3786 *size = 4; 3798 *size = 4;
3787 break; 3799 break;
3800 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
3801 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot);
3802 *((uint32_t *)value) = smum_get_argument(hwmgr) *
3803 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
3804 *size = 4;
3805 break;
3806 case AMDGPU_PP_SENSOR_MEM_TEMP:
3807 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM);
3808 *((uint32_t *)value) = smum_get_argument(hwmgr) *
3809 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
3810 *size = 4;
3811 break;
3788 case AMDGPU_PP_SENSOR_UVD_POWER: 3812 case AMDGPU_PP_SENSOR_UVD_POWER:
3789 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; 3813 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
3790 *size = 4; 3814 *size = 4;
@@ -4852,12 +4876,22 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
4852static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, 4876static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
4853 struct PP_TemperatureRange *thermal_data) 4877 struct PP_TemperatureRange *thermal_data)
4854{ 4878{
4855 struct phm_ppt_v2_information *table_info = 4879 struct vega10_hwmgr *data = hwmgr->backend;
4856 (struct phm_ppt_v2_information *)hwmgr->pptable; 4880 PPTable_t *pp_table = &(data->smc_state_table.pp_table);
4857 4881
4858 memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); 4882 memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
4859 4883
4860 thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp * 4884 thermal_data->max = pp_table->TedgeLimit *
4885 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4886 thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
4887 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4888 thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
4889 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4890 thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
4891 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4892 thermal_data->mem_crit_max = pp_table->ThbmLimit *
4893 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4894 thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
4861 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 4895 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4862 4896
4863 return 0; 4897 return 0;
@@ -4988,13 +5022,70 @@ static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr,
4988 return true; 5022 return true;
4989} 5023}
4990 5024
5025static void vega10_odn_update_power_state(struct pp_hwmgr *hwmgr)
5026{
5027 struct vega10_hwmgr *data = hwmgr->backend;
5028 struct pp_power_state *ps = hwmgr->request_ps;
5029 struct vega10_power_state *vega10_ps;
5030 struct vega10_single_dpm_table *gfx_dpm_table =
5031 &data->dpm_table.gfx_table;
5032 struct vega10_single_dpm_table *soc_dpm_table =
5033 &data->dpm_table.soc_table;
5034 struct vega10_single_dpm_table *mem_dpm_table =
5035 &data->dpm_table.mem_table;
5036 int max_level;
5037
5038 if (!ps)
5039 return;
5040
5041 vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
5042 max_level = vega10_ps->performance_level_count - 1;
5043
5044 if (vega10_ps->performance_levels[max_level].gfx_clock !=
5045 gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
5046 vega10_ps->performance_levels[max_level].gfx_clock =
5047 gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value;
5048
5049 if (vega10_ps->performance_levels[max_level].soc_clock !=
5050 soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
5051 vega10_ps->performance_levels[max_level].soc_clock =
5052 soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value;
5053
5054 if (vega10_ps->performance_levels[max_level].mem_clock !=
5055 mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
5056 vega10_ps->performance_levels[max_level].mem_clock =
5057 mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value;
5058
5059 if (!hwmgr->ps)
5060 return;
5061
5062 ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + hwmgr->ps_size * (hwmgr->num_ps - 1));
5063 vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
5064 max_level = vega10_ps->performance_level_count - 1;
5065
5066 if (vega10_ps->performance_levels[max_level].gfx_clock !=
5067 gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
5068 vega10_ps->performance_levels[max_level].gfx_clock =
5069 gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value;
5070
5071 if (vega10_ps->performance_levels[max_level].soc_clock !=
5072 soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
5073 vega10_ps->performance_levels[max_level].soc_clock =
5074 soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value;
5075
5076 if (vega10_ps->performance_levels[max_level].mem_clock !=
5077 mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
5078 vega10_ps->performance_levels[max_level].mem_clock =
5079 mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value;
5080}
5081
4991static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, 5082static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
4992 enum PP_OD_DPM_TABLE_COMMAND type) 5083 enum PP_OD_DPM_TABLE_COMMAND type)
4993{ 5084{
4994 struct vega10_hwmgr *data = hwmgr->backend; 5085 struct vega10_hwmgr *data = hwmgr->backend;
4995 struct phm_ppt_v2_information *table_info = hwmgr->pptable; 5086 struct phm_ppt_v2_information *table_info = hwmgr->pptable;
4996 struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk; 5087 struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk;
4997 struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table; 5088 struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.mem_table;
4998 5089
4999 struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk = 5090 struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk =
5000 &data->odn_dpm_table.vdd_dep_on_socclk; 5091 &data->odn_dpm_table.vdd_dep_on_socclk;
@@ -5018,7 +5109,8 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
5018 break; 5109 break;
5019 } 5110 }
5020 if (j == od_vddc_lookup_table->count) { 5111 if (j == od_vddc_lookup_table->count) {
5021 od_vddc_lookup_table->entries[j-1].us_vdd = 5112 j = od_vddc_lookup_table->count - 1;
5113 od_vddc_lookup_table->entries[j].us_vdd =
5022 podn_vdd_dep->entries[i].vddc; 5114 podn_vdd_dep->entries[i].vddc;
5023 data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; 5115 data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
5024 } 5116 }
@@ -5026,25 +5118,38 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
5026 } 5118 }
5027 dpm_table = &data->dpm_table.soc_table; 5119 dpm_table = &data->dpm_table.soc_table;
5028 for (i = 0; i < dep_table->count; i++) { 5120 for (i = 0; i < dep_table->count; i++) {
5029 if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd && 5121 if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[podn_vdd_dep->count-1].vddInd &&
5030 dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) { 5122 dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count-1].clk) {
5031 data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; 5123 data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
5032 podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk; 5124 for (; (i < dep_table->count) &&
5033 dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; 5125 (dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk); i++) {
5126 podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[podn_vdd_dep->count-1].clk;
5127 dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk;
5128 }
5129 break;
5130 } else {
5131 dpm_table->dpm_levels[i].value = dep_table->entries[i].clk;
5132 podn_vdd_dep_on_socclk->entries[i].vddc = dep_table->entries[i].vddc;
5133 podn_vdd_dep_on_socclk->entries[i].vddInd = dep_table->entries[i].vddInd;
5134 podn_vdd_dep_on_socclk->entries[i].clk = dep_table->entries[i].clk;
5034 } 5135 }
5035 } 5136 }
5036 if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk < 5137 if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk <
5037 podn_vdd_dep->entries[dep_table->count-1].clk) { 5138 podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk) {
5038 data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; 5139 data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
5039 podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk; 5140 podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk =
5040 dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk; 5141 podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk;
5142 dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value =
5143 podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk;
5041 } 5144 }
5042 if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd < 5145 if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd <
5043 podn_vdd_dep->entries[dep_table->count-1].vddInd) { 5146 podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd) {
5044 data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; 5147 data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
5045 podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd; 5148 podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd =
5149 podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd;
5046 } 5150 }
5047 } 5151 }
5152 vega10_odn_update_power_state(hwmgr);
5048} 5153}
5049 5154
5050static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, 5155static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
@@ -5079,6 +5184,11 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
5079 } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) { 5184 } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) {
5080 memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table)); 5185 memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table));
5081 vega10_odn_initial_default_setting(hwmgr); 5186 vega10_odn_initial_default_setting(hwmgr);
5187 vega10_odn_update_power_state(hwmgr);
5188 /* force to update all clock tables */
5189 data->need_update_dpm_table = DPMTABLE_UPDATE_SCLK |
5190 DPMTABLE_UPDATE_MCLK |
5191 DPMTABLE_UPDATE_SOCCLK;
5082 return 0; 5192 return 0;
5083 } else if (PP_OD_COMMIT_DPM_TABLE == type) { 5193 } else if (PP_OD_COMMIT_DPM_TABLE == type) {
5084 vega10_check_dpm_table_updated(hwmgr); 5194 vega10_check_dpm_table_updated(hwmgr);
@@ -5201,8 +5311,12 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
5201 5311
5202int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) 5312int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
5203{ 5313{
5314 struct amdgpu_device *adev = hwmgr->adev;
5315
5204 hwmgr->hwmgr_func = &vega10_hwmgr_funcs; 5316 hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
5205 hwmgr->pptable_func = &vega10_pptable_funcs; 5317 hwmgr->pptable_func = &vega10_pptable_funcs;
5318 if (amdgpu_passthrough(adev))
5319 return vega10_baco_set_cap(hwmgr);
5206 5320
5207 return 0; 5321 return 0;
5208} 5322}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index b6767d74dc85..83d22cdeaa29 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
1371 1371
1372 return result; 1372 return result;
1373} 1373}
1374
1375int vega10_baco_set_cap(struct pp_hwmgr *hwmgr)
1376{
1377 int result = 0;
1378
1379 const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
1380
1381 powerplay_table = get_powerplay_table(hwmgr);
1382
1383 PP_ASSERT_WITH_CODE((powerplay_table != NULL),
1384 "Missing PowerPlay Table!", return -1);
1385
1386 result = check_powerplay_tables(hwmgr, powerplay_table);
1387
1388 PP_ASSERT_WITH_CODE((result == 0),
1389 "check_powerplay_tables failed", return result);
1390
1391 set_hw_cap(
1392 hwmgr,
1393 0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
1394 PHM_PlatformCaps_BACO);
1395 return result;
1396}
1397
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
index d83ed2af7aa3..da5fbec9b0cd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
@@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);
59extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index, 59extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,
60 struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *, 60 struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *,
61 struct pp_power_state *, void *, uint32_t)); 61 struct pp_power_state *, void *, uint32_t));
62extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
62#endif 63#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 707cd4b0357f..efb6d3762feb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -289,6 +289,8 @@ static int vega12_set_features_platform_caps(struct pp_hwmgr *hwmgr)
289static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr) 289static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)
290{ 290{
291 struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); 291 struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
292 struct amdgpu_device *adev = hwmgr->adev;
293 uint32_t top32, bottom32;
292 int i; 294 int i;
293 295
294 data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id = 296 data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id =
@@ -353,6 +355,14 @@ static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)
353 ((data->registry_data.disallowed_features >> i) & 1) ? 355 ((data->registry_data.disallowed_features >> i) & 1) ?
354 false : true; 356 false : true;
355 } 357 }
358
359 /* Get the SN to turn into a Unique ID */
360 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
361 top32 = smum_get_argument(hwmgr);
362 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
363 bottom32 = smum_get_argument(hwmgr);
364
365 adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
356} 366}
357 367
358static int vega12_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) 368static int vega12_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
@@ -1237,21 +1247,39 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
1237 return (mem_clk * 100); 1247 return (mem_clk * 100);
1238} 1248}
1239 1249
1250static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table)
1251{
1252 struct vega12_hwmgr *data =
1253 (struct vega12_hwmgr *)(hwmgr->backend);
1254 int ret = 0;
1255
1256 if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) {
1257 ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table,
1258 TABLE_SMU_METRICS, true);
1259 if (ret) {
1260 pr_info("Failed to export SMU metrics table!\n");
1261 return ret;
1262 }
1263 memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t));
1264 data->metrics_time = jiffies;
1265 } else
1266 memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t));
1267
1268 return ret;
1269}
1270
1240static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query) 1271static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query)
1241{ 1272{
1242#if 0 1273 SmuMetrics_t metrics_table;
1243 uint32_t value; 1274 int ret = 0;
1244 1275
1245 PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr, 1276 ret = vega12_get_metrics_table(hwmgr, &metrics_table);
1246 PPSMC_MSG_GetCurrPkgPwr), 1277 if (ret)
1247 "Failed to get current package power!", 1278 return ret;
1248 return -EINVAL);
1249 1279
1250 value = smum_get_argument(hwmgr); 1280 *query = metrics_table.CurrSocketPower << 8;
1251 /* power value is an integer */ 1281
1252 *query = value << 8; 1282 return ret;
1253#endif
1254 return 0;
1255} 1283}
1256 1284
1257static int vega12_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq) 1285static int vega12_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq)
@@ -1290,25 +1318,27 @@ static int vega12_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_f
1290 1318
1291static int vega12_get_current_activity_percent( 1319static int vega12_get_current_activity_percent(
1292 struct pp_hwmgr *hwmgr, 1320 struct pp_hwmgr *hwmgr,
1321 int idx,
1293 uint32_t *activity_percent) 1322 uint32_t *activity_percent)
1294{ 1323{
1324 SmuMetrics_t metrics_table;
1295 int ret = 0; 1325 int ret = 0;
1296 uint32_t current_activity = 50;
1297 1326
1298#if 0 1327 ret = vega12_get_metrics_table(hwmgr, &metrics_table);
1299 ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetAverageGfxActivity, 0); 1328 if (ret)
1300 if (!ret) { 1329 return ret;
1301 current_activity = smum_get_argument(hwmgr); 1330
1302 if (current_activity > 100) { 1331 switch (idx) {
1303 PP_ASSERT(false, 1332 case AMDGPU_PP_SENSOR_GPU_LOAD:
1304 "[GetCurrentActivityPercent] Activity Percentage Exceeds 100!"); 1333 *activity_percent = metrics_table.AverageGfxActivity;
1305 current_activity = 100; 1334 break;
1306 } 1335 case AMDGPU_PP_SENSOR_MEM_LOAD:
1307 } else 1336 *activity_percent = metrics_table.AverageUclkActivity;
1308 PP_ASSERT(false, 1337 break;
1309 "[GetCurrentActivityPercent] Attempt To Send Get Average Graphics Activity to SMU Failed!"); 1338 default:
1310#endif 1339 pr_err("Invalid index for retrieving clock activity\n");
1311 *activity_percent = current_activity; 1340 return -EINVAL;
1341 }
1312 1342
1313 return ret; 1343 return ret;
1314} 1344}
@@ -1317,6 +1347,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
1317 void *value, int *size) 1347 void *value, int *size)
1318{ 1348{
1319 struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); 1349 struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
1350 SmuMetrics_t metrics_table;
1320 int ret = 0; 1351 int ret = 0;
1321 1352
1322 switch (idx) { 1353 switch (idx) {
@@ -1331,7 +1362,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
1331 *size = 4; 1362 *size = 4;
1332 break; 1363 break;
1333 case AMDGPU_PP_SENSOR_GPU_LOAD: 1364 case AMDGPU_PP_SENSOR_GPU_LOAD:
1334 ret = vega12_get_current_activity_percent(hwmgr, (uint32_t *)value); 1365 case AMDGPU_PP_SENSOR_MEM_LOAD:
1366 ret = vega12_get_current_activity_percent(hwmgr, idx, (uint32_t *)value);
1335 if (!ret) 1367 if (!ret)
1336 *size = 4; 1368 *size = 4;
1337 break; 1369 break;
@@ -1339,6 +1371,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
1339 *((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr); 1371 *((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr);
1340 *size = 4; 1372 *size = 4;
1341 break; 1373 break;
1374 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
1375 ret = vega12_get_metrics_table(hwmgr, &metrics_table);
1376 if (ret)
1377 return ret;
1378
1379 *((uint32_t *)value) = metrics_table.TemperatureHotspot *
1380 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1381 *size = 4;
1382 break;
1383 case AMDGPU_PP_SENSOR_MEM_TEMP:
1384 ret = vega12_get_metrics_table(hwmgr, &metrics_table);
1385 if (ret)
1386 return ret;
1387
1388 *((uint32_t *)value) = metrics_table.TemperatureHBM *
1389 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1390 *size = 4;
1391 break;
1342 case AMDGPU_PP_SENSOR_UVD_POWER: 1392 case AMDGPU_PP_SENSOR_UVD_POWER:
1343 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; 1393 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
1344 *size = 4; 1394 *size = 4;
@@ -1349,6 +1399,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
1349 break; 1399 break;
1350 case AMDGPU_PP_SENSOR_GPU_POWER: 1400 case AMDGPU_PP_SENSOR_GPU_POWER:
1351 ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value); 1401 ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value);
1402 if (!ret)
1403 *size = 4;
1352 break; 1404 break;
1353 case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK: 1405 case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK:
1354 ret = vega12_get_enabled_smc_features(hwmgr, (uint64_t *)value); 1406 ret = vega12_get_enabled_smc_features(hwmgr, (uint64_t *)value);
@@ -2526,12 +2578,23 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
2526static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, 2578static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
2527 struct PP_TemperatureRange *thermal_data) 2579 struct PP_TemperatureRange *thermal_data)
2528{ 2580{
2529 struct phm_ppt_v3_information *pptable_information = 2581 struct vega12_hwmgr *data =
2530 (struct phm_ppt_v3_information *)hwmgr->pptable; 2582 (struct vega12_hwmgr *)(hwmgr->backend);
2583 PPTable_t *pp_table = &(data->smc_state_table.pp_table);
2531 2584
2532 memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); 2585 memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
2533 2586
2534 thermal_data->max = pptable_information->us_software_shutdown_temp * 2587 thermal_data->max = pp_table->TedgeLimit *
2588 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
2589 thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
2590 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
2591 thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
2592 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
2593 thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
2594 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
2595 thermal_data->mem_crit_max = pp_table->ThbmLimit *
2596 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
2597 thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
2535 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 2598 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
2536 2599
2537 return 0; 2600 return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
index b3e424d28994..73875399666a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
@@ -396,6 +396,9 @@ struct vega12_hwmgr {
396 396
397 /* ---- Gfxoff ---- */ 397 /* ---- Gfxoff ---- */
398 bool gfxoff_controlled_by_driver; 398 bool gfxoff_controlled_by_driver;
399
400 unsigned long metrics_time;
401 SmuMetrics_t metrics_table;
399}; 402};
400 403
401#define VEGA12_DPM2_NEAR_TDP_DEC 10 404#define VEGA12_DPM2_NEAR_TDP_DEC 10
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 9b9f87b84910..f27c6fbb192e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -97,6 +97,27 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
97 if (hwmgr->smu_version < 0x282100) 97 if (hwmgr->smu_version < 0x282100)
98 data->registry_data.disallowed_features |= FEATURE_ECC_MASK; 98 data->registry_data.disallowed_features |= FEATURE_ECC_MASK;
99 99
100 if (!(hwmgr->feature_mask & PP_PCIE_DPM_MASK))
101 data->registry_data.disallowed_features |= FEATURE_DPM_LINK_MASK;
102
103 if (!(hwmgr->feature_mask & PP_SCLK_DPM_MASK))
104 data->registry_data.disallowed_features |= FEATURE_DPM_GFXCLK_MASK;
105
106 if (!(hwmgr->feature_mask & PP_SOCCLK_DPM_MASK))
107 data->registry_data.disallowed_features |= FEATURE_DPM_SOCCLK_MASK;
108
109 if (!(hwmgr->feature_mask & PP_MCLK_DPM_MASK))
110 data->registry_data.disallowed_features |= FEATURE_DPM_UCLK_MASK;
111
112 if (!(hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK))
113 data->registry_data.disallowed_features |= FEATURE_DPM_DCEFCLK_MASK;
114
115 if (!(hwmgr->feature_mask & PP_ULV_MASK))
116 data->registry_data.disallowed_features |= FEATURE_ULV_MASK;
117
118 if (!(hwmgr->feature_mask & PP_SCLK_DEEP_SLEEP_MASK))
119 data->registry_data.disallowed_features |= FEATURE_DS_GFXCLK_MASK;
120
100 data->registry_data.od_state_in_dc_support = 0; 121 data->registry_data.od_state_in_dc_support = 0;
101 data->registry_data.thermal_support = 1; 122 data->registry_data.thermal_support = 1;
102 data->registry_data.skip_baco_hardware = 0; 123 data->registry_data.skip_baco_hardware = 0;
@@ -303,6 +324,8 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr)
303static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr) 324static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)
304{ 325{
305 struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); 326 struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
327 struct amdgpu_device *adev = hwmgr->adev;
328 uint32_t top32, bottom32;
306 int i; 329 int i;
307 330
308 data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id = 331 data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id =
@@ -372,6 +395,14 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)
372 ((data->registry_data.disallowed_features >> i) & 1) ? 395 ((data->registry_data.disallowed_features >> i) & 1) ?
373 false : true; 396 false : true;
374 } 397 }
398
399 /* Get the SN to turn into a Unique ID */
400 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
401 top32 = smum_get_argument(hwmgr);
402 smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
403 bottom32 = smum_get_argument(hwmgr);
404
405 adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
375} 406}
376 407
377static int vega20_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) 408static int vega20_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
@@ -2094,6 +2125,7 @@ static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr,
2094} 2125}
2095 2126
2096static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr, 2127static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr,
2128 int idx,
2097 uint32_t *activity_percent) 2129 uint32_t *activity_percent)
2098{ 2130{
2099 int ret = 0; 2131 int ret = 0;
@@ -2103,7 +2135,17 @@ static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr,
2103 if (ret) 2135 if (ret)
2104 return ret; 2136 return ret;
2105 2137
2106 *activity_percent = metrics_table.AverageGfxActivity; 2138 switch (idx) {
2139 case AMDGPU_PP_SENSOR_GPU_LOAD:
2140 *activity_percent = metrics_table.AverageGfxActivity;
2141 break;
2142 case AMDGPU_PP_SENSOR_MEM_LOAD:
2143 *activity_percent = metrics_table.AverageUclkActivity;
2144 break;
2145 default:
2146 pr_err("Invalid index for retrieving clock activity\n");
2147 return -EINVAL;
2148 }
2107 2149
2108 return ret; 2150 return ret;
2109} 2151}
@@ -2134,14 +2176,33 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
2134 *size = 4; 2176 *size = 4;
2135 break; 2177 break;
2136 case AMDGPU_PP_SENSOR_GPU_LOAD: 2178 case AMDGPU_PP_SENSOR_GPU_LOAD:
2137 ret = vega20_get_current_activity_percent(hwmgr, (uint32_t *)value); 2179 case AMDGPU_PP_SENSOR_MEM_LOAD:
2180 ret = vega20_get_current_activity_percent(hwmgr, idx, (uint32_t *)value);
2138 if (!ret) 2181 if (!ret)
2139 *size = 4; 2182 *size = 4;
2140 break; 2183 break;
2141 case AMDGPU_PP_SENSOR_GPU_TEMP: 2184 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
2142 *((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr); 2185 *((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr);
2143 *size = 4; 2186 *size = 4;
2144 break; 2187 break;
2188 case AMDGPU_PP_SENSOR_EDGE_TEMP:
2189 ret = vega20_get_metrics_table(hwmgr, &metrics_table);
2190 if (ret)
2191 return ret;
2192
2193 *((uint32_t *)value) = metrics_table.TemperatureEdge *
2194 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
2195 *size = 4;
2196 break;
2197 case AMDGPU_PP_SENSOR_MEM_TEMP:
2198 ret = vega20_get_metrics_table(hwmgr, &metrics_table);
2199 if (ret)
2200 return ret;
2201
2202 *((uint32_t *)value) = metrics_table.TemperatureHBM *
2203 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
2204 *size = 4;
2205 break;
2145 case AMDGPU_PP_SENSOR_UVD_POWER: 2206 case AMDGPU_PP_SENSOR_UVD_POWER:
2146 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; 2207 *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
2147 *size = 4; 2208 *size = 4;
@@ -3974,12 +4035,23 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
3974static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, 4035static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
3975 struct PP_TemperatureRange *thermal_data) 4036 struct PP_TemperatureRange *thermal_data)
3976{ 4037{
3977 struct phm_ppt_v3_information *pptable_information = 4038 struct vega20_hwmgr *data =
3978 (struct phm_ppt_v3_information *)hwmgr->pptable; 4039 (struct vega20_hwmgr *)(hwmgr->backend);
4040 PPTable_t *pp_table = &(data->smc_state_table.pp_table);
3979 4041
3980 memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); 4042 memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
3981 4043
3982 thermal_data->max = pptable_information->us_software_shutdown_temp * 4044 thermal_data->max = pp_table->TedgeLimit *
4045 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4046 thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
4047 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4048 thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
4049 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4050 thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
4051 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4052 thermal_data->mem_crit_max = pp_table->ThbmLimit *
4053 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
4054 thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
3983 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 4055 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
3984 4056
3985 return 0; 4057 return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index c8b168b3413b..3eb1de9ecf73 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -401,8 +401,12 @@ struct smu_context
401 uint32_t workload_setting[WORKLOAD_POLICY_MAX]; 401 uint32_t workload_setting[WORKLOAD_POLICY_MAX];
402 uint32_t power_profile_mode; 402 uint32_t power_profile_mode;
403 uint32_t default_power_profile_mode; 403 uint32_t default_power_profile_mode;
404 bool pm_enabled;
404 405
405 uint32_t smc_if_version; 406 uint32_t smc_if_version;
407
408 unsigned long metrics_time;
409 void *metrics_table;
406}; 410};
407 411
408struct pptable_funcs { 412struct pptable_funcs {
@@ -458,6 +462,8 @@ struct pptable_funcs {
458 uint32_t *mclk_mask, 462 uint32_t *mclk_mask,
459 uint32_t *soc_mask); 463 uint32_t *soc_mask);
460 int (*set_cpu_power_state)(struct smu_context *smu); 464 int (*set_cpu_power_state)(struct smu_context *smu);
465 int (*set_ppfeature_status)(struct smu_context *smu, uint64_t ppfeatures);
466 int (*get_ppfeature_status)(struct smu_context *smu, char *buf);
461}; 467};
462 468
463struct smu_funcs 469struct smu_funcs
@@ -727,7 +733,10 @@ struct smu_funcs
727 ((smu)->funcs->get_mclk ? (smu)->funcs->get_mclk((smu), (low)) : 0) 733 ((smu)->funcs->get_mclk ? (smu)->funcs->get_mclk((smu), (low)) : 0)
728#define smu_set_xgmi_pstate(smu, pstate) \ 734#define smu_set_xgmi_pstate(smu, pstate) \
729 ((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0) 735 ((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0)
730 736#define smu_set_ppfeature_status(smu, ppfeatures) \
737 ((smu)->ppt_funcs->set_ppfeature_status ? (smu)->ppt_funcs->set_ppfeature_status((smu), (ppfeatures)) : -EINVAL)
738#define smu_get_ppfeature_status(smu, buf) \
739 ((smu)->ppt_funcs->get_ppfeature_status ? (smu)->ppt_funcs->get_ppfeature_status((smu), (buf)) : -EINVAL)
731 740
732extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table, 741extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,
733 uint16_t *size, uint8_t *frev, uint8_t *crev, 742 uint16_t *size, uint8_t *frev, uint8_t *crev,
@@ -767,4 +776,5 @@ extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, b
767extern int smu_handle_task(struct smu_context *smu, 776extern int smu_handle_task(struct smu_context *smu,
768 enum amd_dpm_forced_level level, 777 enum amd_dpm_forced_level level,
769 enum amd_pp_task task_id); 778 enum amd_pp_task task_id);
779int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version);
770#endif 780#endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
index a99b5cbb113e..a5f2227a3971 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
@@ -124,6 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {
124struct PP_TemperatureRange { 124struct PP_TemperatureRange {
125 int min; 125 int min;
126 int max; 126 int max;
127 int edge_emergency_max;
128 int hotspot_min;
129 int hotspot_crit_max;
130 int hotspot_emergency_max;
131 int mem_min;
132 int mem_crit_max;
133 int mem_emergency_max;
127}; 134};
128 135
129struct PP_StateValidationBlock { 136struct PP_StateValidationBlock {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
index 201d2b6329ab..3e30768f9e1c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
@@ -27,14 +27,18 @@
27 27
28static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] = 28static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
29{ 29{
30 {-273150, 99000}, 30 {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
31 { 120000, 120000}, 31 { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
32}; 32};
33 33
34static const struct PP_TemperatureRange SMU7ThermalPolicy[] = 34static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
35{ 35{
36 {-273150, 99000}, 36 {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
37 { 120000, 120000}, 37 { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
38}; 38};
39 39
40#define CTF_OFFSET_EDGE 5
41#define CTF_OFFSET_HOTSPOT 5
42#define CTF_OFFSET_HBM 5
43
40#endif 44#endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index aa8d81f4111e..02c965d64256 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -36,6 +36,9 @@
36#define smnMP0_FW_INTF 0x30101c0 36#define smnMP0_FW_INTF 0x30101c0
37#define smnMP1_PUB_CTRL 0x3010b14 37#define smnMP1_PUB_CTRL 0x3010b14
38 38
39#define TEMP_RANGE_MIN (0)
40#define TEMP_RANGE_MAX (80 * 1000)
41
39struct smu_11_0_max_sustainable_clocks { 42struct smu_11_0_max_sustainable_clocks {
40 uint32_t display_clock; 43 uint32_t display_clock;
41 uint32_t phy_clock; 44 uint32_t phy_clock;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index 82550a8a3a3f..c5288831aa15 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -41,6 +41,7 @@ enum SMU_MEMBER {
41 HandshakeDisables = 0, 41 HandshakeDisables = 0,
42 VoltageChangeTimeout, 42 VoltageChangeTimeout,
43 AverageGraphicsActivity, 43 AverageGraphicsActivity,
44 AverageMemoryActivity,
44 PreVBlankGap, 45 PreVBlankGap,
45 VBlankTimeout, 46 VBlankTimeout,
46 UcodeLoadStatus, 47 UcodeLoadStatus,
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 92903a4cc4d8..d2eeb6240484 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -223,20 +223,27 @@ static int smu_v11_0_check_fw_status(struct smu_context *smu)
223 223
224static int smu_v11_0_check_fw_version(struct smu_context *smu) 224static int smu_v11_0_check_fw_version(struct smu_context *smu)
225{ 225{
226 uint32_t smu_version = 0xff; 226 uint32_t if_version = 0xff, smu_version = 0xff;
227 uint16_t smu_major;
228 uint8_t smu_minor, smu_debug;
227 int ret = 0; 229 int ret = 0;
228 230
229 ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion); 231 ret = smu_get_smc_version(smu, &if_version, &smu_version);
230 if (ret) 232 if (ret)
231 goto err; 233 return ret;
232 234
233 ret = smu_read_smc_arg(smu, &smu_version); 235 smu_major = (smu_version >> 16) & 0xffff;
234 if (ret) 236 smu_minor = (smu_version >> 8) & 0xff;
235 goto err; 237 smu_debug = (smu_version >> 0) & 0xff;
238
239 pr_info("SMU Driver IF Version = 0x%08x, SMU FW Version = 0x%08x (%d.%d.%d)\n",
240 if_version, smu_version, smu_major, smu_minor, smu_debug);
236 241
237 if (smu_version != smu->smc_if_version) 242 if (if_version != smu->smc_if_version) {
243 pr_err("SMU driver if version not matched\n");
238 ret = -EINVAL; 244 ret = -EINVAL;
239err: 245 }
246
240 return ret; 247 return ret;
241} 248}
242 249
@@ -353,6 +360,8 @@ static int smu_v11_0_init_power(struct smu_context *smu)
353{ 360{
354 struct smu_power_context *smu_power = &smu->smu_power; 361 struct smu_power_context *smu_power = &smu->smu_power;
355 362
363 if (!smu->pm_enabled)
364 return 0;
356 if (smu_power->power_context || smu_power->power_context_size != 0) 365 if (smu_power->power_context || smu_power->power_context_size != 0)
357 return -EINVAL; 366 return -EINVAL;
358 367
@@ -362,6 +371,13 @@ static int smu_v11_0_init_power(struct smu_context *smu)
362 return -ENOMEM; 371 return -ENOMEM;
363 smu_power->power_context_size = sizeof(struct smu_11_0_dpm_context); 372 smu_power->power_context_size = sizeof(struct smu_11_0_dpm_context);
364 373
374 smu->metrics_time = 0;
375 smu->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
376 if (!smu->metrics_table) {
377 kfree(smu_power->power_context);
378 return -ENOMEM;
379 }
380
365 return 0; 381 return 0;
366} 382}
367 383
@@ -369,10 +385,14 @@ static int smu_v11_0_fini_power(struct smu_context *smu)
369{ 385{
370 struct smu_power_context *smu_power = &smu->smu_power; 386 struct smu_power_context *smu_power = &smu->smu_power;
371 387
388 if (!smu->pm_enabled)
389 return 0;
372 if (!smu_power->power_context || smu_power->power_context_size == 0) 390 if (!smu_power->power_context || smu_power->power_context_size == 0)
373 return -EINVAL; 391 return -EINVAL;
374 392
393 kfree(smu->metrics_table);
375 kfree(smu_power->power_context); 394 kfree(smu_power->power_context);
395 smu->metrics_table = NULL;
376 smu_power->power_context = NULL; 396 smu_power->power_context = NULL;
377 smu_power->power_context_size = 0; 397 smu_power->power_context_size = 0;
378 398
@@ -634,6 +654,8 @@ static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
634{ 654{
635 struct smu_table_context *table_context = &smu->smu_table; 655 struct smu_table_context *table_context = &smu->smu_table;
636 656
657 if (!smu->pm_enabled)
658 return 0;
637 if (!table_context) 659 if (!table_context)
638 return -EINVAL; 660 return -EINVAL;
639 661
@@ -662,6 +684,9 @@ static int smu_v11_0_set_tool_table_location(struct smu_context *smu)
662static int smu_v11_0_init_display(struct smu_context *smu) 684static int smu_v11_0_init_display(struct smu_context *smu)
663{ 685{
664 int ret = 0; 686 int ret = 0;
687
688 if (!smu->pm_enabled)
689 return ret;
665 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0); 690 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0);
666 return ret; 691 return ret;
667} 692}
@@ -671,6 +696,8 @@ static int smu_v11_0_update_feature_enable_state(struct smu_context *smu, uint32
671 uint32_t feature_low = 0, feature_high = 0; 696 uint32_t feature_low = 0, feature_high = 0;
672 int ret = 0; 697 int ret = 0;
673 698
699 if (!smu->pm_enabled)
700 return ret;
674 if (feature_id >= 0 && feature_id < 31) 701 if (feature_id >= 0 && feature_id < 31)
675 feature_low = (1 << feature_id); 702 feature_low = (1 << feature_id);
676 else if (feature_id > 31 && feature_id < 63) 703 else if (feature_id > 31 && feature_id < 63)
@@ -777,10 +804,13 @@ static int smu_v11_0_system_features_control(struct smu_context *smu,
777 uint32_t feature_mask[2]; 804 uint32_t feature_mask[2];
778 int ret = 0; 805 int ret = 0;
779 806
780 ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures : 807 if (smu->pm_enabled) {
781 SMU_MSG_DisableAllSmuFeatures)); 808 ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
782 if (ret) 809 SMU_MSG_DisableAllSmuFeatures));
783 return ret; 810 if (ret)
811 return ret;
812 }
813
784 ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); 814 ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
785 if (ret) 815 if (ret)
786 return ret; 816 return ret;
@@ -797,6 +827,8 @@ static int smu_v11_0_notify_display_change(struct smu_context *smu)
797{ 827{
798 int ret = 0; 828 int ret = 0;
799 829
830 if (!smu->pm_enabled)
831 return ret;
800 if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) 832 if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT))
801 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1); 833 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1);
802 834
@@ -809,6 +841,8 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock,
809{ 841{
810 int ret = 0; 842 int ret = 0;
811 843
844 if (!smu->pm_enabled)
845 return ret;
812 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDcModeMaxDpmFreq, 846 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDcModeMaxDpmFreq,
813 clock_select << 16); 847 clock_select << 16);
814 if (ret) { 848 if (ret) {
@@ -995,9 +1029,20 @@ static int smu_v11_0_get_current_clk_freq(struct smu_context *smu, uint32_t clk_
995static int smu_v11_0_get_thermal_range(struct smu_context *smu, 1029static int smu_v11_0_get_thermal_range(struct smu_context *smu,
996 struct PP_TemperatureRange *range) 1030 struct PP_TemperatureRange *range)
997{ 1031{
1032 PPTable_t *pptable = smu->smu_table.driver_pptable;
998 memcpy(range, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); 1033 memcpy(range, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
999 1034
1000 range->max = smu->smu_table.software_shutdown_temp * 1035 range->max = pptable->TedgeLimit *
1036 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1037 range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) *
1038 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1039 range->hotspot_crit_max = pptable->ThotspotLimit *
1040 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1041 range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
1042 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1043 range->mem_crit_max = pptable->ThbmLimit *
1044 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1045 range->mem_emergency_max = (pptable->ThbmLimit + CTF_OFFSET_HBM)*
1001 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 1046 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1002 1047
1003 return 0; 1048 return 0;
@@ -1062,9 +1107,20 @@ static int smu_v11_0_set_thermal_fan_table(struct smu_context *smu)
1062static int smu_v11_0_start_thermal_control(struct smu_context *smu) 1107static int smu_v11_0_start_thermal_control(struct smu_context *smu)
1063{ 1108{
1064 int ret = 0; 1109 int ret = 0;
1065 struct PP_TemperatureRange range; 1110 struct PP_TemperatureRange range = {
1111 TEMP_RANGE_MIN,
1112 TEMP_RANGE_MAX,
1113 TEMP_RANGE_MAX,
1114 TEMP_RANGE_MIN,
1115 TEMP_RANGE_MAX,
1116 TEMP_RANGE_MAX,
1117 TEMP_RANGE_MIN,
1118 TEMP_RANGE_MAX,
1119 TEMP_RANGE_MAX};
1066 struct amdgpu_device *adev = smu->adev; 1120 struct amdgpu_device *adev = smu->adev;
1067 1121
1122 if (!smu->pm_enabled)
1123 return ret;
1068 smu_v11_0_get_thermal_range(smu, &range); 1124 smu_v11_0_get_thermal_range(smu, &range);
1069 1125
1070 if (smu->smu_table.thermal_controller_type) { 1126 if (smu->smu_table.thermal_controller_type) {
@@ -1082,11 +1138,39 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
1082 1138
1083 adev->pm.dpm.thermal.min_temp = range.min; 1139 adev->pm.dpm.thermal.min_temp = range.min;
1084 adev->pm.dpm.thermal.max_temp = range.max; 1140 adev->pm.dpm.thermal.max_temp = range.max;
1141 adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
1142 adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
1143 adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
1144 adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
1145 adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
1146 adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
1147 adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
1148
1149 return ret;
1150}
1151
1152static int smu_v11_0_get_metrics_table(struct smu_context *smu,
1153 SmuMetrics_t *metrics_table)
1154{
1155 int ret = 0;
1156
1157 if (!smu->metrics_time || time_after(jiffies, smu->metrics_time + HZ / 1000)) {
1158 ret = smu_update_table(smu, TABLE_SMU_METRICS,
1159 (void *)metrics_table, false);
1160 if (ret) {
1161 pr_info("Failed to export SMU metrics table!\n");
1162 return ret;
1163 }
1164 memcpy(smu->metrics_table, metrics_table, sizeof(SmuMetrics_t));
1165 smu->metrics_time = jiffies;
1166 } else
1167 memcpy(metrics_table, smu->metrics_table, sizeof(SmuMetrics_t));
1085 1168
1086 return ret; 1169 return ret;
1087} 1170}
1088 1171
1089static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, 1172static int smu_v11_0_get_current_activity_percent(struct smu_context *smu,
1173 enum amd_pp_sensors sensor,
1090 uint32_t *value) 1174 uint32_t *value)
1091{ 1175{
1092 int ret = 0; 1176 int ret = 0;
@@ -1095,31 +1179,64 @@ static int smu_v11_0_get_current_activity_percent(struct smu_context *smu,
1095 if (!value) 1179 if (!value)
1096 return -EINVAL; 1180 return -EINVAL;
1097 1181
1098 ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); 1182 ret = smu_v11_0_get_metrics_table(smu, &metrics);
1099 if (ret) 1183 if (ret)
1100 return ret; 1184 return ret;
1101 1185
1102 *value = metrics.AverageGfxActivity; 1186 switch (sensor) {
1187 case AMDGPU_PP_SENSOR_GPU_LOAD:
1188 *value = metrics.AverageGfxActivity;
1189 break;
1190 case AMDGPU_PP_SENSOR_MEM_LOAD:
1191 *value = metrics.AverageUclkActivity;
1192 break;
1193 default:
1194 pr_err("Invalid sensor for retrieving clock activity\n");
1195 return -EINVAL;
1196 }
1103 1197
1104 return 0; 1198 return 0;
1105} 1199}
1106 1200
1107static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, uint32_t *value) 1201static int smu_v11_0_thermal_get_temperature(struct smu_context *smu,
1202 enum amd_pp_sensors sensor,
1203 uint32_t *value)
1108{ 1204{
1109 struct amdgpu_device *adev = smu->adev; 1205 struct amdgpu_device *adev = smu->adev;
1206 SmuMetrics_t metrics;
1110 uint32_t temp = 0; 1207 uint32_t temp = 0;
1208 int ret = 0;
1111 1209
1112 if (!value) 1210 if (!value)
1113 return -EINVAL; 1211 return -EINVAL;
1114 1212
1115 temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); 1213 ret = smu_v11_0_get_metrics_table(smu, &metrics);
1116 temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> 1214 if (ret)
1117 CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; 1215 return ret;
1118 1216
1119 temp = temp & 0x1ff; 1217 switch (sensor) {
1120 temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES; 1218 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
1219 temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
1220 temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
1221 CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
1222
1223 temp = temp & 0x1ff;
1224 temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES;
1121 1225
1122 *value = temp; 1226 *value = temp;
1227 break;
1228 case AMDGPU_PP_SENSOR_EDGE_TEMP:
1229 *value = metrics.TemperatureEdge *
1230 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1231 break;
1232 case AMDGPU_PP_SENSOR_MEM_TEMP:
1233 *value = metrics.TemperatureHBM *
1234 PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
1235 break;
1236 default:
1237 pr_err("Invalid sensor for retrieving temp\n");
1238 return -EINVAL;
1239 }
1123 1240
1124 return 0; 1241 return 0;
1125} 1242}
@@ -1132,7 +1249,7 @@ static int smu_v11_0_get_gpu_power(struct smu_context *smu, uint32_t *value)
1132 if (!value) 1249 if (!value)
1133 return -EINVAL; 1250 return -EINVAL;
1134 1251
1135 ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); 1252 ret = smu_v11_0_get_metrics_table(smu, &metrics);
1136 if (ret) 1253 if (ret)
1137 return ret; 1254 return ret;
1138 1255
@@ -1174,7 +1291,9 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,
1174 int ret = 0; 1291 int ret = 0;
1175 switch (sensor) { 1292 switch (sensor) {
1176 case AMDGPU_PP_SENSOR_GPU_LOAD: 1293 case AMDGPU_PP_SENSOR_GPU_LOAD:
1294 case AMDGPU_PP_SENSOR_MEM_LOAD:
1177 ret = smu_v11_0_get_current_activity_percent(smu, 1295 ret = smu_v11_0_get_current_activity_percent(smu,
1296 sensor,
1178 (uint32_t *)data); 1297 (uint32_t *)data);
1179 *size = 4; 1298 *size = 4;
1180 break; 1299 break;
@@ -1186,8 +1305,10 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,
1186 ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, (uint32_t *)data); 1305 ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, (uint32_t *)data);
1187 *size = 4; 1306 *size = 4;
1188 break; 1307 break;
1189 case AMDGPU_PP_SENSOR_GPU_TEMP: 1308 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
1190 ret = smu_v11_0_thermal_get_temperature(smu, (uint32_t *)data); 1309 case AMDGPU_PP_SENSOR_EDGE_TEMP:
1310 case AMDGPU_PP_SENSOR_MEM_TEMP:
1311 ret = smu_v11_0_thermal_get_temperature(smu, sensor, (uint32_t *)data);
1191 *size = 4; 1312 *size = 4;
1192 break; 1313 break;
1193 case AMDGPU_PP_SENSOR_GPU_POWER: 1314 case AMDGPU_PP_SENSOR_GPU_POWER:
@@ -1235,6 +1356,8 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu,
1235 PPCLK_e clk_select = 0; 1356 PPCLK_e clk_select = 0;
1236 uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; 1357 uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000;
1237 1358
1359 if (!smu->pm_enabled)
1360 return -EINVAL;
1238 if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { 1361 if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) {
1239 switch (clk_type) { 1362 switch (clk_type) {
1240 case amd_pp_dcef_clock: 1363 case amd_pp_dcef_clock:
@@ -1518,7 +1641,7 @@ static int smu_v11_0_get_power_profile_mode(struct smu_context *smu, char *buf)
1518 "PD_Data_error_rate_coeff"}; 1641 "PD_Data_error_rate_coeff"};
1519 int result = 0; 1642 int result = 0;
1520 1643
1521 if (!buf) 1644 if (!smu->pm_enabled || !buf)
1522 return -EINVAL; 1645 return -EINVAL;
1523 1646
1524 size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", 1647 size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n",
@@ -1605,6 +1728,8 @@ static int smu_v11_0_set_power_profile_mode(struct smu_context *smu, long *input
1605 1728
1606 smu->power_profile_mode = input[size]; 1729 smu->power_profile_mode = input[size];
1607 1730
1731 if (!smu->pm_enabled)
1732 return ret;
1608 if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { 1733 if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
1609 pr_err("Invalid power profile mode %d\n", smu->power_profile_mode); 1734 pr_err("Invalid power profile mode %d\n", smu->power_profile_mode);
1610 return -EINVAL; 1735 return -EINVAL;
@@ -1710,24 +1835,24 @@ static int smu_v11_0_update_od8_settings(struct smu_context *smu,
1710 1835
1711static int smu_v11_0_dpm_set_uvd_enable(struct smu_context *smu, bool enable) 1836static int smu_v11_0_dpm_set_uvd_enable(struct smu_context *smu, bool enable)
1712{ 1837{
1713 if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT)) 1838 if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT))
1714 return 0; 1839 return 0;
1715 1840
1716 if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT)) 1841 if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT))
1717 return 0; 1842 return 0;
1718 1843
1719 return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable); 1844 return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable);
1720} 1845}
1721 1846
1722static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable) 1847static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable)
1723{ 1848{
1724 if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT)) 1849 if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT))
1725 return 0; 1850 return 0;
1726 1851
1727 if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT)) 1852 if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT))
1728 return 0; 1853 return 0;
1729 1854
1730 return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable); 1855 return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable);
1731} 1856}
1732 1857
1733static int smu_v11_0_get_current_rpm(struct smu_context *smu, 1858static int smu_v11_0_get_current_rpm(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 669bd0c2a16c..9ef57fcf7e78 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -2254,6 +2254,8 @@ static uint32_t ci_get_offsetof(uint32_t type, uint32_t member)
2254 return offsetof(SMU7_SoftRegisters, VoltageChangeTimeout); 2254 return offsetof(SMU7_SoftRegisters, VoltageChangeTimeout);
2255 case AverageGraphicsActivity: 2255 case AverageGraphicsActivity:
2256 return offsetof(SMU7_SoftRegisters, AverageGraphicsA); 2256 return offsetof(SMU7_SoftRegisters, AverageGraphicsA);
2257 case AverageMemoryActivity:
2258 return offsetof(SMU7_SoftRegisters, AverageMemoryA);
2257 case PreVBlankGap: 2259 case PreVBlankGap:
2258 return offsetof(SMU7_SoftRegisters, PreVBlankGap); 2260 return offsetof(SMU7_SoftRegisters, PreVBlankGap);
2259 case VBlankTimeout: 2261 case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index bc8375cbf297..0ce85b73338e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -2304,6 +2304,8 @@ static uint32_t fiji_get_offsetof(uint32_t type, uint32_t member)
2304 return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout); 2304 return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout);
2305 case AverageGraphicsActivity: 2305 case AverageGraphicsActivity:
2306 return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity); 2306 return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity);
2307 case AverageMemoryActivity:
2308 return offsetof(SMU73_SoftRegisters, AverageMemoryActivity);
2307 case PreVBlankGap: 2309 case PreVBlankGap:
2308 return offsetof(SMU73_SoftRegisters, PreVBlankGap); 2310 return offsetof(SMU73_SoftRegisters, PreVBlankGap);
2309 case VBlankTimeout: 2311 case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
index 375ccf6ff5f2..f24f13d77808 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
@@ -2219,6 +2219,8 @@ static uint32_t iceland_get_offsetof(uint32_t type, uint32_t member)
2219 return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout); 2219 return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout);
2220 case AverageGraphicsActivity: 2220 case AverageGraphicsActivity:
2221 return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity); 2221 return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity);
2222 case AverageMemoryActivity:
2223 return offsetof(SMU71_SoftRegisters, AverageMemoryActivity);
2222 case PreVBlankGap: 2224 case PreVBlankGap:
2223 return offsetof(SMU71_SoftRegisters, PreVBlankGap); 2225 return offsetof(SMU71_SoftRegisters, PreVBlankGap);
2224 case VBlankTimeout: 2226 case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 2d4cfe14f72e..0d8958e71b94 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -2313,6 +2313,8 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member)
2313 return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout); 2313 return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout);
2314 case AverageGraphicsActivity: 2314 case AverageGraphicsActivity:
2315 return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity); 2315 return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity);
2316 case AverageMemoryActivity:
2317 return offsetof(SMU74_SoftRegisters, AverageMemoryActivity);
2316 case PreVBlankGap: 2318 case PreVBlankGap:
2317 return offsetof(SMU74_SoftRegisters, PreVBlankGap); 2319 return offsetof(SMU74_SoftRegisters, PreVBlankGap);
2318 case VBlankTimeout: 2320 case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index 3ed6c5f1e5cf..060c0f7f5238 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -2611,6 +2611,8 @@ static uint32_t tonga_get_offsetof(uint32_t type, uint32_t member)
2611 return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout); 2611 return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout);
2612 case AverageGraphicsActivity: 2612 case AverageGraphicsActivity:
2613 return offsetof(SMU72_SoftRegisters, AverageGraphicsActivity); 2613 return offsetof(SMU72_SoftRegisters, AverageGraphicsActivity);
2614 case AverageMemoryActivity:
2615 return offsetof(SMU72_SoftRegisters, AverageMemoryActivity);
2614 case PreVBlankGap: 2616 case PreVBlankGap:
2615 return offsetof(SMU72_SoftRegisters, PreVBlankGap); 2617 return offsetof(SMU72_SoftRegisters, PreVBlankGap);
2616 case VBlankTimeout: 2618 case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index ddb801517667..1eaf0fa28ef7 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -287,8 +287,26 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr)
287 priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01; 287 priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01;
288 priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t); 288 priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t);
289 289
290 /* allocate space for SMU_METRICS table */
291 ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
292 sizeof(SmuMetrics_t),
293 PAGE_SIZE,
294 AMDGPU_GEM_DOMAIN_VRAM,
295 &priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
296 &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
297 &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
298 if (ret)
299 goto err4;
300
301 priv->smu_tables.entry[TABLE_SMU_METRICS].version = 0x01;
302 priv->smu_tables.entry[TABLE_SMU_METRICS].size = sizeof(SmuMetrics_t);
303
290 return 0; 304 return 0;
291 305
306err4:
307 amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
308 &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
309 &priv->smu_tables.entry[TABLE_OVERDRIVE].table);
292err3: 310err3:
293 amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle, 311 amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle,
294 &priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr, 312 &priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr,
@@ -334,6 +352,9 @@ static int vega12_smu_fini(struct pp_hwmgr *hwmgr)
334 amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle, 352 amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
335 &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr, 353 &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
336 &priv->smu_tables.entry[TABLE_OVERDRIVE].table); 354 &priv->smu_tables.entry[TABLE_OVERDRIVE].table);
355 amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
356 &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
357 &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
337 kfree(hwmgr->smu_backend); 358 kfree(hwmgr->smu_backend);
338 hwmgr->smu_backend = NULL; 359 hwmgr->smu_backend = NULL;
339 } 360 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
index 1e69300f6175..d499204b2184 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
@@ -2167,6 +2167,8 @@ static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member)
2167 return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout); 2167 return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout);
2168 case AverageGraphicsActivity: 2168 case AverageGraphicsActivity:
2169 return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity); 2169 return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity);
2170 case AverageMemoryActivity:
2171 return offsetof(SMU75_SoftRegisters, AverageMemoryActivity);
2170 case PreVBlankGap: 2172 case PreVBlankGap:
2171 return offsetof(SMU75_SoftRegisters, PreVBlankGap); 2173 return offsetof(SMU75_SoftRegisters, PreVBlankGap);
2172 case VBlankTimeout: 2174 case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 8fafcbdb1dfd..4aa8f5a69c4c 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -2374,6 +2374,157 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu,
2374 return ret; 2374 return ret;
2375} 2375}
2376 2376
2377static int vega20_get_enabled_smc_features(struct smu_context *smu,
2378 uint64_t *features_enabled)
2379{
2380 uint32_t feature_mask[2] = {0, 0};
2381 int ret = 0;
2382
2383 ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
2384 if (ret)
2385 return ret;
2386
2387 *features_enabled = ((((uint64_t)feature_mask[0] << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) |
2388 (((uint64_t)feature_mask[1] << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK));
2389
2390 return ret;
2391}
2392
2393static int vega20_enable_smc_features(struct smu_context *smu,
2394 bool enable, uint64_t feature_mask)
2395{
2396 uint32_t smu_features_low, smu_features_high;
2397 int ret = 0;
2398
2399 smu_features_low = (uint32_t)((feature_mask & SMU_FEATURES_LOW_MASK) >> SMU_FEATURES_LOW_SHIFT);
2400 smu_features_high = (uint32_t)((feature_mask & SMU_FEATURES_HIGH_MASK) >> SMU_FEATURES_HIGH_SHIFT);
2401
2402 if (enable) {
2403 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow,
2404 smu_features_low);
2405 if (ret)
2406 return ret;
2407 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh,
2408 smu_features_high);
2409 if (ret)
2410 return ret;
2411 } else {
2412 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow,
2413 smu_features_low);
2414 if (ret)
2415 return ret;
2416 ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh,
2417 smu_features_high);
2418 if (ret)
2419 return ret;
2420 }
2421
2422 return 0;
2423
2424}
2425
2426static int vega20_get_ppfeature_status(struct smu_context *smu, char *buf)
2427{
2428 static const char *ppfeature_name[] = {
2429 "DPM_PREFETCHER",
2430 "GFXCLK_DPM",
2431 "UCLK_DPM",
2432 "SOCCLK_DPM",
2433 "UVD_DPM",
2434 "VCE_DPM",
2435 "ULV",
2436 "MP0CLK_DPM",
2437 "LINK_DPM",
2438 "DCEFCLK_DPM",
2439 "GFXCLK_DS",
2440 "SOCCLK_DS",
2441 "LCLK_DS",
2442 "PPT",
2443 "TDC",
2444 "THERMAL",
2445 "GFX_PER_CU_CG",
2446 "RM",
2447 "DCEFCLK_DS",
2448 "ACDC",
2449 "VR0HOT",
2450 "VR1HOT",
2451 "FW_CTF",
2452 "LED_DISPLAY",
2453 "FAN_CONTROL",
2454 "GFX_EDC",
2455 "GFXOFF",
2456 "CG",
2457 "FCLK_DPM",
2458 "FCLK_DS",
2459 "MP1CLK_DS",
2460 "MP0CLK_DS",
2461 "XGMI",
2462 "ECC"};
2463 static const char *output_title[] = {
2464 "FEATURES",
2465 "BITMASK",
2466 "ENABLEMENT"};
2467 uint64_t features_enabled;
2468 int i;
2469 int ret = 0;
2470 int size = 0;
2471
2472 ret = vega20_get_enabled_smc_features(smu, &features_enabled);
2473 if (ret)
2474 return ret;
2475
2476 size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled);
2477 size += sprintf(buf + size, "%-19s %-22s %s\n",
2478 output_title[0],
2479 output_title[1],
2480 output_title[2]);
2481 for (i = 0; i < GNLD_FEATURES_MAX; i++) {
2482 size += sprintf(buf + size, "%-19s 0x%016llx %6s\n",
2483 ppfeature_name[i],
2484 1ULL << i,
2485 (features_enabled & (1ULL << i)) ? "Y" : "N");
2486 }
2487
2488 return size;
2489}
2490
2491static int vega20_set_ppfeature_status(struct smu_context *smu, uint64_t new_ppfeature_masks)
2492{
2493 uint64_t features_enabled;
2494 uint64_t features_to_enable;
2495 uint64_t features_to_disable;
2496 int ret = 0;
2497
2498 if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX))
2499 return -EINVAL;
2500
2501 ret = vega20_get_enabled_smc_features(smu, &features_enabled);
2502 if (ret)
2503 return ret;
2504
2505 features_to_disable =
2506 features_enabled & ~new_ppfeature_masks;
2507 features_to_enable =
2508 ~features_enabled & new_ppfeature_masks;
2509
2510 pr_debug("features_to_disable 0x%llx\n", features_to_disable);
2511 pr_debug("features_to_enable 0x%llx\n", features_to_enable);
2512
2513 if (features_to_disable) {
2514 ret = vega20_enable_smc_features(smu, false, features_to_disable);
2515 if (ret)
2516 return ret;
2517 }
2518
2519 if (features_to_enable) {
2520 ret = vega20_enable_smc_features(smu, true, features_to_enable);
2521 if (ret)
2522 return ret;
2523 }
2524
2525 return 0;
2526}
2527
2377static const struct pptable_funcs vega20_ppt_funcs = { 2528static const struct pptable_funcs vega20_ppt_funcs = {
2378 .alloc_dpm_context = vega20_allocate_dpm_context, 2529 .alloc_dpm_context = vega20_allocate_dpm_context,
2379 .store_powerplay_table = vega20_store_powerplay_table, 2530 .store_powerplay_table = vega20_store_powerplay_table,
@@ -2404,6 +2555,8 @@ static const struct pptable_funcs vega20_ppt_funcs = {
2404 .unforce_dpm_levels = vega20_unforce_dpm_levels, 2555 .unforce_dpm_levels = vega20_unforce_dpm_levels,
2405 .upload_dpm_level = vega20_upload_dpm_level, 2556 .upload_dpm_level = vega20_upload_dpm_level,
2406 .get_profiling_clk_mask = vega20_get_profiling_clk_mask, 2557 .get_profiling_clk_mask = vega20_get_profiling_clk_mask,
2558 .set_ppfeature_status = vega20_set_ppfeature_status,
2559 .get_ppfeature_status = vega20_get_ppfeature_status,
2407}; 2560};
2408 2561
2409void vega20_set_ppt_funcs(struct smu_context *smu) 2562void vega20_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
index 5a0d2af63173..87f3a8303645 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
@@ -36,6 +36,50 @@
36#define AVFS_CURVE 0 36#define AVFS_CURVE 0
37#define OD8_HOTCURVE_TEMPERATURE 85 37#define OD8_HOTCURVE_TEMPERATURE 85
38 38
39#define SMU_FEATURES_LOW_MASK 0x00000000FFFFFFFF
40#define SMU_FEATURES_LOW_SHIFT 0
41#define SMU_FEATURES_HIGH_MASK 0xFFFFFFFF00000000
42#define SMU_FEATURES_HIGH_SHIFT 32
43
44enum {
45 GNLD_DPM_PREFETCHER = 0,
46 GNLD_DPM_GFXCLK,
47 GNLD_DPM_UCLK,
48 GNLD_DPM_SOCCLK,
49 GNLD_DPM_UVD,
50 GNLD_DPM_VCE,
51 GNLD_ULV,
52 GNLD_DPM_MP0CLK,
53 GNLD_DPM_LINK,
54 GNLD_DPM_DCEFCLK,
55 GNLD_DS_GFXCLK,
56 GNLD_DS_SOCCLK,
57 GNLD_DS_LCLK,
58 GNLD_PPT,
59 GNLD_TDC,
60 GNLD_THERMAL,
61 GNLD_GFX_PER_CU_CG,
62 GNLD_RM,
63 GNLD_DS_DCEFCLK,
64 GNLD_ACDC,
65 GNLD_VR0HOT,
66 GNLD_VR1HOT,
67 GNLD_FW_CTF,
68 GNLD_LED_DISPLAY,
69 GNLD_FAN_CONTROL,
70 GNLD_DIDT,
71 GNLD_GFXOFF,
72 GNLD_CG,
73 GNLD_DPM_FCLK,
74 GNLD_DS_FCLK,
75 GNLD_DS_MP1CLK,
76 GNLD_DS_MP0CLK,
77 GNLD_XGMI,
78 GNLD_ECC,
79
80 GNLD_FEATURES_MAX
81};
82
39struct vega20_dpm_level { 83struct vega20_dpm_level {
40 bool enabled; 84 bool enabled;
41 uint32_t value; 85 uint32_t value;