Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 120
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 117
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h | 39
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 39
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 49
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 223
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 61
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c | 282
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | 98
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 105
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 38
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 100
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 140
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 36
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 66
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 144
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 276
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 351
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 382
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 110
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 141
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 69
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 69
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 702
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dma.c | 68
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h | 130
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 29
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 76
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 74
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 1
78 files changed, 2605 insertions, 2152 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 138cb787d27e..f76bcb9c45e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -53,7 +53,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
53 amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ 53 amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
54 amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ 54 amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
55 amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ 55 amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
56 amdgpu_gmc.o amdgpu_xgmi.o 56 amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o
57 57
58# add asic specific block 58# add asic specific block
59amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ 59amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -105,6 +105,7 @@ amdgpu-y += \
105# add GFX block 105# add GFX block
106amdgpu-y += \ 106amdgpu-y += \
107 amdgpu_gfx.o \ 107 amdgpu_gfx.o \
108 amdgpu_rlc.o \
108 gfx_v8_0.o \ 109 gfx_v8_0.o \
109 gfx_v9_0.o 110 gfx_v9_0.o
110 111
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d0102cfc8efb..42f882c633ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -75,6 +75,7 @@
75#include "amdgpu_sdma.h" 75#include "amdgpu_sdma.h"
76#include "amdgpu_dm.h" 76#include "amdgpu_dm.h"
77#include "amdgpu_virt.h" 77#include "amdgpu_virt.h"
78#include "amdgpu_csa.h"
78#include "amdgpu_gart.h" 79#include "amdgpu_gart.h"
79#include "amdgpu_debugfs.h" 80#include "amdgpu_debugfs.h"
80#include "amdgpu_job.h" 81#include "amdgpu_job.h"
@@ -151,6 +152,7 @@ extern int amdgpu_compute_multipipe;
151extern int amdgpu_gpu_recovery; 152extern int amdgpu_gpu_recovery;
152extern int amdgpu_emu_mode; 153extern int amdgpu_emu_mode;
153extern uint amdgpu_smu_memory_pool_size; 154extern uint amdgpu_smu_memory_pool_size;
155extern uint amdgpu_dc_feature_mask;
154extern struct amdgpu_mgpu_info mgpu_info; 156extern struct amdgpu_mgpu_info mgpu_info;
155 157
156#ifdef CONFIG_DRM_AMDGPU_SI 158#ifdef CONFIG_DRM_AMDGPU_SI
@@ -432,7 +434,7 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
432 * default non-graphics QWORD index is 0xe0 - 0xFF inclusive 434 * default non-graphics QWORD index is 0xe0 - 0xFF inclusive
433 */ 435 */
434 436
435 /* sDMA engines reserved from 0xe0 -oxef */ 437 /* sDMA engines reserved from 0xe0 -0xef */
436 AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0, 438 AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0,
437 AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1, 439 AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1,
438 AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8, 440 AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8,
@@ -830,7 +832,6 @@ struct amdgpu_device {
830 bool need_dma32; 832 bool need_dma32;
831 bool need_swiotlb; 833 bool need_swiotlb;
832 bool accel_working; 834 bool accel_working;
833 struct work_struct reset_work;
834 struct notifier_block acpi_nb; 835 struct notifier_block acpi_nb;
835 struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; 836 struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
836 struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; 837 struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c31a8849e9f8..60f9a87e9c74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -144,7 +144,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
144 KGD_MAX_QUEUES); 144 KGD_MAX_QUEUES);
145 145
146 /* remove the KIQ bit as well */ 146 /* remove the KIQ bit as well */
147 if (adev->gfx.kiq.ring.ready) 147 if (adev->gfx.kiq.ring.sched.ready)
148 clear_bit(amdgpu_gfx_queue_to_bit(adev, 148 clear_bit(amdgpu_gfx_queue_to_bit(adev,
149 adev->gfx.kiq.ring.me - 1, 149 adev->gfx.kiq.ring.me - 1,
150 adev->gfx.kiq.ring.pipe, 150 adev->gfx.kiq.ring.pipe,
@@ -268,9 +268,9 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
268 amdgpu_device_gpu_recover(adev, NULL); 268 amdgpu_device_gpu_recover(adev, NULL);
269} 269}
270 270
271int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 271int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
272 void **mem_obj, uint64_t *gpu_addr, 272 void **mem_obj, uint64_t *gpu_addr,
273 void **cpu_ptr, bool mqd_gfx9) 273 void **cpu_ptr, bool mqd_gfx9)
274{ 274{
275 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 275 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
276 struct amdgpu_bo *bo = NULL; 276 struct amdgpu_bo *bo = NULL;
@@ -340,7 +340,7 @@ allocate_mem_reserve_bo_failed:
340 return r; 340 return r;
341} 341}
342 342
343void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) 343void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
344{ 344{
345 struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; 345 struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
346 346
@@ -351,8 +351,8 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
351 amdgpu_bo_unref(&(bo)); 351 amdgpu_bo_unref(&(bo));
352} 352}
353 353
354void get_local_mem_info(struct kgd_dev *kgd, 354void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
355 struct kfd_local_mem_info *mem_info) 355 struct kfd_local_mem_info *mem_info)
356{ 356{
357 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 357 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
358 uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : 358 uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
@@ -383,7 +383,7 @@ void get_local_mem_info(struct kgd_dev *kgd,
383 mem_info->mem_clk_max = 100; 383 mem_info->mem_clk_max = 100;
384} 384}
385 385
386uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) 386uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
387{ 387{
388 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 388 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
389 389
@@ -392,7 +392,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
392 return 0; 392 return 0;
393} 393}
394 394
395uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) 395uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
396{ 396{
397 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 397 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
398 398
@@ -405,7 +405,7 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
405 return 100; 405 return 100;
406} 406}
407 407
408void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) 408void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
409{ 409{
410 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 410 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
411 struct amdgpu_cu_info acu_info = adev->gfx.cu_info; 411 struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8e0d4f7196b4..bcf587b4ba98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -134,16 +134,16 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
134void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); 134void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
135 135
136/* Shared API */ 136/* Shared API */
137int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 137int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
138 void **mem_obj, uint64_t *gpu_addr, 138 void **mem_obj, uint64_t *gpu_addr,
139 void **cpu_ptr, bool mqd_gfx9); 139 void **cpu_ptr, bool mqd_gfx9);
140void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); 140void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
141void get_local_mem_info(struct kgd_dev *kgd, 141void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
142 struct kfd_local_mem_info *mem_info); 142 struct kfd_local_mem_info *mem_info);
143uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); 143uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
144 144
145uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); 145uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
146void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); 146void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
147uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); 147uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
148uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); 148uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
149 149
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 244d9834a381..72a357dae070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -173,13 +173,6 @@ static int get_tile_config(struct kgd_dev *kgd,
173} 173}
174 174
175static const struct kfd2kgd_calls kfd2kgd = { 175static const struct kfd2kgd_calls kfd2kgd = {
176 .init_gtt_mem_allocation = alloc_gtt_mem,
177 .free_gtt_mem = free_gtt_mem,
178 .get_local_mem_info = get_local_mem_info,
179 .get_gpu_clock_counter = get_gpu_clock_counter,
180 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
181 .alloc_pasid = amdgpu_pasid_alloc,
182 .free_pasid = amdgpu_pasid_free,
183 .program_sh_mem_settings = kgd_program_sh_mem_settings, 176 .program_sh_mem_settings = kgd_program_sh_mem_settings,
184 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 177 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
185 .init_interrupts = kgd_init_interrupts, 178 .init_interrupts = kgd_init_interrupts,
@@ -200,28 +193,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
200 .get_fw_version = get_fw_version, 193 .get_fw_version = get_fw_version,
201 .set_scratch_backing_va = set_scratch_backing_va, 194 .set_scratch_backing_va = set_scratch_backing_va,
202 .get_tile_config = get_tile_config, 195 .get_tile_config = get_tile_config,
203 .get_cu_info = get_cu_info,
204 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
205 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
206 .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
207 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
208 .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
209 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
210 .set_vm_context_page_table_base = set_vm_context_page_table_base, 196 .set_vm_context_page_table_base = set_vm_context_page_table_base,
211 .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
212 .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
213 .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
214 .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
215 .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
216 .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
217 .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
218 .invalidate_tlbs = invalidate_tlbs, 197 .invalidate_tlbs = invalidate_tlbs,
219 .invalidate_tlbs_vmid = invalidate_tlbs_vmid, 198 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
220 .submit_ib = amdgpu_amdkfd_submit_ib,
221 .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
222 .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, 199 .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
223 .gpu_recover = amdgpu_amdkfd_gpu_reset,
224 .set_compute_idle = amdgpu_amdkfd_set_compute_idle
225}; 200};
226 201
227struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) 202struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 9f149914ad6c..0e2a56b6a9b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -128,13 +128,6 @@ static int get_tile_config(struct kgd_dev *kgd,
128} 128}
129 129
130static const struct kfd2kgd_calls kfd2kgd = { 130static const struct kfd2kgd_calls kfd2kgd = {
131 .init_gtt_mem_allocation = alloc_gtt_mem,
132 .free_gtt_mem = free_gtt_mem,
133 .get_local_mem_info = get_local_mem_info,
134 .get_gpu_clock_counter = get_gpu_clock_counter,
135 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
136 .alloc_pasid = amdgpu_pasid_alloc,
137 .free_pasid = amdgpu_pasid_free,
138 .program_sh_mem_settings = kgd_program_sh_mem_settings, 131 .program_sh_mem_settings = kgd_program_sh_mem_settings,
139 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 132 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
140 .init_interrupts = kgd_init_interrupts, 133 .init_interrupts = kgd_init_interrupts,
@@ -157,27 +150,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
157 .get_fw_version = get_fw_version, 150 .get_fw_version = get_fw_version,
158 .set_scratch_backing_va = set_scratch_backing_va, 151 .set_scratch_backing_va = set_scratch_backing_va,
159 .get_tile_config = get_tile_config, 152 .get_tile_config = get_tile_config,
160 .get_cu_info = get_cu_info,
161 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
162 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
163 .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
164 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
165 .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
166 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
167 .set_vm_context_page_table_base = set_vm_context_page_table_base, 153 .set_vm_context_page_table_base = set_vm_context_page_table_base,
168 .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
169 .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
170 .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
171 .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
172 .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
173 .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
174 .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
175 .invalidate_tlbs = invalidate_tlbs, 154 .invalidate_tlbs = invalidate_tlbs,
176 .invalidate_tlbs_vmid = invalidate_tlbs_vmid, 155 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
177 .submit_ib = amdgpu_amdkfd_submit_ib,
178 .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
179 .gpu_recover = amdgpu_amdkfd_gpu_reset,
180 .set_compute_idle = amdgpu_amdkfd_set_compute_idle
181}; 156};
182 157
183struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) 158struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 42cb4c4e0929..03b604c96d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -46,38 +46,9 @@
46#include "v9_structs.h" 46#include "v9_structs.h"
47#include "soc15.h" 47#include "soc15.h"
48#include "soc15d.h" 48#include "soc15d.h"
49#include "mmhub_v1_0.h"
50#include "gfxhub_v1_0.h"
49 51
50/* HACK: MMHUB and GC both have VM-related register with the same
51 * names but different offsets. Define the MMHUB register we need here
52 * with a prefix. A proper solution would be to move the functions
53 * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
54 * respectively.
55 */
56#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
57#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
58
59#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
60#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
61
62#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
63#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
64#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
65#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
66
67#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
68#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
69#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
70#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
71
72#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
73#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
74#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
75#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
76
77#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
78#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
79#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
80#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
81 52
82#define V9_PIPE_PER_MEC (4) 53#define V9_PIPE_PER_MEC (4)
83#define V9_QUEUES_PER_PIPE_MEC (8) 54#define V9_QUEUES_PER_PIPE_MEC (8)
@@ -167,13 +138,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
167} 138}
168 139
169static const struct kfd2kgd_calls kfd2kgd = { 140static const struct kfd2kgd_calls kfd2kgd = {
170 .init_gtt_mem_allocation = alloc_gtt_mem,
171 .free_gtt_mem = free_gtt_mem,
172 .get_local_mem_info = get_local_mem_info,
173 .get_gpu_clock_counter = get_gpu_clock_counter,
174 .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
175 .alloc_pasid = amdgpu_pasid_alloc,
176 .free_pasid = amdgpu_pasid_free,
177 .program_sh_mem_settings = kgd_program_sh_mem_settings, 141 .program_sh_mem_settings = kgd_program_sh_mem_settings,
178 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, 142 .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
179 .init_interrupts = kgd_init_interrupts, 143 .init_interrupts = kgd_init_interrupts,
@@ -196,26 +160,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
196 .get_fw_version = get_fw_version, 160 .get_fw_version = get_fw_version,
197 .set_scratch_backing_va = set_scratch_backing_va, 161 .set_scratch_backing_va = set_scratch_backing_va,
198 .get_tile_config = amdgpu_amdkfd_get_tile_config, 162 .get_tile_config = amdgpu_amdkfd_get_tile_config,
199 .get_cu_info = get_cu_info,
200 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
201 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
202 .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
203 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
204 .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
205 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
206 .set_vm_context_page_table_base = set_vm_context_page_table_base, 163 .set_vm_context_page_table_base = set_vm_context_page_table_base,
207 .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
208 .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
209 .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
210 .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
211 .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
212 .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
213 .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
214 .invalidate_tlbs = invalidate_tlbs, 164 .invalidate_tlbs = invalidate_tlbs,
215 .invalidate_tlbs_vmid = invalidate_tlbs_vmid, 165 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
216 .submit_ib = amdgpu_amdkfd_submit_ib,
217 .gpu_recover = amdgpu_amdkfd_gpu_reset,
218 .set_compute_idle = amdgpu_amdkfd_set_compute_idle,
219 .get_hive_id = amdgpu_amdkfd_get_hive_id, 166 .get_hive_id = amdgpu_amdkfd_get_hive_id,
220}; 167};
221 168
@@ -785,15 +732,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
785static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) 732static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
786{ 733{
787 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 734 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
788 uint32_t req = (1 << vmid) |
789 (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
790 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
791 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
792 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
793 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
794 VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
795
796 mutex_lock(&adev->srbm_mutex);
797 735
798 /* Use legacy mode tlb invalidation. 736 /* Use legacy mode tlb invalidation.
799 * 737 *
@@ -810,34 +748,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
810 * TODO 2: support range-based invalidation, requires kfg2kgd 748 * TODO 2: support range-based invalidation, requires kfg2kgd
811 * interface change 749 * interface change
812 */ 750 */
813 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), 751 amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
814 0xffffffff);
815 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
816 0x0000001f);
817
818 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
819 mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
820 0xffffffff);
821 WREG32(SOC15_REG_OFFSET(MMHUB, 0,
822 mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
823 0x0000001f);
824
825 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
826
827 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
828 req);
829
830 while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
831 (1 << vmid)))
832 cpu_relax();
833
834 while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
835 mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
836 (1 << vmid)))
837 cpu_relax();
838
839 mutex_unlock(&adev->srbm_mutex);
840
841} 752}
842 753
843static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) 754static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
@@ -876,7 +787,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
876 if (adev->in_gpu_reset) 787 if (adev->in_gpu_reset)
877 return -EIO; 788 return -EIO;
878 789
879 if (ring->ready) 790 if (ring->sched.ready)
880 return invalidate_tlbs_with_kiq(adev, pasid); 791 return invalidate_tlbs_with_kiq(adev, pasid);
881 792
882 for (vmid = 0; vmid < 16; vmid++) { 793 for (vmid = 0; vmid < 16; vmid++) {
@@ -1016,7 +927,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
1016 uint64_t page_table_base) 927 uint64_t page_table_base)
1017{ 928{
1018 struct amdgpu_device *adev = get_amdgpu_device(kgd); 929 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1019 uint64_t base = page_table_base | AMDGPU_PTE_VALID;
1020 930
1021 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { 931 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
1022 pr_err("trying to set page table base for wrong VMID %u\n", 932 pr_err("trying to set page table base for wrong VMID %u\n",
@@ -1028,25 +938,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
1028 * now, all processes share the same address space size, like 938 * now, all processes share the same address space size, like
1029 * on GFX8 and older. 939 * on GFX8 and older.
1030 */ 940 */
1031 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); 941 mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
1032 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
1033
1034 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
1035 lower_32_bits(adev->vm_manager.max_pfn - 1));
1036 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
1037 upper_32_bits(adev->vm_manager.max_pfn - 1));
1038
1039 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
1040 WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
1041
1042 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
1043 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
1044
1045 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
1046 lower_32_bits(adev->vm_manager.max_pfn - 1));
1047 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
1048 upper_32_bits(adev->vm_manager.max_pfn - 1));
1049 942
1050 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); 943 gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
1051 WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
1052} 944}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 8816c697b205..ceadeeadfa56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -330,7 +330,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
330 case CHIP_TOPAZ: 330 case CHIP_TOPAZ:
331 if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) || 331 if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) ||
332 ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) || 332 ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) ||
333 ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87))) { 333 ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)) ||
334 ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
335 ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
334 info->is_kicker = true; 336 info->is_kicker = true;
335 strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); 337 strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
336 } else 338 } else
@@ -351,7 +353,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
351 if (type == CGS_UCODE_ID_SMU) { 353 if (type == CGS_UCODE_ID_SMU) {
352 if (((adev->pdev->device == 0x67ef) && 354 if (((adev->pdev->device == 0x67ef) &&
353 ((adev->pdev->revision == 0xe0) || 355 ((adev->pdev->revision == 0xe0) ||
354 (adev->pdev->revision == 0xe2) ||
355 (adev->pdev->revision == 0xe5))) || 356 (adev->pdev->revision == 0xe5))) ||
356 ((adev->pdev->device == 0x67ff) && 357 ((adev->pdev->device == 0x67ff) &&
357 ((adev->pdev->revision == 0xcf) || 358 ((adev->pdev->revision == 0xcf) ||
@@ -359,8 +360,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
359 (adev->pdev->revision == 0xff)))) { 360 (adev->pdev->revision == 0xff)))) {
360 info->is_kicker = true; 361 info->is_kicker = true;
361 strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); 362 strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
362 } else 363 } else if ((adev->pdev->device == 0x67ef) &&
364 (adev->pdev->revision == 0xe2)) {
365 info->is_kicker = true;
366 strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
367 } else {
363 strcpy(fw_name, "amdgpu/polaris11_smc.bin"); 368 strcpy(fw_name, "amdgpu/polaris11_smc.bin");
369 }
364 } else if (type == CGS_UCODE_ID_SMU_SK) { 370 } else if (type == CGS_UCODE_ID_SMU_SK) {
365 strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); 371 strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
366 } 372 }
@@ -378,14 +384,31 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
378 (adev->pdev->revision == 0xef))) { 384 (adev->pdev->revision == 0xef))) {
379 info->is_kicker = true; 385 info->is_kicker = true;
380 strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); 386 strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
381 } else 387 } else if ((adev->pdev->device == 0x67df) &&
388 ((adev->pdev->revision == 0xe1) ||
389 (adev->pdev->revision == 0xf7))) {
390 info->is_kicker = true;
391 strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
392 } else {
382 strcpy(fw_name, "amdgpu/polaris10_smc.bin"); 393 strcpy(fw_name, "amdgpu/polaris10_smc.bin");
394 }
383 } else if (type == CGS_UCODE_ID_SMU_SK) { 395 } else if (type == CGS_UCODE_ID_SMU_SK) {
384 strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); 396 strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
385 } 397 }
386 break; 398 break;
387 case CHIP_POLARIS12: 399 case CHIP_POLARIS12:
388 strcpy(fw_name, "amdgpu/polaris12_smc.bin"); 400 if (((adev->pdev->device == 0x6987) &&
401 ((adev->pdev->revision == 0xc0) ||
402 (adev->pdev->revision == 0xc3))) ||
403 ((adev->pdev->device == 0x6981) &&
404 ((adev->pdev->revision == 0x00) ||
405 (adev->pdev->revision == 0x01) ||
406 (adev->pdev->revision == 0x10)))) {
407 info->is_kicker = true;
408 strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
409 } else {
410 strcpy(fw_name, "amdgpu/polaris12_smc.bin");
411 }
389 break; 412 break;
390 case CHIP_VEGAM: 413 case CHIP_VEGAM:
391 strcpy(fw_name, "amdgpu/vegam_smc.bin"); 414 strcpy(fw_name, "amdgpu/vegam_smc.bin");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 35bc8fc3bc70..024dfbd87f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1260,8 +1260,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1260 return 0; 1260 return 0;
1261 1261
1262error_abort: 1262error_abort:
1263 dma_fence_put(&job->base.s_fence->finished); 1263 drm_sched_job_cleanup(&job->base);
1264 job->base.s_fence = NULL;
1265 amdgpu_mn_unlock(p->mn); 1264 amdgpu_mn_unlock(p->mn);
1266 1265
1267error_unlock: 1266error_unlock:
@@ -1285,7 +1284,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1285 1284
1286 r = amdgpu_cs_parser_init(&parser, data); 1285 r = amdgpu_cs_parser_init(&parser, data);
1287 if (r) { 1286 if (r) {
1288 DRM_ERROR("Failed to initialize parser !\n"); 1287 DRM_ERROR("Failed to initialize parser %d!\n", r);
1289 goto out; 1288 goto out;
1290 } 1289 }
1291 1290
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
new file mode 100644
index 000000000000..0c590ddf250a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -0,0 +1,117 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21
22 * * Author: Monk.liu@amd.com
23 */
24
25#include "amdgpu.h"
26
27uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
28{
29 uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
30
31 addr -= AMDGPU_VA_RESERVED_SIZE;
32 addr = amdgpu_gmc_sign_extend(addr);
33
34 return addr;
35}
36
37int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
38 u32 domain, uint32_t size)
39{
40 int r;
41 void *ptr;
42
43 r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
44 domain, bo,
45 NULL, &ptr);
46 if (!bo)
47 return -ENOMEM;
48
49 memset(ptr, 0, size);
50 return 0;
51}
52
53void amdgpu_free_static_csa(struct amdgpu_bo **bo)
54{
55 amdgpu_bo_free_kernel(bo, NULL, NULL);
56}
57
58/*
59 * amdgpu_map_static_csa should be called during amdgpu_vm_init
60 * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
61 * submission of GFX should use this virtual address within META_DATA init
62 * package to support SRIOV gfx preemption.
63 */
64int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
65 struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
66 uint64_t csa_addr, uint32_t size)
67{
68 struct ww_acquire_ctx ticket;
69 struct list_head list;
70 struct amdgpu_bo_list_entry pd;
71 struct ttm_validate_buffer csa_tv;
72 int r;
73
74 INIT_LIST_HEAD(&list);
75 INIT_LIST_HEAD(&csa_tv.head);
76 csa_tv.bo = &bo->tbo;
77 csa_tv.shared = true;
78
79 list_add(&csa_tv.head, &list);
80 amdgpu_vm_get_pd_bo(vm, &list, &pd);
81
82 r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
83 if (r) {
84 DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
85 return r;
86 }
87
88 *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
89 if (!*bo_va) {
90 ttm_eu_backoff_reservation(&ticket, &list);
91 DRM_ERROR("failed to create bo_va for static CSA\n");
92 return -ENOMEM;
93 }
94
95 r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
96 size);
97 if (r) {
98 DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
99 amdgpu_vm_bo_rmv(adev, *bo_va);
100 ttm_eu_backoff_reservation(&ticket, &list);
101 return r;
102 }
103
104 r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
105 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
106 AMDGPU_PTE_EXECUTABLE);
107
108 if (r) {
109 DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
110 amdgpu_vm_bo_rmv(adev, *bo_va);
111 ttm_eu_backoff_reservation(&ticket, &list);
112 return r;
113 }
114
115 ttm_eu_backoff_reservation(&ticket, &list);
116 return 0;
117}
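
The two exported helpers in the new file above are meant to be used as a pair: allocate the static CSA buffer once per device, then map it into each process VM at the reserved address returned by amdgpu_csa_vaddr(). A minimal sketch of that call sequence, assuming the allocation parameters used in the amdgpu_device.c hunk below and the AMDGPU_GMC_HOLE_MASK helper from amdgpu_gmc.h (the VM-side call site itself is not part of this excerpt):

	#include "amdgpu.h"

	/* Illustrative sketch only, not part of the patch. */
	static int example_csa_setup(struct amdgpu_device *adev,
				     struct amdgpu_vm *vm,
				     struct amdgpu_bo_va **csa_va)
	{
		uint64_t csa_addr;
		int r;

		/* Reserve the static CSA buffer once, right after GMC init (SR-IOV only). */
		r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
					       AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_CSA_SIZE);
		if (r)
			return r;

		/* Strip the sign extension before handing the VA to the VM code. */
		csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;

		/* Map the buffer at the reserved top-of-VA address in this VM. */
		return amdgpu_map_static_csa(adev, vm, adev->virt.csa_obj, csa_va,
					     csa_addr, AMDGPU_CSA_SIZE);
	}
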
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
new file mode 100644
index 000000000000..524b4437a021
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Author: Monk.liu@amd.com
23 */
24
25#ifndef AMDGPU_CSA_MANAGER_H
26#define AMDGPU_CSA_MANAGER_H
27
28#define AMDGPU_CSA_SIZE (128 * 1024)
29
30uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev);
31uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
32int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
33 u32 domain, uint32_t size);
34int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
35 struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
36 uint64_t csa_addr, uint32_t size);
37void amdgpu_free_static_csa(struct amdgpu_bo **bo);
38
39#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 30bc345d6fdf..590588a82471 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1656,7 +1656,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1656 1656
1657 /* right after GMC hw init, we create CSA */ 1657 /* right after GMC hw init, we create CSA */
1658 if (amdgpu_sriov_vf(adev)) { 1658 if (amdgpu_sriov_vf(adev)) {
1659 r = amdgpu_allocate_static_csa(adev); 1659 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1660 AMDGPU_GEM_DOMAIN_VRAM,
1661 AMDGPU_CSA_SIZE);
1660 if (r) { 1662 if (r) {
1661 DRM_ERROR("allocate CSA failed %d\n", r); 1663 DRM_ERROR("allocate CSA failed %d\n", r);
1662 return r; 1664 return r;
@@ -1681,7 +1683,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1681 if (r) 1683 if (r)
1682 return r; 1684 return r;
1683 1685
1684 amdgpu_xgmi_add_device(adev); 1686 if (adev->gmc.xgmi.num_physical_nodes > 1)
1687 amdgpu_xgmi_add_device(adev);
1685 amdgpu_amdkfd_device_init(adev); 1688 amdgpu_amdkfd_device_init(adev);
1686 1689
1687 if (amdgpu_sriov_vf(adev)) 1690 if (amdgpu_sriov_vf(adev))
@@ -1890,7 +1893,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1890 1893
1891 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 1894 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1892 amdgpu_ucode_free_bo(adev); 1895 amdgpu_ucode_free_bo(adev);
1893 amdgpu_free_static_csa(adev); 1896 amdgpu_free_static_csa(&adev->virt.csa_obj);
1894 amdgpu_device_wb_fini(adev); 1897 amdgpu_device_wb_fini(adev);
1895 amdgpu_device_vram_scratch_fini(adev); 1898 amdgpu_device_vram_scratch_fini(adev);
1896 } 1899 }
@@ -3295,13 +3298,35 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3295 return false; 3298 return false;
3296 } 3299 }
3297 3300
3298 if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 && 3301 if (amdgpu_gpu_recovery == 0)
3299 !amdgpu_sriov_vf(adev))) { 3302 goto disabled;
3300 DRM_INFO("GPU recovery disabled.\n"); 3303
3301 return false; 3304 if (amdgpu_sriov_vf(adev))
3305 return true;
3306
3307 if (amdgpu_gpu_recovery == -1) {
3308 switch (adev->asic_type) {
3309 case CHIP_TOPAZ:
3310 case CHIP_TONGA:
3311 case CHIP_FIJI:
3312 case CHIP_POLARIS10:
3313 case CHIP_POLARIS11:
3314 case CHIP_POLARIS12:
3315 case CHIP_VEGAM:
3316 case CHIP_VEGA20:
3317 case CHIP_VEGA10:
3318 case CHIP_VEGA12:
3319 break;
3320 default:
3321 goto disabled;
3322 }
3302 } 3323 }
3303 3324
3304 return true; 3325 return true;
3326
3327disabled:
3328 DRM_INFO("GPU recovery disabled.\n");
3329 return false;
3305} 3330}
3306 3331
3307/** 3332/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 943dbf3c5da1..8de55f7f1a3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -127,6 +127,9 @@ int amdgpu_compute_multipipe = -1;
127int amdgpu_gpu_recovery = -1; /* auto */ 127int amdgpu_gpu_recovery = -1; /* auto */
128int amdgpu_emu_mode = 0; 128int amdgpu_emu_mode = 0;
129uint amdgpu_smu_memory_pool_size = 0; 129uint amdgpu_smu_memory_pool_size = 0;
130/* FBC (bit 0) disabled by default*/
131uint amdgpu_dc_feature_mask = 0;
132
130struct amdgpu_mgpu_info mgpu_info = { 133struct amdgpu_mgpu_info mgpu_info = {
131 .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), 134 .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
132}; 135};
@@ -631,6 +634,14 @@ module_param(halt_if_hws_hang, int, 0644);
631MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); 634MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
632#endif 635#endif
633 636
637/**
638 * DOC: dcfeaturemask (uint)
639 * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
640 * The default is the current set of stable display features.
641 */
642MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))");
643module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
644
634static const struct pci_device_id pciidlist[] = { 645static const struct pci_device_id pciidlist[] = {
635#ifdef CONFIG_DRM_AMDGPU_SI 646#ifdef CONFIG_DRM_AMDGPU_SI
636 {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, 647 {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 5448cf27654e..ee47c11e92ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -398,9 +398,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
398 ring->fence_drv.irq_type = irq_type; 398 ring->fence_drv.irq_type = irq_type;
399 ring->fence_drv.initialized = true; 399 ring->fence_drv.initialized = true;
400 400
401 dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, " 401 DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr "
402 "cpu addr 0x%p\n", ring->idx, 402 "0x%016llx, cpu addr 0x%p\n", ring->name,
403 ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); 403 ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
404 return 0; 404 return 0;
405} 405}
406 406
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 11fea28f8ad3..6d11e1721147 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
248 } 248 }
249 mb(); 249 mb();
250 amdgpu_asic_flush_hdp(adev, NULL); 250 amdgpu_asic_flush_hdp(adev, NULL);
251 amdgpu_gmc_flush_gpu_tlb(adev, 0); 251 amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
252 return 0; 252 return 0;
253} 253}
254 254
@@ -259,6 +259,8 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
259 * @offset: offset into the GPU's gart aperture 259 * @offset: offset into the GPU's gart aperture
260 * @pages: number of pages to bind 260 * @pages: number of pages to bind
261 * @dma_addr: DMA addresses of pages 261 * @dma_addr: DMA addresses of pages
262 * @flags: page table entry flags
263 * @dst: CPU address of the gart table
262 * 264 *
263 * Map the dma_addresses into GART entries (all asics). 265 * Map the dma_addresses into GART entries (all asics).
264 * Returns 0 for success, -EINVAL for failure. 266 * Returns 0 for success, -EINVAL for failure.
@@ -331,7 +333,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
331 333
332 mb(); 334 mb();
333 amdgpu_asic_flush_hdp(adev, NULL); 335 amdgpu_asic_flush_hdp(adev, NULL);
334 amdgpu_gmc_flush_gpu_tlb(adev, 0); 336 amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
335 return 0; 337 return 0;
336} 338}
337 339
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index 9ff62887e4e3..afa2e2877d87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -41,6 +41,7 @@ struct amdgpu_bo;
41 41
42struct amdgpu_gart { 42struct amdgpu_gart {
43 struct amdgpu_bo *bo; 43 struct amdgpu_bo *bo;
44 /* CPU kmapped address of gart table */
44 void *ptr; 45 void *ptr;
45 unsigned num_gpu_pages; 46 unsigned num_gpu_pages;
46 unsigned num_cpu_pages; 47 unsigned num_cpu_pages;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1a656b8657f7..6a70c0b7105f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -25,6 +25,7 @@
25#include <drm/drmP.h> 25#include <drm/drmP.h>
26#include "amdgpu.h" 26#include "amdgpu.h"
27#include "amdgpu_gfx.h" 27#include "amdgpu_gfx.h"
28#include "amdgpu_rlc.h"
28 29
29/* delay 0.1 second to enable gfx off feature */ 30/* delay 0.1 second to enable gfx off feature */
30#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) 31#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index b61b5c11aead..f790e15bcd08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -29,6 +29,7 @@
29 */ 29 */
30#include "clearstate_defs.h" 30#include "clearstate_defs.h"
31#include "amdgpu_ring.h" 31#include "amdgpu_ring.h"
32#include "amdgpu_rlc.h"
32 33
33/* GFX current status */ 34/* GFX current status */
34#define AMDGPU_GFX_NORMAL_MODE 0x00000000L 35#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
@@ -37,59 +38,6 @@
37#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L 38#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
38#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L 39#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
39 40
40
41struct amdgpu_rlc_funcs {
42 void (*enter_safe_mode)(struct amdgpu_device *adev);
43 void (*exit_safe_mode)(struct amdgpu_device *adev);
44};
45
46struct amdgpu_rlc {
47 /* for power gating */
48 struct amdgpu_bo *save_restore_obj;
49 uint64_t save_restore_gpu_addr;
50 volatile uint32_t *sr_ptr;
51 const u32 *reg_list;
52 u32 reg_list_size;
53 /* for clear state */
54 struct amdgpu_bo *clear_state_obj;
55 uint64_t clear_state_gpu_addr;
56 volatile uint32_t *cs_ptr;
57 const struct cs_section_def *cs_data;
58 u32 clear_state_size;
59 /* for cp tables */
60 struct amdgpu_bo *cp_table_obj;
61 uint64_t cp_table_gpu_addr;
62 volatile uint32_t *cp_table_ptr;
63 u32 cp_table_size;
64
65 /* safe mode for updating CG/PG state */
66 bool in_safe_mode;
67 const struct amdgpu_rlc_funcs *funcs;
68
69 /* for firmware data */
70 u32 save_and_restore_offset;
71 u32 clear_state_descriptor_offset;
72 u32 avail_scratch_ram_locations;
73 u32 reg_restore_list_size;
74 u32 reg_list_format_start;
75 u32 reg_list_format_separate_start;
76 u32 starting_offsets_start;
77 u32 reg_list_format_size_bytes;
78 u32 reg_list_size_bytes;
79 u32 reg_list_format_direct_reg_list_length;
80 u32 save_restore_list_cntl_size_bytes;
81 u32 save_restore_list_gpm_size_bytes;
82 u32 save_restore_list_srm_size_bytes;
83
84 u32 *register_list_format;
85 u32 *register_restore;
86 u8 *save_restore_list_cntl;
87 u8 *save_restore_list_gpm;
88 u8 *save_restore_list_srm;
89
90 bool is_rlc_v2_1;
91};
92
93#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES 41#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
94 42
95struct amdgpu_mec { 43struct amdgpu_mec {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 6fa7ef446e46..8c57924c075f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -64,7 +64,7 @@ struct amdgpu_vmhub {
64struct amdgpu_gmc_funcs { 64struct amdgpu_gmc_funcs {
65 /* flush the vm tlb via mmio */ 65 /* flush the vm tlb via mmio */
66 void (*flush_gpu_tlb)(struct amdgpu_device *adev, 66 void (*flush_gpu_tlb)(struct amdgpu_device *adev,
67 uint32_t vmid); 67 uint32_t vmid, uint32_t flush_type);
68 /* flush the vm tlb via ring */ 68 /* flush the vm tlb via ring */
69 uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, 69 uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
70 uint64_t pd_addr); 70 uint64_t pd_addr);
@@ -89,7 +89,7 @@ struct amdgpu_gmc_funcs {
89 89
90struct amdgpu_xgmi { 90struct amdgpu_xgmi {
91 /* from psp */ 91 /* from psp */
92 u64 device_id; 92 u64 node_id;
93 u64 hive_id; 93 u64 hive_id;
94 /* fixed per family */ 94 /* fixed per family */
95 u64 node_segment_size; 95 u64 node_segment_size;
@@ -151,7 +151,7 @@ struct amdgpu_gmc {
151 struct amdgpu_xgmi xgmi; 151 struct amdgpu_xgmi xgmi;
152}; 152};
153 153
154#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) 154#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
155#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) 155#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
156#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) 156#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
157#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) 157#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
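
Note on the flush_gpu_tlb change above: the callback and the amdgpu_gmc_flush_gpu_tlb() wrapper now take an explicit flush type, and every caller converted in this series passes 0 to keep the legacy behaviour. The amdgpu_gart.c hunks earlier reduce to the pattern below, shown only to make the new third argument explicit:

	/* Flush the GART/system context (VMID 0) using the legacy flush type (0). */
	mb();
	amdgpu_asic_flush_hdp(adev, NULL);
	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
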
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b8963b725dfa..c48207b377bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
146 fence_ctx = 0; 146 fence_ctx = 0;
147 } 147 }
148 148
149 if (!ring->ready) { 149 if (!ring->sched.ready) {
150 dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); 150 dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
151 return -EINVAL; 151 return -EINVAL;
152 } 152 }
@@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
221 !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ 221 !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
222 continue; 222 continue;
223 223
224 amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0, 224 amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch);
225 need_ctx_switch);
226 need_ctx_switch = false; 225 need_ctx_switch = false;
227 } 226 }
228 227
@@ -347,19 +346,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
347 tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; 346 tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
348 } 347 }
349 348
350 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 349 for (i = 0; i < adev->num_rings; ++i) {
351 struct amdgpu_ring *ring = adev->rings[i]; 350 struct amdgpu_ring *ring = adev->rings[i];
352 long tmo; 351 long tmo;
353 352
354 if (!ring || !ring->ready) 353 /* KIQ rings don't have an IB test because we never submit IBs
355 continue; 354 * to them and they have no interrupt support.
356
357 /* skip IB tests for KIQ in general for the below reasons:
358 * 1. We never submit IBs to the KIQ
359 * 2. KIQ doesn't use the EOP interrupts,
360 * we use some other CP interrupt.
361 */ 355 */
362 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 356 if (!ring->sched.ready || !ring->funcs->test_ib)
363 continue; 357 continue;
364 358
365 /* MM engine need more time */ 359 /* MM engine need more time */
@@ -374,20 +368,23 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
374 tmo = tmo_gfx; 368 tmo = tmo_gfx;
375 369
376 r = amdgpu_ring_test_ib(ring, tmo); 370 r = amdgpu_ring_test_ib(ring, tmo);
377 if (r) { 371 if (!r) {
378 ring->ready = false; 372 DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
379 373 ring->name);
380 if (ring == &adev->gfx.gfx_ring[0]) { 374 continue;
381 /* oh, oh, that's really bad */ 375 }
382 DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r); 376
383 adev->accel_working = false; 377 ring->sched.ready = false;
384 return r; 378 DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n",
385 379 ring->name, r);
386 } else { 380
387 /* still not good, but we can live with it */ 381 if (ring == &adev->gfx.gfx_ring[0]) {
388 DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r); 382 /* oh, oh, that's really bad */
389 ret = r; 383 adev->accel_working = false;
390 } 384 return r;
385
386 } else {
387 ret = r;
391 } 388 }
392 } 389 }
393 return ret; 390 return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 52c17f6219a7..6b6524f04ce0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -94,23 +94,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
94} 94}
95 95
96/** 96/**
97 * amdgpu_irq_reset_work_func - execute GPU reset
98 *
99 * @work: work struct pointer
100 *
101 * Execute scheduled GPU reset (Cayman+).
102 * This function is called when the IRQ handler thinks we need a GPU reset.
103 */
104static void amdgpu_irq_reset_work_func(struct work_struct *work)
105{
106 struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
107 reset_work);
108
109 if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
110 amdgpu_device_gpu_recover(adev, NULL);
111}
112
113/**
114 * amdgpu_irq_disable_all - disable *all* interrupts 97 * amdgpu_irq_disable_all - disable *all* interrupts
115 * 98 *
116 * @adev: amdgpu device pointer 99 * @adev: amdgpu device pointer
@@ -262,15 +245,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
262 amdgpu_hotplug_work_func); 245 amdgpu_hotplug_work_func);
263 } 246 }
264 247
265 INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);
266
267 adev->irq.installed = true; 248 adev->irq.installed = true;
268 r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); 249 r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
269 if (r) { 250 if (r) {
270 adev->irq.installed = false; 251 adev->irq.installed = false;
271 if (!amdgpu_device_has_dc_support(adev)) 252 if (!amdgpu_device_has_dc_support(adev))
272 flush_work(&adev->hotplug_work); 253 flush_work(&adev->hotplug_work);
273 cancel_work_sync(&adev->reset_work);
274 return r; 254 return r;
275 } 255 }
276 adev->ddev->max_vblank_count = 0x00ffffff; 256 adev->ddev->max_vblank_count = 0x00ffffff;
@@ -299,7 +279,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
299 pci_disable_msi(adev->pdev); 279 pci_disable_msi(adev->pdev);
300 if (!amdgpu_device_has_dc_support(adev)) 280 if (!amdgpu_device_has_dc_support(adev))
301 flush_work(&adev->hotplug_work); 281 flush_work(&adev->hotplug_work);
302 cancel_work_sync(&adev->reset_work);
303 } 282 }
304 283
305 for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { 284 for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 755f733bf0d9..e0af44fd6a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -112,6 +112,8 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
112 struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); 112 struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
113 struct amdgpu_job *job = to_amdgpu_job(s_job); 113 struct amdgpu_job *job = to_amdgpu_job(s_job);
114 114
115 drm_sched_job_cleanup(s_job);
116
115 amdgpu_ring_priority_put(ring, s_job->s_priority); 117 amdgpu_ring_priority_put(ring, s_job->s_priority);
116 dma_fence_put(job->fence); 118 dma_fence_put(job->fence);
117 amdgpu_sync_free(&job->sync); 119 amdgpu_sync_free(&job->sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 57cfe78a262b..e1b46a6703de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -33,6 +33,8 @@
33#define to_amdgpu_job(sched_job) \ 33#define to_amdgpu_job(sched_job) \
34 container_of((sched_job), struct amdgpu_job, base) 34 container_of((sched_job), struct amdgpu_job, base)
35 35
36#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
37
36struct amdgpu_fence; 38struct amdgpu_fence;
37 39
38struct amdgpu_job { 40struct amdgpu_job {
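
AMDGPU_JOB_GET_VMID() exists because .emit_ib callbacks now receive the job itself (see the amdgpu_ring.h hunk further down) and the job may be NULL for IB tests. A hedged sketch of the intended callback shape; the actual packet emission is only indicated by a comment:

static void example_ring_emit_ib(struct amdgpu_ring *ring,
				 struct amdgpu_job *job,
				 struct amdgpu_ib *ib,
				 bool ctx_switch)
{
	/* a NULL job (e.g. from an IB test) falls back to vmid 0 */
	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);

	/* ... emit the engine-specific INDIRECT_BUFFER packet using
	 * vmid, ib->gpu_addr and ib->length_dw ...
	 */
}
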
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 81732a84c2ab..9b3164c0f861 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
336 case AMDGPU_HW_IP_GFX: 336 case AMDGPU_HW_IP_GFX:
337 type = AMD_IP_BLOCK_TYPE_GFX; 337 type = AMD_IP_BLOCK_TYPE_GFX;
338 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 338 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
339 if (adev->gfx.gfx_ring[i].ready) 339 if (adev->gfx.gfx_ring[i].sched.ready)
340 ++num_rings; 340 ++num_rings;
341 ib_start_alignment = 32; 341 ib_start_alignment = 32;
342 ib_size_alignment = 32; 342 ib_size_alignment = 32;
@@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
344 case AMDGPU_HW_IP_COMPUTE: 344 case AMDGPU_HW_IP_COMPUTE:
345 type = AMD_IP_BLOCK_TYPE_GFX; 345 type = AMD_IP_BLOCK_TYPE_GFX;
346 for (i = 0; i < adev->gfx.num_compute_rings; i++) 346 for (i = 0; i < adev->gfx.num_compute_rings; i++)
347 if (adev->gfx.compute_ring[i].ready) 347 if (adev->gfx.compute_ring[i].sched.ready)
348 ++num_rings; 348 ++num_rings;
349 ib_start_alignment = 32; 349 ib_start_alignment = 32;
350 ib_size_alignment = 32; 350 ib_size_alignment = 32;
@@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
352 case AMDGPU_HW_IP_DMA: 352 case AMDGPU_HW_IP_DMA:
353 type = AMD_IP_BLOCK_TYPE_SDMA; 353 type = AMD_IP_BLOCK_TYPE_SDMA;
354 for (i = 0; i < adev->sdma.num_instances; i++) 354 for (i = 0; i < adev->sdma.num_instances; i++)
355 if (adev->sdma.instance[i].ring.ready) 355 if (adev->sdma.instance[i].ring.sched.ready)
356 ++num_rings; 356 ++num_rings;
357 ib_start_alignment = 256; 357 ib_start_alignment = 256;
358 ib_size_alignment = 4; 358 ib_size_alignment = 4;
@@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
363 if (adev->uvd.harvest_config & (1 << i)) 363 if (adev->uvd.harvest_config & (1 << i))
364 continue; 364 continue;
365 365
366 if (adev->uvd.inst[i].ring.ready) 366 if (adev->uvd.inst[i].ring.sched.ready)
367 ++num_rings; 367 ++num_rings;
368 } 368 }
369 ib_start_alignment = 64; 369 ib_start_alignment = 64;
@@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
372 case AMDGPU_HW_IP_VCE: 372 case AMDGPU_HW_IP_VCE:
373 type = AMD_IP_BLOCK_TYPE_VCE; 373 type = AMD_IP_BLOCK_TYPE_VCE;
374 for (i = 0; i < adev->vce.num_rings; i++) 374 for (i = 0; i < adev->vce.num_rings; i++)
375 if (adev->vce.ring[i].ready) 375 if (adev->vce.ring[i].sched.ready)
376 ++num_rings; 376 ++num_rings;
377 ib_start_alignment = 4; 377 ib_start_alignment = 4;
378 ib_size_alignment = 1; 378 ib_size_alignment = 1;
@@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
384 continue; 384 continue;
385 385
386 for (j = 0; j < adev->uvd.num_enc_rings; j++) 386 for (j = 0; j < adev->uvd.num_enc_rings; j++)
387 if (adev->uvd.inst[i].ring_enc[j].ready) 387 if (adev->uvd.inst[i].ring_enc[j].sched.ready)
388 ++num_rings; 388 ++num_rings;
389 } 389 }
390 ib_start_alignment = 64; 390 ib_start_alignment = 64;
@@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
392 break; 392 break;
393 case AMDGPU_HW_IP_VCN_DEC: 393 case AMDGPU_HW_IP_VCN_DEC:
394 type = AMD_IP_BLOCK_TYPE_VCN; 394 type = AMD_IP_BLOCK_TYPE_VCN;
395 if (adev->vcn.ring_dec.ready) 395 if (adev->vcn.ring_dec.sched.ready)
396 ++num_rings; 396 ++num_rings;
397 ib_start_alignment = 16; 397 ib_start_alignment = 16;
398 ib_size_alignment = 16; 398 ib_size_alignment = 16;
@@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
400 case AMDGPU_HW_IP_VCN_ENC: 400 case AMDGPU_HW_IP_VCN_ENC:
401 type = AMD_IP_BLOCK_TYPE_VCN; 401 type = AMD_IP_BLOCK_TYPE_VCN;
402 for (i = 0; i < adev->vcn.num_enc_rings; i++) 402 for (i = 0; i < adev->vcn.num_enc_rings; i++)
403 if (adev->vcn.ring_enc[i].ready) 403 if (adev->vcn.ring_enc[i].sched.ready)
404 ++num_rings; 404 ++num_rings;
405 ib_start_alignment = 64; 405 ib_start_alignment = 64;
406 ib_size_alignment = 1; 406 ib_size_alignment = 1;
407 break; 407 break;
408 case AMDGPU_HW_IP_VCN_JPEG: 408 case AMDGPU_HW_IP_VCN_JPEG:
409 type = AMD_IP_BLOCK_TYPE_VCN; 409 type = AMD_IP_BLOCK_TYPE_VCN;
410 if (adev->vcn.ring_jpeg.ready) 410 if (adev->vcn.ring_jpeg.sched.ready)
411 ++num_rings; 411 ++num_rings;
412 ib_start_alignment = 16; 412 ib_start_alignment = 16;
413 ib_size_alignment = 16; 413 ib_size_alignment = 16;
@@ -978,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
978 } 978 }
979 979
980 if (amdgpu_sriov_vf(adev)) { 980 if (amdgpu_sriov_vf(adev)) {
981 r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); 981 uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
982
983 r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
984 &fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE);
982 if (r) 985 if (r)
983 goto error_vm; 986 goto error_vm;
984 } 987 }
@@ -1048,8 +1051,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
1048 pasid = fpriv->vm.pasid; 1051 pasid = fpriv->vm.pasid;
1049 pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); 1052 pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
1050 1053
1051 amdgpu_vm_fini(adev, &fpriv->vm);
1052 amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); 1054 amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
1055 amdgpu_vm_fini(adev, &fpriv->vm);
1053 1056
1054 if (pasid) 1057 if (pasid)
1055 amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); 1058 amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index b9e9e8b02fb7..11723d8fffbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -57,7 +57,6 @@ struct amdgpu_hpd;
57#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) 57#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
58#define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base) 58#define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base)
59#define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base) 59#define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base)
60#define to_amdgpu_plane(x) container_of(x, struct amdgpu_plane, base)
61 60
62#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base); 61#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base);
63 62
@@ -325,7 +324,7 @@ struct amdgpu_mode_info {
325 struct card_info *atom_card_info; 324 struct card_info *atom_card_info;
326 bool mode_config_initialized; 325 bool mode_config_initialized;
327 struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS]; 326 struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
328 struct amdgpu_plane *planes[AMDGPU_MAX_PLANES]; 327 struct drm_plane *planes[AMDGPU_MAX_PLANES];
329 struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS]; 328 struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
330 /* DVI-I properties */ 329 /* DVI-I properties */
331 struct drm_property *coherent_mode_property; 330 struct drm_property *coherent_mode_property;
@@ -434,11 +433,6 @@ struct amdgpu_crtc {
434 struct drm_pending_vblank_event *event; 433 struct drm_pending_vblank_event *event;
435}; 434};
436 435
437struct amdgpu_plane {
438 struct drm_plane base;
439 enum drm_plane_type plane_type;
440};
441
442struct amdgpu_encoder_atom_dig { 436struct amdgpu_encoder_atom_dig {
443 bool linkb; 437 bool linkb;
444 /* atom dig */ 438 /* atom dig */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 59cc678de8c1..7235cd0b0fa9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -2129,7 +2129,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
2129 2129
2130 for (i = 0; i < AMDGPU_MAX_RINGS; i++) { 2130 for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
2131 struct amdgpu_ring *ring = adev->rings[i]; 2131 struct amdgpu_ring *ring = adev->rings[i];
2132 if (ring && ring->ready) 2132 if (ring && ring->sched.ready)
2133 amdgpu_fence_wait_empty(ring); 2133 amdgpu_fence_wait_empty(ring);
2134 } 2134 }
2135 2135
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 25d2f3e757f1..e05dc66b1090 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -90,6 +90,8 @@ static int psp_sw_fini(void *handle)
90 adev->psp.sos_fw = NULL; 90 adev->psp.sos_fw = NULL;
91 release_firmware(adev->psp.asd_fw); 91 release_firmware(adev->psp.asd_fw);
92 adev->psp.asd_fw = NULL; 92 adev->psp.asd_fw = NULL;
93 release_firmware(adev->psp.ta_fw);
94 adev->psp.ta_fw = NULL;
93 return 0; 95 return 0;
94} 96}
95 97
@@ -118,21 +120,25 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
118static int 120static int
119psp_cmd_submit_buf(struct psp_context *psp, 121psp_cmd_submit_buf(struct psp_context *psp,
120 struct amdgpu_firmware_info *ucode, 122 struct amdgpu_firmware_info *ucode,
121 struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr, 123 struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr)
122 int index)
123{ 124{
124 int ret; 125 int ret;
126 int index;
125 127
126 memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); 128 memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
127 129
128 memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); 130 memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
129 131
132 index = atomic_inc_return(&psp->fence_value);
130 ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, 133 ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr,
131 fence_mc_addr, index); 134 fence_mc_addr, index);
135 if (ret) {
136 atomic_dec(&psp->fence_value);
137 return ret;
138 }
132 139
133 while (*((unsigned int *)psp->fence_buf) != index) { 140 while (*((unsigned int *)psp->fence_buf) != index)
134 msleep(1); 141 msleep(1);
135 }
136 142
137 /* the status field must be 0 after FW is loaded */ 143 /* the status field must be 0 after FW is loaded */
138 if (ucode && psp->cmd_buf_mem->resp.status) { 144 if (ucode && psp->cmd_buf_mem->resp.status) {
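
Read together, the interleaved old/new lines above implement a simple handshake: the driver now owns the fence index as an atomic counter, and the PSP writes that index back into the fence buffer when the command completes. The new flow, reduced to its essentials (error handling trimmed):

int index = atomic_inc_return(&psp->fence_value);	/* unique per command */

ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr,
		     fence_mc_addr, index);
if (ret)
	atomic_dec(&psp->fence_value);			/* undo on submit failure */
else
	while (*(unsigned int *)psp->fence_buf != index)
		msleep(1);				/* wait for PSP to catch up */
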
@@ -191,7 +197,7 @@ static int psp_tmr_load(struct psp_context *psp)
191 PSP_TMR_SIZE, psp->tmr_mc_addr); 197 PSP_TMR_SIZE, psp->tmr_mc_addr);
192 198
193 ret = psp_cmd_submit_buf(psp, NULL, cmd, 199 ret = psp_cmd_submit_buf(psp, NULL, cmd,
194 psp->fence_buf_mc_addr, 1); 200 psp->fence_buf_mc_addr);
195 if (ret) 201 if (ret)
196 goto failed; 202 goto failed;
197 203
@@ -258,13 +264,194 @@ static int psp_asd_load(struct psp_context *psp)
258 psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); 264 psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
259 265
260 ret = psp_cmd_submit_buf(psp, NULL, cmd, 266 ret = psp_cmd_submit_buf(psp, NULL, cmd,
261 psp->fence_buf_mc_addr, 2); 267 psp->fence_buf_mc_addr);
268
269 kfree(cmd);
270
271 return ret;
272}
273
274static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
275 uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
276 uint32_t xgmi_ta_size, uint32_t shared_size)
277{
278 cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
279 cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc);
280 cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc);
281 cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size;
282
283 cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared);
284 cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared);
285 cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
286}
287
288static int psp_xgmi_init_shared_buf(struct psp_context *psp)
289{
290 int ret;
291
292 /*
293 * Allocate 16k of memory, aligned to 4k, from the frame buffer (local
294 * physical memory) for XGMI TA <-> driver communication
295 */
296 ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE,
297 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
298 &psp->xgmi_context.xgmi_shared_bo,
299 &psp->xgmi_context.xgmi_shared_mc_addr,
300 &psp->xgmi_context.xgmi_shared_buf);
301
302 return ret;
303}
304
305static int psp_xgmi_load(struct psp_context *psp)
306{
307 int ret;
308 struct psp_gfx_cmd_resp *cmd;
309
310 /*
311 * TODO: bypass the loading in sriov for now
312 */
313 if (amdgpu_sriov_vf(psp->adev))
314 return 0;
315
316 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
317 if (!cmd)
318 return -ENOMEM;
319
320 memset(psp->fw_pri_buf, 0, PSP_1_MEG);
321 memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
322
323 psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
324 psp->xgmi_context.xgmi_shared_mc_addr,
325 psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE);
326
327 ret = psp_cmd_submit_buf(psp, NULL, cmd,
328 psp->fence_buf_mc_addr);
329
330 if (!ret) {
331 psp->xgmi_context.initialized = 1;
332 psp->xgmi_context.session_id = cmd->resp.session_id;
333 }
334
335 kfree(cmd);
336
337 return ret;
338}
339
340static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
341 uint32_t xgmi_session_id)
342{
343 cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
344 cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id;
345}
346
347static int psp_xgmi_unload(struct psp_context *psp)
348{
349 int ret;
350 struct psp_gfx_cmd_resp *cmd;
351
352 /*
353 * TODO: bypass the unloading in sriov for now
354 */
355 if (amdgpu_sriov_vf(psp->adev))
356 return 0;
357
358 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
359 if (!cmd)
360 return -ENOMEM;
361
362 psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
363
364 ret = psp_cmd_submit_buf(psp, NULL, cmd,
365 psp->fence_buf_mc_addr);
262 366
263 kfree(cmd); 367 kfree(cmd);
264 368
265 return ret; 369 return ret;
266} 370}
267 371
372static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
373 uint32_t ta_cmd_id,
374 uint32_t xgmi_session_id)
375{
376 cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
377 cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id;
378 cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
379 /* Note: cmd_invoke_cmd.buf is not used for now */
380}
381
382int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
383{
384 int ret;
385 struct psp_gfx_cmd_resp *cmd;
386
387 /*
388 * TODO: bypass the loading in sriov for now
389 */
390 if (amdgpu_sriov_vf(psp->adev))
391 return 0;
392
393 cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
394 if (!cmd)
395 return -ENOMEM;
396
397 psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id,
398 psp->xgmi_context.session_id);
399
400 ret = psp_cmd_submit_buf(psp, NULL, cmd,
401 psp->fence_buf_mc_addr);
402
403 kfree(cmd);
404
405 return ret;
406}
407
408static int psp_xgmi_terminate(struct psp_context *psp)
409{
410 int ret;
411
412 if (!psp->xgmi_context.initialized)
413 return 0;
414
415 ret = psp_xgmi_unload(psp);
416 if (ret)
417 return ret;
418
419 psp->xgmi_context.initialized = 0;
420
421 /* free xgmi shared memory */
422 amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo,
423 &psp->xgmi_context.xgmi_shared_mc_addr,
424 &psp->xgmi_context.xgmi_shared_buf);
425
426 return 0;
427}
428
429static int psp_xgmi_initialize(struct psp_context *psp)
430{
431 struct ta_xgmi_shared_memory *xgmi_cmd;
432 int ret;
433
434 if (!psp->xgmi_context.initialized) {
435 ret = psp_xgmi_init_shared_buf(psp);
436 if (ret)
437 return ret;
438 }
439
440 /* Load XGMI TA */
441 ret = psp_xgmi_load(psp);
442 if (ret)
443 return ret;
444
445 /* Initialize XGMI session */
446 xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf);
447 memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
448 xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
449
450 ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
451
452 return ret;
453}
454
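
All TA traffic goes through the shared buffer: the caller fills a struct ta_xgmi_shared_memory command there and hands only the command id to psp_xgmi_invoke(). A hedged sketch of a follow-up query built on the same pattern; TA_COMMAND_XGMI__GET_HIVE_ID and the response layout are assumptions, not taken from this patch:

static int example_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id)
{
	struct ta_xgmi_shared_memory *xgmi_cmd =
		(struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;
	int ret;

	memset(xgmi_cmd, 0, sizeof(*xgmi_cmd));
	xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;	/* assumed command id */

	ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
	if (ret)
		return ret;

	/* assumed response field in the shared buffer */
	*hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
	return 0;
}
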
268static int psp_hw_start(struct psp_context *psp) 455static int psp_hw_start(struct psp_context *psp)
269{ 456{
270 struct amdgpu_device *adev = psp->adev; 457 struct amdgpu_device *adev = psp->adev;
@@ -292,6 +479,15 @@ static int psp_hw_start(struct psp_context *psp)
292 if (ret) 479 if (ret)
293 return ret; 480 return ret;
294 481
482 if (adev->gmc.xgmi.num_physical_nodes > 1) {
483 ret = psp_xgmi_initialize(psp);
484 /* Warn on XGMI session initialization failure
485 * instead of stopping driver initialization
486 */
487 if (ret)
488 dev_err(psp->adev->dev,
489 "XGMI: Failed to initialize XGMI session\n");
490 }
295 return 0; 491 return 0;
296} 492}
297 493
@@ -321,7 +517,7 @@ static int psp_np_fw_load(struct psp_context *psp)
321 return ret; 517 return ret;
322 518
323 ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, 519 ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
324 psp->fence_buf_mc_addr, i + 3); 520 psp->fence_buf_mc_addr);
325 if (ret) 521 if (ret)
326 return ret; 522 return ret;
327 523
@@ -452,6 +648,10 @@ static int psp_hw_fini(void *handle)
452 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 648 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
453 return 0; 649 return 0;
454 650
651 if (adev->gmc.xgmi.num_physical_nodes > 1 &&
652 psp->xgmi_context.initialized == 1)
653 psp_xgmi_terminate(psp);
654
455 psp_ring_destroy(psp, PSP_RING_TYPE__KM); 655 psp_ring_destroy(psp, PSP_RING_TYPE__KM);
456 656
457 amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); 657 amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@@ -479,6 +679,15 @@ static int psp_suspend(void *handle)
479 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 679 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
480 return 0; 680 return 0;
481 681
682 if (adev->gmc.xgmi.num_physical_nodes > 1 &&
683 psp->xgmi_context.initialized == 1) {
684 ret = psp_xgmi_terminate(psp);
685 if (ret) {
686 DRM_ERROR("Failed to terminate xgmi ta\n");
687 return ret;
688 }
689 }
690
482 ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); 691 ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
483 if (ret) { 692 if (ret) {
484 DRM_ERROR("PSP ring stop failed\n"); 693 DRM_ERROR("PSP ring stop failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 8b8720e9c3f0..9ec5d1a666a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -27,14 +27,17 @@
27 27
28#include "amdgpu.h" 28#include "amdgpu.h"
29#include "psp_gfx_if.h" 29#include "psp_gfx_if.h"
30#include "ta_xgmi_if.h"
30 31
31#define PSP_FENCE_BUFFER_SIZE 0x1000 32#define PSP_FENCE_BUFFER_SIZE 0x1000
32#define PSP_CMD_BUFFER_SIZE 0x1000 33#define PSP_CMD_BUFFER_SIZE 0x1000
33#define PSP_ASD_SHARED_MEM_SIZE 0x4000 34#define PSP_ASD_SHARED_MEM_SIZE 0x4000
35#define PSP_XGMI_SHARED_MEM_SIZE 0x4000
34#define PSP_1_MEG 0x100000 36#define PSP_1_MEG 0x100000
35#define PSP_TMR_SIZE 0x400000 37#define PSP_TMR_SIZE 0x400000
36 38
37struct psp_context; 39struct psp_context;
40struct psp_xgmi_node_info;
38struct psp_xgmi_topology_info; 41struct psp_xgmi_topology_info;
39 42
40enum psp_ring_type 43enum psp_ring_type
@@ -80,12 +83,20 @@ struct psp_funcs
80 enum AMDGPU_UCODE_ID ucode_type); 83 enum AMDGPU_UCODE_ID ucode_type);
81 bool (*smu_reload_quirk)(struct psp_context *psp); 84 bool (*smu_reload_quirk)(struct psp_context *psp);
82 int (*mode1_reset)(struct psp_context *psp); 85 int (*mode1_reset)(struct psp_context *psp);
83 uint64_t (*xgmi_get_device_id)(struct psp_context *psp); 86 uint64_t (*xgmi_get_node_id)(struct psp_context *psp);
84 uint64_t (*xgmi_get_hive_id)(struct psp_context *psp); 87 uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
85 int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices, 88 int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
86 struct psp_xgmi_topology_info *topology); 89 struct psp_xgmi_topology_info *topology);
87 int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices, 90 int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
88 struct psp_xgmi_topology_info *topology); 91 struct psp_xgmi_topology_info *topology);
92};
93
94struct psp_xgmi_context {
95 uint8_t initialized;
96 uint32_t session_id;
97 struct amdgpu_bo *xgmi_shared_bo;
98 uint64_t xgmi_shared_mc_addr;
99 void *xgmi_shared_buf;
89}; 100};
90 101
91struct psp_context 102struct psp_context
@@ -96,7 +107,7 @@ struct psp_context
96 107
97 const struct psp_funcs *funcs; 108 const struct psp_funcs *funcs;
98 109
99 /* fence buffer */ 110 /* firmware buffer */
100 struct amdgpu_bo *fw_pri_bo; 111 struct amdgpu_bo *fw_pri_bo;
101 uint64_t fw_pri_mc_addr; 112 uint64_t fw_pri_mc_addr;
102 void *fw_pri_buf; 113 void *fw_pri_buf;
@@ -134,6 +145,16 @@ struct psp_context
134 struct amdgpu_bo *cmd_buf_bo; 145 struct amdgpu_bo *cmd_buf_bo;
135 uint64_t cmd_buf_mc_addr; 146 uint64_t cmd_buf_mc_addr;
136 struct psp_gfx_cmd_resp *cmd_buf_mem; 147 struct psp_gfx_cmd_resp *cmd_buf_mem;
148
149 /* fence value associated with cmd buffer */
150 atomic_t fence_value;
151
152 /* xgmi ta firmware and buffer */
153 const struct firmware *ta_fw;
154 uint32_t ta_xgmi_ucode_version;
155 uint32_t ta_xgmi_ucode_size;
156 uint8_t *ta_xgmi_start_addr;
157 struct psp_xgmi_context xgmi_context;
137}; 158};
138 159
139struct amdgpu_psp_funcs { 160struct amdgpu_psp_funcs {
@@ -141,21 +162,17 @@ struct amdgpu_psp_funcs {
141 enum AMDGPU_UCODE_ID); 162 enum AMDGPU_UCODE_ID);
142}; 163};
143 164
165#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
166struct psp_xgmi_node_info {
167 uint64_t node_id;
168 uint8_t num_hops;
169 uint8_t is_sharing_enabled;
170 enum ta_xgmi_assigned_sdma_engine sdma_engine;
171};
172
144struct psp_xgmi_topology_info { 173struct psp_xgmi_topology_info {
145 /* Generated by PSP to identify the GPU instance within xgmi connection */ 174 uint32_t num_nodes;
146 uint64_t device_id; 175 struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
147 /*
148 * If all bits set to 0 , driver indicates it wants to retrieve the xgmi
149 * connection vector topology, but not access enable the connections
150 * if some or all bits are set to 1, driver indicates it want to retrieve the
151 * current xgmi topology and access enable the link to GPU[i] associated
152 * with the bit position in the vector.
153 * On return,: bits indicated which xgmi links are present/active depending
154 * on the value passed in. The relative bit offset for the relative GPU index
155 * within the hive is always marked active.
156 */
157 uint32_t connection_mask;
158 uint32_t reserved; /* must be 0 */
159}; 176};
160 177
161#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) 178#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
@@ -177,8 +194,8 @@ struct psp_xgmi_topology_info {
177 ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) 194 ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
178#define psp_mode1_reset(psp) \ 195#define psp_mode1_reset(psp) \
179 ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) 196 ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
180#define psp_xgmi_get_device_id(psp) \ 197#define psp_xgmi_get_node_id(psp) \
181 ((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0) 198 ((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp)) : 0)
182#define psp_xgmi_get_hive_id(psp) \ 199#define psp_xgmi_get_hive_id(psp) \
183 ((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0) 200 ((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
184#define psp_xgmi_get_topology_info(psp, num_device, topology) \ 201#define psp_xgmi_get_topology_info(psp, num_device, topology) \
@@ -199,6 +216,8 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
199extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; 216extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
200 217
201int psp_gpu_reset(struct amdgpu_device *adev); 218int psp_gpu_reset(struct amdgpu_device *adev);
219int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
220
202extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; 221extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
203 222
204#endif 223#endif
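
The node-based psp_xgmi_topology_info replaces the old connection-mask layout, so callers fill num_nodes and the node_id of each peer before asking the PSP for the rest. A small sketch under that assumption (peer_node_id[] and count are hypothetical inputs):

struct psp_xgmi_topology_info topology = {};
int i, ret;

for (i = 0; i < count; i++)
	topology.nodes[i].node_id = peer_node_id[i];	/* hypothetical ids */
topology.num_nodes = count;

ret = psp_xgmi_get_topology_info(&adev->psp, count, &topology);
if (!ret)
	DRM_INFO("peer 0 is %u hop(s) away\n", topology.nodes[0].num_hops);
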
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index b70e85ec147d..5b75bdc8dc28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
338 */ 338 */
339void amdgpu_ring_fini(struct amdgpu_ring *ring) 339void amdgpu_ring_fini(struct amdgpu_ring *ring)
340{ 340{
341 ring->ready = false; 341 ring->sched.ready = false;
342 342
343 /* Not to finish a ring which is not initialized */ 343 /* Not to finish a ring which is not initialized */
344 if (!(ring->adev) || !(ring->adev->rings[ring->idx])) 344 if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
@@ -500,3 +500,29 @@ static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
500 debugfs_remove(ring->ent); 500 debugfs_remove(ring->ent);
501#endif 501#endif
502} 502}
503
504/**
505 * amdgpu_ring_test_helper - test a ring and set its scheduler readiness status
506 *
507 * @ring: ring to test
508 *
509 * Tests the ring and sets the scheduler readiness status accordingly.
510 *
511 * Returns 0 on success, error on failure.
512 */
513int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
514{
515 struct amdgpu_device *adev = ring->adev;
516 int r;
517
518 r = amdgpu_ring_test_ring(ring);
519 if (r)
520 DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
521 ring->name, r);
522 else
523 DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
524 ring->name);
525
526 ring->sched.ready = !r;
527 return r;
528}
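
amdgpu_ring_test_helper() folds the test/log/set-ready sequence that each IP block used to open-code around amdgpu_ring_test_ring(). A sketch of how a ring bring-up path might now look (hypothetical hw_init, not from this patch):

static int example_hw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];

	/* ... program ring registers, start the engine ... */

	/* replaces: r = amdgpu_ring_test_ring(ring); ring->ready = !r; */
	return amdgpu_ring_test_helper(ring);
}
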
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4caa301ce454..0beb01fef83f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -129,8 +129,9 @@ struct amdgpu_ring_funcs {
129 unsigned emit_ib_size; 129 unsigned emit_ib_size;
130 /* command emit functions */ 130 /* command emit functions */
131 void (*emit_ib)(struct amdgpu_ring *ring, 131 void (*emit_ib)(struct amdgpu_ring *ring,
132 struct amdgpu_job *job,
132 struct amdgpu_ib *ib, 133 struct amdgpu_ib *ib,
133 unsigned vmid, bool ctx_switch); 134 bool ctx_switch);
134 void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, 135 void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
135 uint64_t seq, unsigned flags); 136 uint64_t seq, unsigned flags);
136 void (*emit_pipeline_sync)(struct amdgpu_ring *ring); 137 void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
@@ -189,7 +190,6 @@ struct amdgpu_ring {
189 uint64_t gpu_addr; 190 uint64_t gpu_addr;
190 uint64_t ptr_mask; 191 uint64_t ptr_mask;
191 uint32_t buf_mask; 192 uint32_t buf_mask;
192 bool ready;
193 u32 idx; 193 u32 idx;
194 u32 me; 194 u32 me;
195 u32 pipe; 195 u32 pipe;
@@ -229,7 +229,7 @@ struct amdgpu_ring {
229#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) 229#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
230#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) 230#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
231#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) 231#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
232#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c)) 232#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c)))
233#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) 233#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
234#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) 234#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
235#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) 235#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
@@ -313,4 +313,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
313 ring->count_dw -= count_dw; 313 ring->count_dw -= count_dw;
314} 314}
315 315
316int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
317
316#endif 318#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
new file mode 100644
index 000000000000..c8793e6cc3c5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -0,0 +1,282 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25#include <linux/firmware.h>
26#include "amdgpu.h"
27#include "amdgpu_gfx.h"
28#include "amdgpu_rlc.h"
29
30/**
31 * amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
32 *
33 * @adev: amdgpu_device pointer
34 *
35 * Put the RLC into safe mode if it is enabled and not already in safe mode.
36 */
37void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
38{
39 if (adev->gfx.rlc.in_safe_mode)
40 return;
41
42 /* if RLC is not enabled, do nothing */
43 if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
44 return;
45
46 if (adev->cg_flags &
47 (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
48 AMD_CG_SUPPORT_GFX_3D_CGCG)) {
49 adev->gfx.rlc.funcs->set_safe_mode(adev);
50 adev->gfx.rlc.in_safe_mode = true;
51 }
52}
53
54/**
55 * amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
56 *
57 * @adev: amdgpu_device pointer
58 *
59 * Take the RLC out of safe mode if it is enabled and currently in safe mode.
60 */
61void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
62{
63 if (!(adev->gfx.rlc.in_safe_mode))
64 return;
65
66 /* if RLC is not enabled, do nothing */
67 if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
68 return;
69
70 if (adev->cg_flags &
71 (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
72 AMD_CG_SUPPORT_GFX_3D_CGCG)) {
73 adev->gfx.rlc.funcs->unset_safe_mode(adev);
74 adev->gfx.rlc.in_safe_mode = false;
75 }
76}
77
78/**
79 * amdgpu_gfx_rlc_init_sr - Init save restore block
80 *
81 * @adev: amdgpu_device pointer
82 * @dws: the size of save restore block
83 *
84 * Allocate and initialize the RLC save/restore block.
85 * Returns 0 on success or a negative error code if allocation failed.
86 */
87int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
88{
89 const u32 *src_ptr;
90 volatile u32 *dst_ptr;
91 u32 i;
92 int r;
93
94 /* allocate save restore block */
95 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
96 AMDGPU_GEM_DOMAIN_VRAM,
97 &adev->gfx.rlc.save_restore_obj,
98 &adev->gfx.rlc.save_restore_gpu_addr,
99 (void **)&adev->gfx.rlc.sr_ptr);
100 if (r) {
101 dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
102 amdgpu_gfx_rlc_fini(adev);
103 return r;
104 }
105
106 /* write the sr buffer */
107 src_ptr = adev->gfx.rlc.reg_list;
108 dst_ptr = adev->gfx.rlc.sr_ptr;
109 for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
110 dst_ptr[i] = cpu_to_le32(src_ptr[i]);
111 amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
112 amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
113
114 return 0;
115}
116
117/**
118 * amdgpu_gfx_rlc_init_csb - Init clear state block
119 *
120 * @adev: amdgpu_device pointer
121 *
122 * Allocate and initialize the RLC clear state block.
123 * Returns 0 on success or a negative error code if allocation failed.
124 */
125int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
126{
127 volatile u32 *dst_ptr;
128 u32 dws;
129 int r;
130
131 /* allocate clear state block */
132 adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
133 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
134 AMDGPU_GEM_DOMAIN_VRAM,
135 &adev->gfx.rlc.clear_state_obj,
136 &adev->gfx.rlc.clear_state_gpu_addr,
137 (void **)&adev->gfx.rlc.cs_ptr);
138 if (r) {
139 dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
140 amdgpu_gfx_rlc_fini(adev);
141 return r;
142 }
143
144 /* set up the cs buffer */
145 dst_ptr = adev->gfx.rlc.cs_ptr;
146 adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
147 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
148 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
149 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
150
151 return 0;
152}
153
154/**
155 * amdgpu_gfx_rlc_init_cpt - Init cp table
156 *
157 * @adev: amdgpu_device pointer
158 *
159 * Allocate and initialize the RLC CP table.
160 * Returns 0 on success or a negative error code if allocation failed.
161 */
162int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
163{
164 int r;
165
166 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
167 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
168 &adev->gfx.rlc.cp_table_obj,
169 &adev->gfx.rlc.cp_table_gpu_addr,
170 (void **)&adev->gfx.rlc.cp_table_ptr);
171 if (r) {
172 dev_err(adev->dev, "(%d) failed to create cp table bo\n", r);
173 amdgpu_gfx_rlc_fini(adev);
174 return r;
175 }
176
177 /* set up the cp table */
178 amdgpu_gfx_rlc_setup_cp_table(adev);
179 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
180 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
181
182 return 0;
183}
184
185/**
186 * amdgpu_gfx_rlc_setup_cp_table - set up the CP table buffer
187 *
188 * @adev: amdgpu_device pointer
189 *
190 * Write the CP firmware data into the CP table.
191 */
192void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
193{
194 const __le32 *fw_data;
195 volatile u32 *dst_ptr;
196 int me, i, max_me;
197 u32 bo_offset = 0;
198 u32 table_offset, table_size;
199
200 max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);
201
202 /* write the cp table buffer */
203 dst_ptr = adev->gfx.rlc.cp_table_ptr;
204 for (me = 0; me < max_me; me++) {
205 if (me == 0) {
206 const struct gfx_firmware_header_v1_0 *hdr =
207 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
208 fw_data = (const __le32 *)
209 (adev->gfx.ce_fw->data +
210 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
211 table_offset = le32_to_cpu(hdr->jt_offset);
212 table_size = le32_to_cpu(hdr->jt_size);
213 } else if (me == 1) {
214 const struct gfx_firmware_header_v1_0 *hdr =
215 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
216 fw_data = (const __le32 *)
217 (adev->gfx.pfp_fw->data +
218 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
219 table_offset = le32_to_cpu(hdr->jt_offset);
220 table_size = le32_to_cpu(hdr->jt_size);
221 } else if (me == 2) {
222 const struct gfx_firmware_header_v1_0 *hdr =
223 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
224 fw_data = (const __le32 *)
225 (adev->gfx.me_fw->data +
226 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
227 table_offset = le32_to_cpu(hdr->jt_offset);
228 table_size = le32_to_cpu(hdr->jt_size);
229 } else if (me == 3) {
230 const struct gfx_firmware_header_v1_0 *hdr =
231 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
232 fw_data = (const __le32 *)
233 (adev->gfx.mec_fw->data +
234 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
235 table_offset = le32_to_cpu(hdr->jt_offset);
236 table_size = le32_to_cpu(hdr->jt_size);
237 } else if (me == 4) {
238 const struct gfx_firmware_header_v1_0 *hdr =
239 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
240 fw_data = (const __le32 *)
241 (adev->gfx.mec2_fw->data +
242 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
243 table_offset = le32_to_cpu(hdr->jt_offset);
244 table_size = le32_to_cpu(hdr->jt_size);
245 }
246
247 for (i = 0; i < table_size; i ++) {
248 dst_ptr[bo_offset + i] =
249 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
250 }
251
252 bo_offset += table_size;
253 }
254}
255
256/**
257 * amdgpu_gfx_rlc_fini - Free the BOs used by the RLC
258 *
259 * @adev: amdgpu_device pointer
260 *
261 * Free the three BOs used for the RLC save/restore block, clear state block
262 * and jump table block.
263 */
264void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
265{
266 /* save restore block */
267 if (adev->gfx.rlc.save_restore_obj) {
268 amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj,
269 &adev->gfx.rlc.save_restore_gpu_addr,
270 (void **)&adev->gfx.rlc.sr_ptr);
271 }
272
273 /* clear state block */
274 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
275 &adev->gfx.rlc.clear_state_gpu_addr,
276 (void **)&adev->gfx.rlc.cs_ptr);
277
278 /* jump table block */
279 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
280 &adev->gfx.rlc.cp_table_gpu_addr,
281 (void **)&adev->gfx.rlc.cp_table_ptr);
282}
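
These helpers assume each gfx IP block points adev->gfx.rlc.funcs at its own callbacks and brackets clock/power-gating register updates with the safe-mode pair. A hedged sketch of that wiring; the example_* callbacks are placeholders for the per-ASIC implementations:

/* in the IP block's early init: adev->gfx.rlc.funcs = &example_rlc_funcs; */
static const struct amdgpu_rlc_funcs example_rlc_funcs = {
	.is_rlc_enabled   = example_is_rlc_enabled,	/* placeholder callbacks */
	.set_safe_mode    = example_set_safe_mode,
	.unset_safe_mode  = example_unset_safe_mode,
	.get_csb_size     = example_get_csb_size,
	.get_csb_buffer   = example_get_csb_buffer,
	.get_cp_table_num = example_get_cp_table_num,
};

static void example_update_cgcg(struct amdgpu_device *adev, bool enable)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev);
	/* ... update the CGCG/MGCG registers while the RLC is parked ... */
	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
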
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
new file mode 100644
index 000000000000..49a8ab52113b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -0,0 +1,98 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#ifndef __AMDGPU_RLC_H__
25#define __AMDGPU_RLC_H__
26
27#include "clearstate_defs.h"
28
29struct amdgpu_rlc_funcs {
30 bool (*is_rlc_enabled)(struct amdgpu_device *adev);
31 void (*set_safe_mode)(struct amdgpu_device *adev);
32 void (*unset_safe_mode)(struct amdgpu_device *adev);
33 int (*init)(struct amdgpu_device *adev);
34 u32 (*get_csb_size)(struct amdgpu_device *adev);
35 void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
36 int (*get_cp_table_num)(struct amdgpu_device *adev);
37 int (*resume)(struct amdgpu_device *adev);
38 void (*stop)(struct amdgpu_device *adev);
39 void (*reset)(struct amdgpu_device *adev);
40 void (*start)(struct amdgpu_device *adev);
41};
42
43struct amdgpu_rlc {
44 /* for power gating */
45 struct amdgpu_bo *save_restore_obj;
46 uint64_t save_restore_gpu_addr;
47 volatile uint32_t *sr_ptr;
48 const u32 *reg_list;
49 u32 reg_list_size;
50 /* for clear state */
51 struct amdgpu_bo *clear_state_obj;
52 uint64_t clear_state_gpu_addr;
53 volatile uint32_t *cs_ptr;
54 const struct cs_section_def *cs_data;
55 u32 clear_state_size;
56 /* for cp tables */
57 struct amdgpu_bo *cp_table_obj;
58 uint64_t cp_table_gpu_addr;
59 volatile uint32_t *cp_table_ptr;
60 u32 cp_table_size;
61
62 /* safe mode for updating CG/PG state */
63 bool in_safe_mode;
64 const struct amdgpu_rlc_funcs *funcs;
65
66 /* for firmware data */
67 u32 save_and_restore_offset;
68 u32 clear_state_descriptor_offset;
69 u32 avail_scratch_ram_locations;
70 u32 reg_restore_list_size;
71 u32 reg_list_format_start;
72 u32 reg_list_format_separate_start;
73 u32 starting_offsets_start;
74 u32 reg_list_format_size_bytes;
75 u32 reg_list_size_bytes;
76 u32 reg_list_format_direct_reg_list_length;
77 u32 save_restore_list_cntl_size_bytes;
78 u32 save_restore_list_gpm_size_bytes;
79 u32 save_restore_list_srm_size_bytes;
80
81 u32 *register_list_format;
82 u32 *register_restore;
83 u8 *save_restore_list_cntl;
84 u8 *save_restore_list_gpm;
85 u8 *save_restore_list_srm;
86
87 bool is_rlc_v2_1;
88};
89
90void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
91void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
92int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
93int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
94int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
95void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
96void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
97
98#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index bc9244b429ef..115bb0c99b0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -28,17 +28,31 @@
28 * GPU SDMA IP block helper functions. 28 * GPU SDMA IP block helper functions.
29 */ 29 */
30 30
31struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) 31struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring)
32{ 32{
33 struct amdgpu_device *adev = ring->adev; 33 struct amdgpu_device *adev = ring->adev;
34 int i; 34 int i;
35 35
36 for (i = 0; i < adev->sdma.num_instances; i++) 36 for (i = 0; i < adev->sdma.num_instances; i++)
37 if (&adev->sdma.instance[i].ring == ring) 37 if (ring == &adev->sdma.instance[i].ring ||
38 break; 38 ring == &adev->sdma.instance[i].page)
39 return &adev->sdma.instance[i];
39 40
40 if (i < AMDGPU_MAX_SDMA_INSTANCES) 41 return NULL;
41 return &adev->sdma.instance[i]; 42}
42 else 43
43 return NULL; 44int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
45{
46 struct amdgpu_device *adev = ring->adev;
47 int i;
48
49 for (i = 0; i < adev->sdma.num_instances; i++) {
50 if (ring == &adev->sdma.instance[i].ring ||
51 ring == &adev->sdma.instance[i].page) {
52 *index = i;
53 return 0;
54 }
55 }
56
57 return -EINVAL;
44} 58}
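
With a page queue added per SDMA instance, a ring pointer alone no longer names the engine, hence the index lookup. A sketch of how a ring callback might use it (the register write itself is only indicated by a comment):

static void example_sdma_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t instance;

	if (amdgpu_sdma_get_index_from_ring(ring, &instance))
		return;		/* not an SDMA ring */

	/* ... write the wptr/doorbell register of SDMA<instance>, using the
	 * page-queue offsets when ring == &adev->sdma.instance[instance].page ...
	 */
}
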
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 500113ec65ca..16b1a6ae5ba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -41,6 +41,7 @@ struct amdgpu_sdma_instance {
41 uint32_t feature_version; 41 uint32_t feature_version;
42 42
43 struct amdgpu_ring ring; 43 struct amdgpu_ring ring;
44 struct amdgpu_ring page;
44 bool burst_nop; 45 bool burst_nop;
45}; 46};
46 47
@@ -50,6 +51,7 @@ struct amdgpu_sdma {
50 struct amdgpu_irq_src illegal_inst_irq; 51 struct amdgpu_irq_src illegal_inst_irq;
51 int num_instances; 52 int num_instances;
52 uint32_t srbm_soft_reset; 53 uint32_t srbm_soft_reset;
54 bool has_page_queue;
53}; 55};
54 56
55/* 57/*
@@ -92,6 +94,7 @@ struct amdgpu_buffer_funcs {
92#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) 94#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
93 95
94struct amdgpu_sdma_instance * 96struct amdgpu_sdma_instance *
95amdgpu_get_sdma_instance(struct amdgpu_ring *ring); 97amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
98int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
96 99
97#endif 100#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index e9bf70e2ac51..626abca770a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -218,6 +218,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
218 TP_ARGS(vm, ring, job), 218 TP_ARGS(vm, ring, job),
219 TP_STRUCT__entry( 219 TP_STRUCT__entry(
220 __field(u32, pasid) 220 __field(u32, pasid)
221 __string(ring, ring->name)
221 __field(u32, ring) 222 __field(u32, ring)
222 __field(u32, vmid) 223 __field(u32, vmid)
223 __field(u32, vm_hub) 224 __field(u32, vm_hub)
@@ -227,14 +228,14 @@ TRACE_EVENT(amdgpu_vm_grab_id,
227 228
228 TP_fast_assign( 229 TP_fast_assign(
229 __entry->pasid = vm->pasid; 230 __entry->pasid = vm->pasid;
230 __entry->ring = ring->idx; 231 __assign_str(ring, ring->name)
231 __entry->vmid = job->vmid; 232 __entry->vmid = job->vmid;
232 __entry->vm_hub = ring->funcs->vmhub, 233 __entry->vm_hub = ring->funcs->vmhub,
233 __entry->pd_addr = job->vm_pd_addr; 234 __entry->pd_addr = job->vm_pd_addr;
234 __entry->needs_flush = job->vm_needs_flush; 235 __entry->needs_flush = job->vm_needs_flush;
235 ), 236 ),
236 TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", 237 TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
237 __entry->pasid, __entry->ring, __entry->vmid, 238 __entry->pasid, __get_str(ring), __entry->vmid,
238 __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) 239 __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
239); 240);
240 241
@@ -366,20 +367,20 @@ TRACE_EVENT(amdgpu_vm_flush,
366 uint64_t pd_addr), 367 uint64_t pd_addr),
367 TP_ARGS(ring, vmid, pd_addr), 368 TP_ARGS(ring, vmid, pd_addr),
368 TP_STRUCT__entry( 369 TP_STRUCT__entry(
369 __field(u32, ring) 370 __string(ring, ring->name)
370 __field(u32, vmid) 371 __field(u32, vmid)
371 __field(u32, vm_hub) 372 __field(u32, vm_hub)
372 __field(u64, pd_addr) 373 __field(u64, pd_addr)
373 ), 374 ),
374 375
375 TP_fast_assign( 376 TP_fast_assign(
376 __entry->ring = ring->idx; 377 __assign_str(ring, ring->name)
377 __entry->vmid = vmid; 378 __entry->vmid = vmid;
378 __entry->vm_hub = ring->funcs->vmhub; 379 __entry->vm_hub = ring->funcs->vmhub;
379 __entry->pd_addr = pd_addr; 380 __entry->pd_addr = pd_addr;
380 ), 381 ),
381 TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx", 382 TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
382 __entry->ring, __entry->vmid, 383 __get_str(ring), __entry->vmid,
383 __entry->vm_hub,__entry->pd_addr) 384 __entry->vm_hub,__entry->pd_addr)
384); 385);
385 386
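
Switching the trace events from the ring index to the ring name relies on the tracepoint string helpers: __string() reserves a per-event slot sized for the string and __assign_str() copies it in. The pattern in isolation (a generic example, not a tracepoint added by this patch):

TRACE_EVENT(example_ring_event,
	TP_PROTO(struct amdgpu_ring *ring),
	TP_ARGS(ring),
	TP_STRUCT__entry(
		__string(ring, ring->name)	/* variable-length string slot */
	),
	TP_fast_assign(
		__assign_str(ring, ring->name);	/* copy the name into the slot */
	),
	TP_printk("ring=%s", __get_str(ring))
);
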
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a44fc12ae1f9..c91ec3101d00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -61,100 +61,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
61static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); 61static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
62static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); 62static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
63 63
64/*
65 * Global memory.
66 */
67
68/**
69 * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
70 * memory object
71 *
72 * @ref: Object for initialization.
73 *
74 * This is called by drm_global_item_ref() when an object is being
75 * initialized.
76 */
77static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
78{
79 return ttm_mem_global_init(ref->object);
80}
81
82/**
83 * amdgpu_ttm_mem_global_release - Drop reference to a memory object
84 *
85 * @ref: Object being removed
86 *
87 * This is called by drm_global_item_unref() when an object is being
88 * released.
89 */
90static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
91{
92 ttm_mem_global_release(ref->object);
93}
94
95/**
96 * amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
97 *
98 * @adev: AMDGPU device for which the global structures need to be registered.
99 *
100 * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
101 * during bring up.
102 */
103static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
104{
105 struct drm_global_reference *global_ref;
106 int r;
107
108 /* ensure reference is false in case init fails */
109 adev->mman.mem_global_referenced = false;
110
111 global_ref = &adev->mman.mem_global_ref;
112 global_ref->global_type = DRM_GLOBAL_TTM_MEM;
113 global_ref->size = sizeof(struct ttm_mem_global);
114 global_ref->init = &amdgpu_ttm_mem_global_init;
115 global_ref->release = &amdgpu_ttm_mem_global_release;
116 r = drm_global_item_ref(global_ref);
117 if (r) {
118 DRM_ERROR("Failed setting up TTM memory accounting "
119 "subsystem.\n");
120 goto error_mem;
121 }
122
123 adev->mman.bo_global_ref.mem_glob =
124 adev->mman.mem_global_ref.object;
125 global_ref = &adev->mman.bo_global_ref.ref;
126 global_ref->global_type = DRM_GLOBAL_TTM_BO;
127 global_ref->size = sizeof(struct ttm_bo_global);
128 global_ref->init = &ttm_bo_global_init;
129 global_ref->release = &ttm_bo_global_release;
130 r = drm_global_item_ref(global_ref);
131 if (r) {
132 DRM_ERROR("Failed setting up TTM BO subsystem.\n");
133 goto error_bo;
134 }
135
136 mutex_init(&adev->mman.gtt_window_lock);
137
138 adev->mman.mem_global_referenced = true;
139
140 return 0;
141
142error_bo:
143 drm_global_item_unref(&adev->mman.mem_global_ref);
144error_mem:
145 return r;
146}
147
148static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
149{
150 if (adev->mman.mem_global_referenced) {
151 mutex_destroy(&adev->mman.gtt_window_lock);
152 drm_global_item_unref(&adev->mman.bo_global_ref.ref);
153 drm_global_item_unref(&adev->mman.mem_global_ref);
154 adev->mman.mem_global_referenced = false;
155 }
156}
157
158static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) 64static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
159{ 65{
160 return 0; 66 return 0;
@@ -1758,14 +1664,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1758 int r; 1664 int r;
1759 u64 vis_vram_limit; 1665 u64 vis_vram_limit;
1760 1666
1761 /* initialize global references for vram/gtt */ 1667 mutex_init(&adev->mman.gtt_window_lock);
1762 r = amdgpu_ttm_global_init(adev); 1668
1763 if (r) {
1764 return r;
1765 }
1766 /* No other users of the address space, so set it to 0 */ 1669 /* No other users of the address space, so set it to 0 */
1767 r = ttm_bo_device_init(&adev->mman.bdev, 1670 r = ttm_bo_device_init(&adev->mman.bdev,
1768 adev->mman.bo_global_ref.ref.object,
1769 &amdgpu_bo_driver, 1671 &amdgpu_bo_driver,
1770 adev->ddev->anon_inode->i_mapping, 1672 adev->ddev->anon_inode->i_mapping,
1771 DRM_FILE_PAGE_OFFSET, 1673 DRM_FILE_PAGE_OFFSET,
@@ -1922,7 +1824,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
1922 ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); 1824 ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
1923 ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); 1825 ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
1924 ttm_bo_device_release(&adev->mman.bdev); 1826 ttm_bo_device_release(&adev->mman.bdev);
1925 amdgpu_ttm_global_fini(adev);
1926 adev->mman.initialized = false; 1827 adev->mman.initialized = false;
1927 DRM_INFO("amdgpu: ttm finalized\n"); 1828 DRM_INFO("amdgpu: ttm finalized\n");
1928} 1829}
@@ -2069,7 +1970,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
2069 unsigned i; 1970 unsigned i;
2070 int r; 1971 int r;
2071 1972
2072 if (direct_submit && !ring->ready) { 1973 if (direct_submit && !ring->sched.ready) {
2073 DRM_ERROR("Trying to move memory with ring turned off.\n"); 1974 DRM_ERROR("Trying to move memory with ring turned off.\n");
2074 return -EINVAL; 1975 return -EINVAL;
2075 } 1976 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index fe8f276e9811..b5b2d101f7db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -39,8 +39,6 @@
39#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 39#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
40 40
41struct amdgpu_mman { 41struct amdgpu_mman {
42 struct ttm_bo_global_ref bo_global_ref;
43 struct drm_global_reference mem_global_ref;
44 struct ttm_bo_device bdev; 42 struct ttm_bo_device bdev;
45 bool mem_global_referenced; 43 bool mem_global_referenced;
46 bool initialized; 44 bool initialized;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index aa6641b944a0..7ac25a1c7853 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -58,6 +58,17 @@ struct psp_firmware_header_v1_0 {
58}; 58};
59 59
60/* version_major=1, version_minor=0 */ 60/* version_major=1, version_minor=0 */
61struct ta_firmware_header_v1_0 {
62 struct common_firmware_header header;
63 uint32_t ta_xgmi_ucode_version;
64 uint32_t ta_xgmi_offset_bytes;
65 uint32_t ta_xgmi_size_bytes;
66 uint32_t ta_ras_ucode_version;
67 uint32_t ta_ras_offset_bytes;
68 uint32_t ta_ras_size_bytes;
69};
70
71/* version_major=1, version_minor=0 */
61struct gfx_firmware_header_v1_0 { 72struct gfx_firmware_header_v1_0 {
62 struct common_firmware_header header; 73 struct common_firmware_header header;
63 uint32_t ucode_feature_version; 74 uint32_t ucode_feature_version;
@@ -170,6 +181,7 @@ union amdgpu_firmware_header {
170 struct mc_firmware_header_v1_0 mc; 181 struct mc_firmware_header_v1_0 mc;
171 struct smc_firmware_header_v1_0 smc; 182 struct smc_firmware_header_v1_0 smc;
172 struct psp_firmware_header_v1_0 psp; 183 struct psp_firmware_header_v1_0 psp;
184 struct ta_firmware_header_v1_0 ta;
173 struct gfx_firmware_header_v1_0 gfx; 185 struct gfx_firmware_header_v1_0 gfx;
174 struct rlc_firmware_header_v1_0 rlc; 186 struct rlc_firmware_header_v1_0 rlc;
175 struct rlc_firmware_header_v2_0 rlc_v2_0; 187 struct rlc_firmware_header_v2_0 rlc_v2_0;
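
The new ta_firmware_header_v1_0 describes a PSP Trusted Application image carrying both the XGMI and RAS TA binaries, and the firmware header union gains a matching ta member. A hypothetical reader for that header, not taken from the patch: the helper name and the assumption that the ta_*_offset_bytes fields are relative to the ucode payload (header.ucode_array_offset_bytes) are mine.

#include <linux/firmware.h>

static void psp_ta_header_dump(const struct firmware *fw)
{
	const struct ta_firmware_header_v1_0 *hdr =
		(const struct ta_firmware_header_v1_0 *)fw->data;
	const u8 *payload = fw->data +
		le32_to_cpu(hdr->header.ucode_array_offset_bytes);
	const u8 *xgmi = payload + le32_to_cpu(hdr->ta_xgmi_offset_bytes);
	const u8 *ras  = payload + le32_to_cpu(hdr->ta_ras_offset_bytes);

	pr_debug("TA xgmi v%u (%u bytes at %p), ras v%u (%u bytes at %p)\n",
		 le32_to_cpu(hdr->ta_xgmi_ucode_version),
		 le32_to_cpu(hdr->ta_xgmi_size_bytes), xgmi,
		 le32_to_cpu(hdr->ta_ras_ucode_version),
		 le32_to_cpu(hdr->ta_ras_size_bytes), ras);
}
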
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e5a6db6beab7..69896f451e8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1243,30 +1243,20 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1243{ 1243{
1244 struct dma_fence *fence; 1244 struct dma_fence *fence;
1245 long r; 1245 long r;
1246 uint32_t ip_instance = ring->me;
1247 1246
1248 r = amdgpu_uvd_get_create_msg(ring, 1, NULL); 1247 r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
1249 if (r) { 1248 if (r)
1250 DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
1251 goto error; 1249 goto error;
1252 }
1253 1250
1254 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); 1251 r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
1255 if (r) { 1252 if (r)
1256 DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
1257 goto error; 1253 goto error;
1258 }
1259 1254
1260 r = dma_fence_wait_timeout(fence, false, timeout); 1255 r = dma_fence_wait_timeout(fence, false, timeout);
1261 if (r == 0) { 1256 if (r == 0)
1262 DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
1263 r = -ETIMEDOUT; 1257 r = -ETIMEDOUT;
1264 } else if (r < 0) { 1258 else if (r > 0)
1265 DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
1266 } else {
1267 DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
1268 r = 0; 1259 r = 0;
1269 }
1270 1260
1271 dma_fence_put(fence); 1261 dma_fence_put(fence);
1272 1262
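
The slimmed-down UVD IB test (and the identical VCE/VCN rewrites below) leans entirely on dma_fence_wait_timeout()'s return convention: 0 means the wait timed out, a negative value is an errno, and a positive value is the remaining timeout on success. The per-test error strings are dropped, presumably so a single caller can report the failure. A standalone model of the mapping the tests now perform:

#include <errno.h>
#include <stdio.h>

/* collapse the three-way dma_fence_wait_timeout() result into the
 * 0 / -ETIMEDOUT / -errno value the IB tests hand back */
static long wait_result_to_errno(long r)
{
	if (r == 0)
		return -ETIMEDOUT;	/* timed out */
	if (r > 0)
		return 0;		/* signalled, remaining timeout returned */
	return r;			/* already a negative errno */
}

int main(void)
{
	printf("%ld %ld %ld\n", wait_result_to_errno(0),
	       wait_result_to_errno(42), wait_result_to_errno(-EINTR));
	return 0;
}
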
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 5f3f54073818..98a1b2ce2b9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -1032,8 +1032,10 @@ out:
1032 * @ib: the IB to execute 1032 * @ib: the IB to execute
1033 * 1033 *
1034 */ 1034 */
1035void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, 1035void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
1036 unsigned vmid, bool ctx_switch) 1036 struct amdgpu_job *job,
1037 struct amdgpu_ib *ib,
1038 bool ctx_switch)
1037{ 1039{
1038 amdgpu_ring_write(ring, VCE_CMD_IB); 1040 amdgpu_ring_write(ring, VCE_CMD_IB);
1039 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); 1041 amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
@@ -1079,11 +1081,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
1079 return 0; 1081 return 0;
1080 1082
1081 r = amdgpu_ring_alloc(ring, 16); 1083 r = amdgpu_ring_alloc(ring, 16);
1082 if (r) { 1084 if (r)
1083 DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
1084 ring->idx, r);
1085 return r; 1085 return r;
1086 } 1086
1087 amdgpu_ring_write(ring, VCE_CMD_END); 1087 amdgpu_ring_write(ring, VCE_CMD_END);
1088 amdgpu_ring_commit(ring); 1088 amdgpu_ring_commit(ring);
1089 1089
@@ -1093,14 +1093,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
1093 DRM_UDELAY(1); 1093 DRM_UDELAY(1);
1094 } 1094 }
1095 1095
1096 if (i < timeout) { 1096 if (i >= timeout)
1097 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
1098 ring->idx, i);
1099 } else {
1100 DRM_ERROR("amdgpu: ring %d test failed\n",
1101 ring->idx);
1102 r = -ETIMEDOUT; 1097 r = -ETIMEDOUT;
1103 }
1104 1098
1105 return r; 1099 return r;
1106} 1100}
@@ -1121,27 +1115,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1121 return 0; 1115 return 0;
1122 1116
1123 r = amdgpu_vce_get_create_msg(ring, 1, NULL); 1117 r = amdgpu_vce_get_create_msg(ring, 1, NULL);
1124 if (r) { 1118 if (r)
1125 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
1126 goto error; 1119 goto error;
1127 }
1128 1120
1129 r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); 1121 r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
1130 if (r) { 1122 if (r)
1131 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
1132 goto error; 1123 goto error;
1133 }
1134 1124
1135 r = dma_fence_wait_timeout(fence, false, timeout); 1125 r = dma_fence_wait_timeout(fence, false, timeout);
1136 if (r == 0) { 1126 if (r == 0)
1137 DRM_ERROR("amdgpu: IB test timed out.\n");
1138 r = -ETIMEDOUT; 1127 r = -ETIMEDOUT;
1139 } else if (r < 0) { 1128 else if (r > 0)
1140 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
1141 } else {
1142 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
1143 r = 0; 1129 r = 0;
1144 } 1130
1145error: 1131error:
1146 dma_fence_put(fence); 1132 dma_fence_put(fence);
1147 return r; 1133 return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index a1f209eed4c4..50293652af14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -65,8 +65,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
65void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); 65void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
66int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); 66int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
67int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); 67int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
68void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, 68void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
69 unsigned vmid, bool ctx_switch); 69 struct amdgpu_ib *ib, bool ctx_switch);
70void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, 70void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
71 unsigned flags); 71 unsigned flags);
72int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); 72int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 27da13df2f11..e2e42e3fbcf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -425,11 +425,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
425 425
426 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); 426 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
427 r = amdgpu_ring_alloc(ring, 3); 427 r = amdgpu_ring_alloc(ring, 3);
428 if (r) { 428 if (r)
429 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
430 ring->idx, r);
431 return r; 429 return r;
432 } 430
433 amdgpu_ring_write(ring, 431 amdgpu_ring_write(ring,
434 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0)); 432 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
435 amdgpu_ring_write(ring, 0xDEADBEEF); 433 amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -441,14 +439,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
441 DRM_UDELAY(1); 439 DRM_UDELAY(1);
442 } 440 }
443 441
444 if (i < adev->usec_timeout) { 442 if (i >= adev->usec_timeout)
445 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", 443 r = -ETIMEDOUT;
446 ring->idx, i); 444
447 } else {
448 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
449 ring->idx, tmp);
450 r = -EINVAL;
451 }
452 return r; 445 return r;
453} 446}
454 447
@@ -570,30 +563,20 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
570 long r; 563 long r;
571 564
572 r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); 565 r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
573 if (r) { 566 if (r)
574 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
575 goto error; 567 goto error;
576 }
577 568
578 r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); 569 r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
579 if (r) { 570 if (r)
580 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
581 goto error; 571 goto error;
582 }
583 572
584 r = dma_fence_wait_timeout(fence, false, timeout); 573 r = dma_fence_wait_timeout(fence, false, timeout);
585 if (r == 0) { 574 if (r == 0)
586 DRM_ERROR("amdgpu: IB test timed out.\n");
587 r = -ETIMEDOUT; 575 r = -ETIMEDOUT;
588 } else if (r < 0) { 576 else if (r > 0)
589 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
590 } else {
591 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
592 r = 0; 577 r = 0;
593 }
594 578
595 dma_fence_put(fence); 579 dma_fence_put(fence);
596
597error: 580error:
598 return r; 581 return r;
599} 582}
@@ -606,11 +589,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
606 int r; 589 int r;
607 590
608 r = amdgpu_ring_alloc(ring, 16); 591 r = amdgpu_ring_alloc(ring, 16);
609 if (r) { 592 if (r)
610 DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
611 ring->idx, r);
612 return r; 593 return r;
613 } 594
614 amdgpu_ring_write(ring, VCN_ENC_CMD_END); 595 amdgpu_ring_write(ring, VCN_ENC_CMD_END);
615 amdgpu_ring_commit(ring); 596 amdgpu_ring_commit(ring);
616 597
@@ -620,14 +601,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
620 DRM_UDELAY(1); 601 DRM_UDELAY(1);
621 } 602 }
622 603
623 if (i < adev->usec_timeout) { 604 if (i >= adev->usec_timeout)
624 DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
625 ring->idx, i);
626 } else {
627 DRM_ERROR("amdgpu: ring %d test failed\n",
628 ring->idx);
629 r = -ETIMEDOUT; 605 r = -ETIMEDOUT;
630 }
631 606
632 return r; 607 return r;
633} 608}
@@ -742,27 +717,19 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
742 long r; 717 long r;
743 718
744 r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); 719 r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
745 if (r) { 720 if (r)
746 DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
747 goto error; 721 goto error;
748 }
749 722
750 r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); 723 r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
751 if (r) { 724 if (r)
752 DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
753 goto error; 725 goto error;
754 }
755 726
756 r = dma_fence_wait_timeout(fence, false, timeout); 727 r = dma_fence_wait_timeout(fence, false, timeout);
757 if (r == 0) { 728 if (r == 0)
758 DRM_ERROR("amdgpu: IB test timed out.\n");
759 r = -ETIMEDOUT; 729 r = -ETIMEDOUT;
760 } else if (r < 0) { 730 else if (r > 0)
761 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
762 } else {
763 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
764 r = 0; 731 r = 0;
765 } 732
766error: 733error:
767 dma_fence_put(fence); 734 dma_fence_put(fence);
768 return r; 735 return r;
@@ -778,11 +745,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
778 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); 745 WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
779 r = amdgpu_ring_alloc(ring, 3); 746 r = amdgpu_ring_alloc(ring, 3);
780 747
781 if (r) { 748 if (r)
782 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
783 ring->idx, r);
784 return r; 749 return r;
785 }
786 750
787 amdgpu_ring_write(ring, 751 amdgpu_ring_write(ring,
788 PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0)); 752 PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
@@ -796,14 +760,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
796 DRM_UDELAY(1); 760 DRM_UDELAY(1);
797 } 761 }
798 762
799 if (i < adev->usec_timeout) { 763 if (i >= adev->usec_timeout)
800 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", 764 r = -ETIMEDOUT;
801 ring->idx, i);
802 } else {
803 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
804 ring->idx, tmp);
805 r = -EINVAL;
806 }
807 765
808 return r; 766 return r;
809} 767}
@@ -856,21 +814,18 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
856 long r = 0; 814 long r = 0;
857 815
858 r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); 816 r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
859 if (r) { 817 if (r)
860 DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
861 goto error; 818 goto error;
862 }
863 819
864 r = dma_fence_wait_timeout(fence, false, timeout); 820 r = dma_fence_wait_timeout(fence, false, timeout);
865 if (r == 0) { 821 if (r == 0) {
866 DRM_ERROR("amdgpu: IB test timed out.\n");
867 r = -ETIMEDOUT; 822 r = -ETIMEDOUT;
868 goto error; 823 goto error;
869 } else if (r < 0) { 824 } else if (r < 0) {
870 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
871 goto error; 825 goto error;
872 } else 826 } else {
873 r = 0; 827 r = 0;
828 }
874 829
875 for (i = 0; i < adev->usec_timeout; i++) { 830 for (i = 0; i < adev->usec_timeout; i++) {
876 tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); 831 tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
@@ -879,15 +834,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
879 DRM_UDELAY(1); 834 DRM_UDELAY(1);
880 } 835 }
881 836
882 if (i < adev->usec_timeout) 837 if (i >= adev->usec_timeout)
883 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 838 r = -ETIMEDOUT;
884 else {
885 DRM_ERROR("ib test failed (0x%08X)\n", tmp);
886 r = -EINVAL;
887 }
888 839
889 dma_fence_put(fence); 840 dma_fence_put(fence);
890
891error: 841error:
892 return r; 842 return r;
893} 843}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f2f358aa0597..cfee74732edb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -23,16 +23,6 @@
23 23
24#include "amdgpu.h" 24#include "amdgpu.h"
25 25
26uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
27{
28 uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
29
30 addr -= AMDGPU_VA_RESERVED_SIZE;
31 addr = amdgpu_gmc_sign_extend(addr);
32
33 return addr;
34}
35
36bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) 26bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
37{ 27{
38 /* By now all MMIO pages except mailbox are blocked */ 28 /* By now all MMIO pages except mailbox are blocked */
@@ -41,88 +31,6 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
41 return RREG32_NO_KIQ(0xc040) == 0xffffffff; 31 return RREG32_NO_KIQ(0xc040) == 0xffffffff;
42} 32}
43 33
44int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
45{
46 int r;
47 void *ptr;
48
49 r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE,
50 AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj,
51 &adev->virt.csa_vmid0_addr, &ptr);
52 if (r)
53 return r;
54
55 memset(ptr, 0, AMDGPU_CSA_SIZE);
56 return 0;
57}
58
59void amdgpu_free_static_csa(struct amdgpu_device *adev) {
60 amdgpu_bo_free_kernel(&adev->virt.csa_obj,
61 &adev->virt.csa_vmid0_addr,
62 NULL);
63}
64
65/*
66 * amdgpu_map_static_csa should be called during amdgpu_vm_init
67 * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
68 * submission of GFX should use this virtual address within META_DATA init
69 * package to support SRIOV gfx preemption.
70 */
71int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
72 struct amdgpu_bo_va **bo_va)
73{
74 uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
75 struct ww_acquire_ctx ticket;
76 struct list_head list;
77 struct amdgpu_bo_list_entry pd;
78 struct ttm_validate_buffer csa_tv;
79 int r;
80
81 INIT_LIST_HEAD(&list);
82 INIT_LIST_HEAD(&csa_tv.head);
83 csa_tv.bo = &adev->virt.csa_obj->tbo;
84 csa_tv.shared = true;
85
86 list_add(&csa_tv.head, &list);
87 amdgpu_vm_get_pd_bo(vm, &list, &pd);
88
89 r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
90 if (r) {
91 DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
92 return r;
93 }
94
95 *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
96 if (!*bo_va) {
97 ttm_eu_backoff_reservation(&ticket, &list);
98 DRM_ERROR("failed to create bo_va for static CSA\n");
99 return -ENOMEM;
100 }
101
102 r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
103 AMDGPU_CSA_SIZE);
104 if (r) {
105 DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
106 amdgpu_vm_bo_rmv(adev, *bo_va);
107 ttm_eu_backoff_reservation(&ticket, &list);
108 return r;
109 }
110
111 r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
112 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
113 AMDGPU_PTE_EXECUTABLE);
114
115 if (r) {
116 DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
117 amdgpu_vm_bo_rmv(adev, *bo_va);
118 ttm_eu_backoff_reservation(&ticket, &list);
119 return r;
120 }
121
122 ttm_eu_backoff_reservation(&ticket, &list);
123 return 0;
124}
125
126void amdgpu_virt_init_setting(struct amdgpu_device *adev) 34void amdgpu_virt_init_setting(struct amdgpu_device *adev)
127{ 35{
128 /* enable virtual display */ 36 /* enable virtual display */
@@ -162,9 +70,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
162 if (r < 1 && (adev->in_gpu_reset || in_interrupt())) 70 if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
163 goto failed_kiq_read; 71 goto failed_kiq_read;
164 72
165 if (in_interrupt()) 73 might_sleep();
166 might_sleep();
167
168 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 74 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
169 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 75 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
170 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 76 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -210,9 +116,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
210 if (r < 1 && (adev->in_gpu_reset || in_interrupt())) 116 if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
211 goto failed_kiq_write; 117 goto failed_kiq_write;
212 118
213 if (in_interrupt()) 119 might_sleep();
214 might_sleep();
215
216 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 120 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
217 121
218 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 122 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
@@ -228,6 +132,46 @@ failed_kiq_write:
228 pr_err("failed to write reg:%x\n", reg); 132 pr_err("failed to write reg:%x\n", reg);
229} 133}
230 134
135void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
136 uint32_t reg0, uint32_t reg1,
137 uint32_t ref, uint32_t mask)
138{
139 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
140 struct amdgpu_ring *ring = &kiq->ring;
141 signed long r, cnt = 0;
142 unsigned long flags;
143 uint32_t seq;
144
145 spin_lock_irqsave(&kiq->ring_lock, flags);
146 amdgpu_ring_alloc(ring, 32);
147 amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
148 ref, mask);
149 amdgpu_fence_emit_polling(ring, &seq);
150 amdgpu_ring_commit(ring);
151 spin_unlock_irqrestore(&kiq->ring_lock, flags);
152
153 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
154
155 /* don't wait anymore for IRQ context */
156 if (r < 1 && in_interrupt())
157 goto failed_kiq;
158
159 might_sleep();
160 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
161
162 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
163 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
164 }
165
166 if (cnt > MAX_KIQ_REG_TRY)
167 goto failed_kiq;
168
169 return;
170
171failed_kiq:
172 pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
173}
174
231/** 175/**
232 * amdgpu_virt_request_full_gpu() - request full gpu access 176 * amdgpu_virt_request_full_gpu() - request full gpu access
233 * @amdgpu: amdgpu device. 177 * @amdgpu: amdgpu device.
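
amdgpu_virt_kiq_reg_write_reg_wait() extends the existing KIQ read/write helpers with a combined write-then-wait packet, using the same polling discipline: bail out early in IRQ context, otherwise msleep() and retry up to MAX_KIQ_REG_TRY. The two older helpers also now call might_sleep() unconditionally instead of guarding it behind an in_interrupt() check. A hypothetical SR-IOV caller, not from this patch; the vmhub register fields and the ack mask are placeholders for illustration only:

static void gmc_flush_tlb_sriov(struct amdgpu_device *adev,
				struct amdgpu_vmhub *hub,
				unsigned int eng, u32 req)
{
	if (amdgpu_sriov_runtime(adev)) {
		/* let the KIQ do the invalidate request + ack poll for us */
		amdgpu_virt_kiq_reg_write_reg_wait(adev,
						   hub->vm_inv_eng0_req + eng,
						   hub->vm_inv_eng0_ack + eng,
						   req, 1 << 0);
		return;
	}
	/* the bare-metal path would write the request register directly */
}
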
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 880ac113a3a9..0728fbc9a692 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -238,7 +238,6 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
238struct amdgpu_virt { 238struct amdgpu_virt {
239 uint32_t caps; 239 uint32_t caps;
240 struct amdgpu_bo *csa_obj; 240 struct amdgpu_bo *csa_obj;
241 uint64_t csa_vmid0_addr;
242 bool chained_ib_support; 241 bool chained_ib_support;
243 uint32_t reg_val_offs; 242 uint32_t reg_val_offs;
244 struct amdgpu_irq_src ack_irq; 243 struct amdgpu_irq_src ack_irq;
@@ -251,8 +250,6 @@ struct amdgpu_virt {
251 uint32_t gim_feature; 250 uint32_t gim_feature;
252}; 251};
253 252
254#define AMDGPU_CSA_SIZE (8 * 1024)
255
256#define amdgpu_sriov_enabled(adev) \ 253#define amdgpu_sriov_enabled(adev) \
257((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) 254((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
258 255
@@ -277,17 +274,13 @@ static inline bool is_virtual_machine(void)
277#endif 274#endif
278} 275}
279 276
280struct amdgpu_vm;
281
282uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
283bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); 277bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
284int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
285int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
286 struct amdgpu_bo_va **bo_va);
287void amdgpu_free_static_csa(struct amdgpu_device *adev);
288void amdgpu_virt_init_setting(struct amdgpu_device *adev); 278void amdgpu_virt_init_setting(struct amdgpu_device *adev);
289uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); 279uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
290void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); 280void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
281void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
282 uint32_t reg0, uint32_t rreg1,
283 uint32_t ref, uint32_t mask);
291int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); 284int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
292int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); 285int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
293int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); 286int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d6c47972062a..58a2363040dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1632,13 +1632,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1632 continue; 1632 continue;
1633 } 1633 }
1634 1634
1635 /* First check if the entry is already handled */
1636 if (cursor.pfn < frag_start) {
1637 cursor.entry->huge = true;
1638 amdgpu_vm_pt_next(adev, &cursor);
1639 continue;
1640 }
1641
1642 /* If it isn't already handled it can't be a huge page */ 1635 /* If it isn't already handled it can't be a huge page */
1643 if (cursor.entry->huge) { 1636 if (cursor.entry->huge) {
1644 /* Add the entry to the relocated list to update it. */ 1637 /* Add the entry to the relocated list to update it. */
@@ -1701,8 +1694,17 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1701 } 1694 }
1702 } while (frag_start < entry_end); 1695 } while (frag_start < entry_end);
1703 1696
1704 if (frag >= shift) 1697 if (amdgpu_vm_pt_descendant(adev, &cursor)) {
1698 /* Mark all child entries as huge */
1699 while (cursor.pfn < frag_start) {
1700 cursor.entry->huge = true;
1701 amdgpu_vm_pt_next(adev, &cursor);
1702 }
1703
1704 } else if (frag >= shift) {
1705 /* or just move on to the next on the same level. */
1705 amdgpu_vm_pt_next(adev, &cursor); 1706 amdgpu_vm_pt_next(adev, &cursor);
1707 }
1706 } 1708 }
1707 1709
1708 return 0; 1710 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 897afbb348c1..909216a9b447 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -63,7 +63,7 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
63 63
64int amdgpu_xgmi_add_device(struct amdgpu_device *adev) 64int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
65{ 65{
66 struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE]; 66 struct psp_xgmi_topology_info *tmp_topology;
67 struct amdgpu_hive_info *hive; 67 struct amdgpu_hive_info *hive;
68 struct amdgpu_xgmi *entry; 68 struct amdgpu_xgmi *entry;
69 struct amdgpu_device *tmp_adev; 69 struct amdgpu_device *tmp_adev;
@@ -73,10 +73,12 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
73 if ((adev->asic_type < CHIP_VEGA20) || 73 if ((adev->asic_type < CHIP_VEGA20) ||
74 (adev->flags & AMD_IS_APU) ) 74 (adev->flags & AMD_IS_APU) )
75 return 0; 75 return 0;
76 adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp); 76 adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
77 adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp); 77 adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
78 78
79 memset(&tmp_topology[0], 0, sizeof(tmp_topology)); 79 tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL);
80 if (!tmp_topology)
81 return -ENOMEM;
80 mutex_lock(&xgmi_mutex); 82 mutex_lock(&xgmi_mutex);
81 hive = amdgpu_get_xgmi_hive(adev); 83 hive = amdgpu_get_xgmi_hive(adev);
82 if (!hive) 84 if (!hive)
@@ -84,23 +86,28 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
84 86
85 list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); 87 list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
86 list_for_each_entry(entry, &hive->device_list, head) 88 list_for_each_entry(entry, &hive->device_list, head)
87 tmp_topology[count++].device_id = entry->device_id; 89 tmp_topology->nodes[count++].node_id = entry->node_id;
88 90
89 ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology); 91 /* Each psp need to get the latest topology */
90 if (ret) { 92 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
91 dev_err(adev->dev, 93 ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology);
92 "XGMI: Get topology failure on device %llx, hive %llx, ret %d", 94 if (ret) {
93 adev->gmc.xgmi.device_id, 95 dev_err(tmp_adev->dev,
94 adev->gmc.xgmi.hive_id, ret); 96 "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
95 goto exit; 97 tmp_adev->gmc.xgmi.node_id,
98 tmp_adev->gmc.xgmi.hive_id, ret);
99 /* To do : continue with some node failed or disable the whole hive */
100 break;
101 }
96 } 102 }
103
97 /* Each psp need to set the latest topology */ 104 /* Each psp need to set the latest topology */
98 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { 105 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
99 ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology); 106 ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
100 if (ret) { 107 if (ret) {
101 dev_err(tmp_adev->dev, 108 dev_err(tmp_adev->dev,
102 "XGMI: Set topology failure on device %llx, hive %llx, ret %d", 109 "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
103 tmp_adev->gmc.xgmi.device_id, 110 tmp_adev->gmc.xgmi.node_id,
104 tmp_adev->gmc.xgmi.hive_id, ret); 111 tmp_adev->gmc.xgmi.hive_id, ret);
105 /* To do : continue with some node failed or disable the whole hive */ 112 /* To do : continue with some node failed or disable the whole hive */
106 break; 113 break;
@@ -113,7 +120,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
113 120
114exit: 121exit:
115 mutex_unlock(&xgmi_mutex); 122 mutex_unlock(&xgmi_mutex);
123 kfree(tmp_topology);
116 return ret; 124 return ret;
117} 125}
118
119
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 79220a91abe3..86e14c754dd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -743,19 +743,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
743 743
744 if (pi->caps_sq_ramping || pi->caps_db_ramping || 744 if (pi->caps_sq_ramping || pi->caps_db_ramping ||
745 pi->caps_td_ramping || pi->caps_tcp_ramping) { 745 pi->caps_td_ramping || pi->caps_tcp_ramping) {
746 adev->gfx.rlc.funcs->enter_safe_mode(adev); 746 amdgpu_gfx_rlc_enter_safe_mode(adev);
747 747
748 if (enable) { 748 if (enable) {
749 ret = ci_program_pt_config_registers(adev, didt_config_ci); 749 ret = ci_program_pt_config_registers(adev, didt_config_ci);
750 if (ret) { 750 if (ret) {
751 adev->gfx.rlc.funcs->exit_safe_mode(adev); 751 amdgpu_gfx_rlc_exit_safe_mode(adev);
752 return ret; 752 return ret;
753 } 753 }
754 } 754 }
755 755
756 ci_do_enable_didt(adev, enable); 756 ci_do_enable_didt(adev, enable);
757 757
758 adev->gfx.rlc.funcs->exit_safe_mode(adev); 758 amdgpu_gfx_rlc_exit_safe_mode(adev);
759 } 759 }
760 760
761 return 0; 761 return 0;
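
The DiDT path no longer reaches through adev->gfx.rlc.funcs for safe mode; it calls the new amdgpu_gfx_rlc_enter_safe_mode()/amdgpu_gfx_rlc_exit_safe_mode() helpers instead. A plausible shape for the enter side, assuming the helper simply guards a dispatch through the per-ASIC callbacks; the in_safe_mode field and set_safe_mode callback names are assumptions, not visible in this hunk:

void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
{
	if (adev->gfx.rlc.in_safe_mode)
		return;	/* already entered, nothing to do */

	adev->gfx.rlc.funcs->set_safe_mode(adev);
	adev->gfx.rlc.in_safe_mode = true;
}
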
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index b918c8886b75..45795191de1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
198 198
199static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 199static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
200{ 200{
201 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 201 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
202 int i; 202 int i;
203 203
204 for (i = 0; i < count; i++) 204 for (i = 0; i < count; i++)
@@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
218 * Schedule an IB in the DMA ring (CIK). 218 * Schedule an IB in the DMA ring (CIK).
219 */ 219 */
220static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, 220static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
221 struct amdgpu_job *job,
221 struct amdgpu_ib *ib, 222 struct amdgpu_ib *ib,
222 unsigned vmid, bool ctx_switch) 223 bool ctx_switch)
223{ 224{
225 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
224 u32 extra_bits = vmid & 0xf; 226 u32 extra_bits = vmid & 0xf;
225 227
226 /* IB packet must end on a 8 DW boundary */ 228 /* IB packet must end on a 8 DW boundary */
@@ -316,8 +318,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
316 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 318 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
317 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0); 319 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
318 } 320 }
319 sdma0->ready = false; 321 sdma0->sched.ready = false;
320 sdma1->ready = false; 322 sdma1->sched.ready = false;
321} 323}
322 324
323/** 325/**
@@ -494,18 +496,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
494 /* enable DMA IBs */ 496 /* enable DMA IBs */
495 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 497 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
496 498
497 ring->ready = true; 499 ring->sched.ready = true;
498 } 500 }
499 501
500 cik_sdma_enable(adev, true); 502 cik_sdma_enable(adev, true);
501 503
502 for (i = 0; i < adev->sdma.num_instances; i++) { 504 for (i = 0; i < adev->sdma.num_instances; i++) {
503 ring = &adev->sdma.instance[i].ring; 505 ring = &adev->sdma.instance[i].ring;
504 r = amdgpu_ring_test_ring(ring); 506 r = amdgpu_ring_test_helper(ring);
505 if (r) { 507 if (r)
506 ring->ready = false;
507 return r; 508 return r;
508 }
509 509
510 if (adev->mman.buffer_funcs_ring == ring) 510 if (adev->mman.buffer_funcs_ring == ring)
511 amdgpu_ttm_set_buffer_funcs_status(adev, true); 511 amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -618,21 +618,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
618 u64 gpu_addr; 618 u64 gpu_addr;
619 619
620 r = amdgpu_device_wb_get(adev, &index); 620 r = amdgpu_device_wb_get(adev, &index);
621 if (r) { 621 if (r)
622 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
623 return r; 622 return r;
624 }
625 623
626 gpu_addr = adev->wb.gpu_addr + (index * 4); 624 gpu_addr = adev->wb.gpu_addr + (index * 4);
627 tmp = 0xCAFEDEAD; 625 tmp = 0xCAFEDEAD;
628 adev->wb.wb[index] = cpu_to_le32(tmp); 626 adev->wb.wb[index] = cpu_to_le32(tmp);
629 627
630 r = amdgpu_ring_alloc(ring, 5); 628 r = amdgpu_ring_alloc(ring, 5);
631 if (r) { 629 if (r)
632 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 630 goto error_free_wb;
633 amdgpu_device_wb_free(adev, index); 631
634 return r;
635 }
636 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); 632 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
637 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 633 amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
638 amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); 634 amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
@@ -647,15 +643,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
647 DRM_UDELAY(1); 643 DRM_UDELAY(1);
648 } 644 }
649 645
650 if (i < adev->usec_timeout) { 646 if (i >= adev->usec_timeout)
651 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); 647 r = -ETIMEDOUT;
652 } else {
653 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
654 ring->idx, tmp);
655 r = -EINVAL;
656 }
657 amdgpu_device_wb_free(adev, index);
658 648
649error_free_wb:
650 amdgpu_device_wb_free(adev, index);
659 return r; 651 return r;
660} 652}
661 653
@@ -678,20 +670,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
678 long r; 670 long r;
679 671
680 r = amdgpu_device_wb_get(adev, &index); 672 r = amdgpu_device_wb_get(adev, &index);
681 if (r) { 673 if (r)
682 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
683 return r; 674 return r;
684 }
685 675
686 gpu_addr = adev->wb.gpu_addr + (index * 4); 676 gpu_addr = adev->wb.gpu_addr + (index * 4);
687 tmp = 0xCAFEDEAD; 677 tmp = 0xCAFEDEAD;
688 adev->wb.wb[index] = cpu_to_le32(tmp); 678 adev->wb.wb[index] = cpu_to_le32(tmp);
689 memset(&ib, 0, sizeof(ib)); 679 memset(&ib, 0, sizeof(ib));
690 r = amdgpu_ib_get(adev, NULL, 256, &ib); 680 r = amdgpu_ib_get(adev, NULL, 256, &ib);
691 if (r) { 681 if (r)
692 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
693 goto err0; 682 goto err0;
694 }
695 683
696 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, 684 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
697 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 685 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
@@ -706,21 +694,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
706 694
707 r = dma_fence_wait_timeout(f, false, timeout); 695 r = dma_fence_wait_timeout(f, false, timeout);
708 if (r == 0) { 696 if (r == 0) {
709 DRM_ERROR("amdgpu: IB test timed out\n");
710 r = -ETIMEDOUT; 697 r = -ETIMEDOUT;
711 goto err1; 698 goto err1;
712 } else if (r < 0) { 699 } else if (r < 0) {
713 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
714 goto err1; 700 goto err1;
715 } 701 }
716 tmp = le32_to_cpu(adev->wb.wb[index]); 702 tmp = le32_to_cpu(adev->wb.wb[index]);
717 if (tmp == 0xDEADBEEF) { 703 if (tmp == 0xDEADBEEF)
718 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
719 r = 0; 704 r = 0;
720 } else { 705 else
721 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
722 r = -EINVAL; 706 r = -EINVAL;
723 }
724 707
725err1: 708err1:
726 amdgpu_ib_free(adev, &ib, NULL); 709 amdgpu_ib_free(adev, &ib, NULL);
@@ -822,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
822 */ 805 */
823static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 806static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
824{ 807{
825 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 808 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
826 u32 pad_count; 809 u32 pad_count;
827 int i; 810 int i;
828 811
@@ -1214,8 +1197,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
1214 struct amdgpu_irq_src *source, 1197 struct amdgpu_irq_src *source,
1215 struct amdgpu_iv_entry *entry) 1198 struct amdgpu_iv_entry *entry)
1216{ 1199{
1200 u8 instance_id;
1201
1217 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 1202 DRM_ERROR("Illegal instruction in SDMA command stream\n");
1218 schedule_work(&adev->reset_work); 1203 instance_id = (entry->ring_id & 0x3) >> 0;
1204 drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
1219 return 0; 1205 return 0;
1220} 1206}
1221 1207
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index d76eb27945dc..1dc3013ea1d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1775,18 +1775,15 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
1775 int r; 1775 int r;
1776 1776
1777 r = amdgpu_gfx_scratch_get(adev, &scratch); 1777 r = amdgpu_gfx_scratch_get(adev, &scratch);
1778 if (r) { 1778 if (r)
1779 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
1780 return r; 1779 return r;
1781 } 1780
1782 WREG32(scratch, 0xCAFEDEAD); 1781 WREG32(scratch, 0xCAFEDEAD);
1783 1782
1784 r = amdgpu_ring_alloc(ring, 3); 1783 r = amdgpu_ring_alloc(ring, 3);
1785 if (r) { 1784 if (r)
1786 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); 1785 goto error_free_scratch;
1787 amdgpu_gfx_scratch_free(adev, scratch); 1786
1788 return r;
1789 }
1790 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 1787 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1791 amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START)); 1788 amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
1792 amdgpu_ring_write(ring, 0xDEADBEEF); 1789 amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -1798,13 +1795,11 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
1798 break; 1795 break;
1799 DRM_UDELAY(1); 1796 DRM_UDELAY(1);
1800 } 1797 }
1801 if (i < adev->usec_timeout) { 1798
1802 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); 1799 if (i >= adev->usec_timeout)
1803 } else { 1800 r = -ETIMEDOUT;
1804 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 1801
1805 ring->idx, scratch, tmp); 1802error_free_scratch:
1806 r = -EINVAL;
1807 }
1808 amdgpu_gfx_scratch_free(adev, scratch); 1803 amdgpu_gfx_scratch_free(adev, scratch);
1809 return r; 1804 return r;
1810} 1805}
@@ -1845,9 +1840,11 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
1845} 1840}
1846 1841
1847static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, 1842static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
1843 struct amdgpu_job *job,
1848 struct amdgpu_ib *ib, 1844 struct amdgpu_ib *ib,
1849 unsigned vmid, bool ctx_switch) 1845 bool ctx_switch)
1850{ 1846{
1847 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
1851 u32 header, control = 0; 1848 u32 header, control = 0;
1852 1849
1853 /* insert SWITCH_BUFFER packet before first IB in the ring frame */ 1850 /* insert SWITCH_BUFFER packet before first IB in the ring frame */
@@ -1892,17 +1889,15 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1892 long r; 1889 long r;
1893 1890
1894 r = amdgpu_gfx_scratch_get(adev, &scratch); 1891 r = amdgpu_gfx_scratch_get(adev, &scratch);
1895 if (r) { 1892 if (r)
1896 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
1897 return r; 1893 return r;
1898 } 1894
1899 WREG32(scratch, 0xCAFEDEAD); 1895 WREG32(scratch, 0xCAFEDEAD);
1900 memset(&ib, 0, sizeof(ib)); 1896 memset(&ib, 0, sizeof(ib));
1901 r = amdgpu_ib_get(adev, NULL, 256, &ib); 1897 r = amdgpu_ib_get(adev, NULL, 256, &ib);
1902 if (r) { 1898 if (r)
1903 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
1904 goto err1; 1899 goto err1;
1905 } 1900
1906 ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1); 1901 ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
1907 ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START)); 1902 ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
1908 ib.ptr[2] = 0xDEADBEEF; 1903 ib.ptr[2] = 0xDEADBEEF;
@@ -1914,22 +1909,16 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1914 1909
1915 r = dma_fence_wait_timeout(f, false, timeout); 1910 r = dma_fence_wait_timeout(f, false, timeout);
1916 if (r == 0) { 1911 if (r == 0) {
1917 DRM_ERROR("amdgpu: IB test timed out\n");
1918 r = -ETIMEDOUT; 1912 r = -ETIMEDOUT;
1919 goto err2; 1913 goto err2;
1920 } else if (r < 0) { 1914 } else if (r < 0) {
1921 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
1922 goto err2; 1915 goto err2;
1923 } 1916 }
1924 tmp = RREG32(scratch); 1917 tmp = RREG32(scratch);
1925 if (tmp == 0xDEADBEEF) { 1918 if (tmp == 0xDEADBEEF)
1926 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
1927 r = 0; 1919 r = 0;
1928 } else { 1920 else
1929 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
1930 scratch, tmp);
1931 r = -EINVAL; 1921 r = -EINVAL;
1932 }
1933 1922
1934err2: 1923err2:
1935 amdgpu_ib_free(adev, &ib, NULL); 1924 amdgpu_ib_free(adev, &ib, NULL);
@@ -1950,9 +1939,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
1950 CP_ME_CNTL__CE_HALT_MASK)); 1939 CP_ME_CNTL__CE_HALT_MASK));
1951 WREG32(mmSCRATCH_UMSK, 0); 1940 WREG32(mmSCRATCH_UMSK, 0);
1952 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1941 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1953 adev->gfx.gfx_ring[i].ready = false; 1942 adev->gfx.gfx_ring[i].sched.ready = false;
1954 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1943 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1955 adev->gfx.compute_ring[i].ready = false; 1944 adev->gfx.compute_ring[i].sched.ready = false;
1956 } 1945 }
1957 udelay(50); 1946 udelay(50);
1958} 1947}
@@ -2124,12 +2113,9 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
2124 2113
2125 /* start the rings */ 2114 /* start the rings */
2126 gfx_v6_0_cp_gfx_start(adev); 2115 gfx_v6_0_cp_gfx_start(adev);
2127 ring->ready = true; 2116 r = amdgpu_ring_test_helper(ring);
2128 r = amdgpu_ring_test_ring(ring); 2117 if (r)
2129 if (r) {
2130 ring->ready = false;
2131 return r; 2118 return r;
2132 }
2133 2119
2134 return 0; 2120 return 0;
2135} 2121}
@@ -2227,14 +2213,11 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
2227 WREG32(mmCP_RB2_CNTL, tmp); 2213 WREG32(mmCP_RB2_CNTL, tmp);
2228 WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8); 2214 WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8);
2229 2215
2230 adev->gfx.compute_ring[0].ready = false;
2231 adev->gfx.compute_ring[1].ready = false;
2232 2216
2233 for (i = 0; i < 2; i++) { 2217 for (i = 0; i < 2; i++) {
2234 r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]); 2218 r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]);
2235 if (r) 2219 if (r)
2236 return r; 2220 return r;
2237 adev->gfx.compute_ring[i].ready = true;
2238 } 2221 }
2239 2222
2240 return 0; 2223 return 0;
@@ -2368,18 +2351,11 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
2368 amdgpu_ring_write(ring, val); 2351 amdgpu_ring_write(ring, val);
2369} 2352}
2370 2353
2371static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
2372{
2373 amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
2374 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
2375 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
2376}
2377
2378static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) 2354static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
2379{ 2355{
2380 const u32 *src_ptr; 2356 const u32 *src_ptr;
2381 volatile u32 *dst_ptr; 2357 volatile u32 *dst_ptr;
2382 u32 dws, i; 2358 u32 dws;
2383 u64 reg_list_mc_addr; 2359 u64 reg_list_mc_addr;
2384 const struct cs_section_def *cs_data; 2360 const struct cs_section_def *cs_data;
2385 int r; 2361 int r;
@@ -2394,26 +2370,10 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
2394 cs_data = adev->gfx.rlc.cs_data; 2370 cs_data = adev->gfx.rlc.cs_data;
2395 2371
2396 if (src_ptr) { 2372 if (src_ptr) {
2397 /* save restore block */ 2373 /* init save restore block */
2398 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, 2374 r = amdgpu_gfx_rlc_init_sr(adev, dws);
2399 AMDGPU_GEM_DOMAIN_VRAM, 2375 if (r)
2400 &adev->gfx.rlc.save_restore_obj,
2401 &adev->gfx.rlc.save_restore_gpu_addr,
2402 (void **)&adev->gfx.rlc.sr_ptr);
2403 if (r) {
2404 dev_warn(adev->dev, "(%d) create RLC sr bo failed\n",
2405 r);
2406 gfx_v6_0_rlc_fini(adev);
2407 return r; 2376 return r;
2408 }
2409
2410 /* write the sr buffer */
2411 dst_ptr = adev->gfx.rlc.sr_ptr;
2412 for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
2413 dst_ptr[i] = cpu_to_le32(src_ptr[i]);
2414
2415 amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
2416 amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
2417 } 2377 }
2418 2378
2419 if (cs_data) { 2379 if (cs_data) {
@@ -2428,7 +2388,7 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
2428 (void **)&adev->gfx.rlc.cs_ptr); 2388 (void **)&adev->gfx.rlc.cs_ptr);
2429 if (r) { 2389 if (r) {
2430 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 2390 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
2431 gfx_v6_0_rlc_fini(adev); 2391 amdgpu_gfx_rlc_fini(adev);
2432 return r; 2392 return r;
2433 } 2393 }
2434 2394
@@ -2549,8 +2509,8 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
2549 if (!adev->gfx.rlc_fw) 2509 if (!adev->gfx.rlc_fw)
2550 return -EINVAL; 2510 return -EINVAL;
2551 2511
2552 gfx_v6_0_rlc_stop(adev); 2512 adev->gfx.rlc.funcs->stop(adev);
2553 gfx_v6_0_rlc_reset(adev); 2513 adev->gfx.rlc.funcs->reset(adev);
2554 gfx_v6_0_init_pg(adev); 2514 gfx_v6_0_init_pg(adev);
2555 gfx_v6_0_init_cg(adev); 2515 gfx_v6_0_init_cg(adev);
2556 2516
@@ -2578,7 +2538,7 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
2578 WREG32(mmRLC_UCODE_ADDR, 0); 2538 WREG32(mmRLC_UCODE_ADDR, 0);
2579 2539
2580 gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev)); 2540 gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev));
2581 gfx_v6_0_rlc_start(adev); 2541 adev->gfx.rlc.funcs->start(adev);
2582 2542
2583 return 0; 2543 return 0;
2584} 2544}
@@ -3075,6 +3035,14 @@ static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
3075 .select_me_pipe_q = &gfx_v6_0_select_me_pipe_q 3035 .select_me_pipe_q = &gfx_v6_0_select_me_pipe_q
3076}; 3036};
3077 3037
3038static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
3039 .init = gfx_v6_0_rlc_init,
3040 .resume = gfx_v6_0_rlc_resume,
3041 .stop = gfx_v6_0_rlc_stop,
3042 .reset = gfx_v6_0_rlc_reset,
3043 .start = gfx_v6_0_rlc_start
3044};
3045
3078static int gfx_v6_0_early_init(void *handle) 3046static int gfx_v6_0_early_init(void *handle)
3079{ 3047{
3080 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3048 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3082,6 +3050,7 @@ static int gfx_v6_0_early_init(void *handle)
3082 adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; 3050 adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
3083 adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS; 3051 adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS;
3084 adev->gfx.funcs = &gfx_v6_0_gfx_funcs; 3052 adev->gfx.funcs = &gfx_v6_0_gfx_funcs;
3053 adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs;
3085 gfx_v6_0_set_ring_funcs(adev); 3054 gfx_v6_0_set_ring_funcs(adev);
3086 gfx_v6_0_set_irq_funcs(adev); 3055 gfx_v6_0_set_irq_funcs(adev);
3087 3056
@@ -3114,7 +3083,7 @@ static int gfx_v6_0_sw_init(void *handle)
3114 return r; 3083 return r;
3115 } 3084 }
3116 3085
3117 r = gfx_v6_0_rlc_init(adev); 3086 r = adev->gfx.rlc.funcs->init(adev);
3118 if (r) { 3087 if (r) {
3119 DRM_ERROR("Failed to init rlc BOs!\n"); 3088 DRM_ERROR("Failed to init rlc BOs!\n");
3120 return r; 3089 return r;
@@ -3165,7 +3134,7 @@ static int gfx_v6_0_sw_fini(void *handle)
3165 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3134 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3166 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 3135 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
3167 3136
3168 gfx_v6_0_rlc_fini(adev); 3137 amdgpu_gfx_rlc_fini(adev);
3169 3138
3170 return 0; 3139 return 0;
3171} 3140}
@@ -3177,7 +3146,7 @@ static int gfx_v6_0_hw_init(void *handle)
3177 3146
3178 gfx_v6_0_constants_init(adev); 3147 gfx_v6_0_constants_init(adev);
3179 3148
3180 r = gfx_v6_0_rlc_resume(adev); 3149 r = adev->gfx.rlc.funcs->resume(adev);
3181 if (r) 3150 if (r)
3182 return r; 3151 return r;
3183 3152
@@ -3195,7 +3164,7 @@ static int gfx_v6_0_hw_fini(void *handle)
3195 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3164 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3196 3165
3197 gfx_v6_0_cp_enable(adev, false); 3166 gfx_v6_0_cp_enable(adev, false);
3198 gfx_v6_0_rlc_stop(adev); 3167 adev->gfx.rlc.funcs->stop(adev);
3199 gfx_v6_0_fini_pg(adev); 3168 gfx_v6_0_fini_pg(adev);
3200 3169
3201 return 0; 3170 return 0;
@@ -3393,12 +3362,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
3393 return 0; 3362 return 0;
3394} 3363}
3395 3364
3365static void gfx_v6_0_fault(struct amdgpu_device *adev,
3366 struct amdgpu_iv_entry *entry)
3367{
3368 struct amdgpu_ring *ring;
3369
3370 switch (entry->ring_id) {
3371 case 0:
3372 ring = &adev->gfx.gfx_ring[0];
3373 break;
3374 case 1:
3375 case 2:
3376 ring = &adev->gfx.compute_ring[entry->ring_id - 1];
3377 break;
3378 default:
3379 return;
3380 }
3381 drm_sched_fault(&ring->sched);
3382}
3383
3396static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev, 3384static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
3397 struct amdgpu_irq_src *source, 3385 struct amdgpu_irq_src *source,
3398 struct amdgpu_iv_entry *entry) 3386 struct amdgpu_iv_entry *entry)
3399{ 3387{
3400 DRM_ERROR("Illegal register access in command stream\n"); 3388 DRM_ERROR("Illegal register access in command stream\n");
3401 schedule_work(&adev->reset_work); 3389 gfx_v6_0_fault(adev, entry);
3402 return 0; 3390 return 0;
3403} 3391}
3404 3392
@@ -3407,7 +3395,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
3407 struct amdgpu_iv_entry *entry) 3395 struct amdgpu_iv_entry *entry)
3408{ 3396{
3409 DRM_ERROR("Illegal instruction in command stream\n"); 3397 DRM_ERROR("Illegal instruction in command stream\n");
3410 schedule_work(&adev->reset_work); 3398 gfx_v6_0_fault(adev, entry);
3411 return 0; 3399 return 0;
3412} 3400}
3413 3401
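
Throughout gfx_v6_0.c the open-coded "ring->ready = true; test; clear on failure" sequences become amdgpu_ring_test_helper() calls, and the ready flag itself moves into ring->sched.ready so the DRM scheduler sees it directly. A plausible shape for that helper, assuming it simply records the ring-test result; the real body lives in amdgpu_ring.c and is not part of this hunk:

int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);

	ring->sched.ready = !r;	/* scheduler only feeds rings that passed */
	return r;
}
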
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 0e72bc09939a..f467b9bd090d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -882,7 +882,6 @@ static const u32 kalindi_rlc_save_restore_register_list[] =
882 882
883static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); 883static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev);
884static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); 884static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
885static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev);
886static void gfx_v7_0_init_pg(struct amdgpu_device *adev); 885static void gfx_v7_0_init_pg(struct amdgpu_device *adev);
887static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); 886static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev);
888 887
@@ -2064,17 +2063,14 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2064 int r; 2063 int r;
2065 2064
2066 r = amdgpu_gfx_scratch_get(adev, &scratch); 2065 r = amdgpu_gfx_scratch_get(adev, &scratch);
2067 if (r) { 2066 if (r)
2068 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
2069 return r; 2067 return r;
2070 } 2068
2071 WREG32(scratch, 0xCAFEDEAD); 2069 WREG32(scratch, 0xCAFEDEAD);
2072 r = amdgpu_ring_alloc(ring, 3); 2070 r = amdgpu_ring_alloc(ring, 3);
2073 if (r) { 2071 if (r)
2074 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); 2072 goto error_free_scratch;
2075 amdgpu_gfx_scratch_free(adev, scratch); 2073
2076 return r;
2077 }
2078 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 2074 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2079 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 2075 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2080 amdgpu_ring_write(ring, 0xDEADBEEF); 2076 amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -2086,13 +2082,10 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
2086 break; 2082 break;
2087 DRM_UDELAY(1); 2083 DRM_UDELAY(1);
2088 } 2084 }
2089 if (i < adev->usec_timeout) { 2085 if (i >= adev->usec_timeout)
2090 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); 2086 r = -ETIMEDOUT;
2091 } else { 2087
2092 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 2088error_free_scratch:
2093 ring->idx, scratch, tmp);
2094 r = -EINVAL;
2095 }
2096 amdgpu_gfx_scratch_free(adev, scratch); 2089 amdgpu_gfx_scratch_free(adev, scratch);
2097 return r; 2090 return r;
2098} 2091}
@@ -2233,9 +2226,11 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
2233 * on the gfx ring for execution by the GPU. 2226 * on the gfx ring for execution by the GPU.
2234 */ 2227 */
2235static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 2228static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2236 struct amdgpu_ib *ib, 2229 struct amdgpu_job *job,
2237 unsigned vmid, bool ctx_switch) 2230 struct amdgpu_ib *ib,
2231 bool ctx_switch)
2238{ 2232{
2233 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
2239 u32 header, control = 0; 2234 u32 header, control = 0;
2240 2235
2241 /* insert SWITCH_BUFFER packet before first IB in the ring frame */ 2236 /* insert SWITCH_BUFFER packet before first IB in the ring frame */
@@ -2262,9 +2257,11 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
2262} 2257}
2263 2258
2264static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 2259static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2260 struct amdgpu_job *job,
2265 struct amdgpu_ib *ib, 2261 struct amdgpu_ib *ib,
2266 unsigned vmid, bool ctx_switch) 2262 bool ctx_switch)
2267{ 2263{
2264 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
2268 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 2265 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
2269 2266
2270 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 2267 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
@@ -2316,17 +2313,15 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
2316 long r; 2313 long r;
2317 2314
2318 r = amdgpu_gfx_scratch_get(adev, &scratch); 2315 r = amdgpu_gfx_scratch_get(adev, &scratch);
2319 if (r) { 2316 if (r)
2320 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
2321 return r; 2317 return r;
2322 } 2318
2323 WREG32(scratch, 0xCAFEDEAD); 2319 WREG32(scratch, 0xCAFEDEAD);
2324 memset(&ib, 0, sizeof(ib)); 2320 memset(&ib, 0, sizeof(ib));
2325 r = amdgpu_ib_get(adev, NULL, 256, &ib); 2321 r = amdgpu_ib_get(adev, NULL, 256, &ib);
2326 if (r) { 2322 if (r)
2327 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
2328 goto err1; 2323 goto err1;
2329 } 2324
2330 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2325 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2331 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); 2326 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
2332 ib.ptr[2] = 0xDEADBEEF; 2327 ib.ptr[2] = 0xDEADBEEF;
@@ -2338,22 +2333,16 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
2338 2333
2339 r = dma_fence_wait_timeout(f, false, timeout); 2334 r = dma_fence_wait_timeout(f, false, timeout);
2340 if (r == 0) { 2335 if (r == 0) {
2341 DRM_ERROR("amdgpu: IB test timed out\n");
2342 r = -ETIMEDOUT; 2336 r = -ETIMEDOUT;
2343 goto err2; 2337 goto err2;
2344 } else if (r < 0) { 2338 } else if (r < 0) {
2345 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
2346 goto err2; 2339 goto err2;
2347 } 2340 }
2348 tmp = RREG32(scratch); 2341 tmp = RREG32(scratch);
2349 if (tmp == 0xDEADBEEF) { 2342 if (tmp == 0xDEADBEEF)
2350 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
2351 r = 0; 2343 r = 0;
2352 } else { 2344 else
2353 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
2354 scratch, tmp);
2355 r = -EINVAL; 2345 r = -EINVAL;
2356 }
2357 2346
2358err2: 2347err2:
2359 amdgpu_ib_free(adev, &ib, NULL); 2348 amdgpu_ib_free(adev, &ib, NULL);
@@ -2403,7 +2392,7 @@ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 	} else {
 		WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-			adev->gfx.gfx_ring[i].ready = false;
+			adev->gfx.gfx_ring[i].sched.ready = false;
 	}
 	udelay(50);
 }
@@ -2613,12 +2602,9 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
 
 	/* start the ring */
 	gfx_v7_0_cp_gfx_start(adev);
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
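amdgpu_ring_test_helper() is introduced elsewhere in this series; the call sites above suggest it wraps the plain ring test and folds the old ready-flag bookkeeping into ring->sched.ready. A sketch of the expected behaviour, assumed rather than quoted from the patch:

int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);

	/* the scheduler ready bit replaces the old ring->ready flag */
	ring->sched.ready = !r;

	return r;
}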
@@ -2675,7 +2661,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	} else {
 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
-			adev->gfx.compute_ring[i].ready = false;
+			adev->gfx.compute_ring[i].sched.ready = false;
 	}
 	udelay(50);
 }
@@ -2781,7 +2767,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 		* GFX7_MEC_HPD_SIZE * 2;
 
 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT,
+				      AMDGPU_GEM_DOMAIN_VRAM,
 				      &adev->gfx.mec.hpd_eop_obj,
 				      &adev->gfx.mec.hpd_eop_gpu_addr,
 				      (void **)&hpd);
@@ -3106,10 +3092,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
+		amdgpu_ring_test_helper(ring);
 	}
 
 	return 0;
@@ -3268,18 +3251,10 @@ static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
  * The RLC is a multi-purpose microengine that handles a
  * variety of functions.
  */
-static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev)
-{
-	amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
-}
-
 static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
 {
 	const u32 *src_ptr;
-	volatile u32 *dst_ptr;
-	u32 dws, i;
+	u32 dws;
 	const struct cs_section_def *cs_data;
 	int r;
 
@@ -3306,66 +3281,23 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
 	cs_data = adev->gfx.rlc.cs_data;
 
 	if (src_ptr) {
-		/* save restore block */
-		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
-					      AMDGPU_GEM_DOMAIN_VRAM,
-					      &adev->gfx.rlc.save_restore_obj,
-					      &adev->gfx.rlc.save_restore_gpu_addr,
-					      (void **)&adev->gfx.rlc.sr_ptr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r);
-			gfx_v7_0_rlc_fini(adev);
+		/* init save restore block */
+		r = amdgpu_gfx_rlc_init_sr(adev, dws);
+		if (r)
 			return r;
-		}
-
-		/* write the sr buffer */
-		dst_ptr = adev->gfx.rlc.sr_ptr;
-		for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
-			dst_ptr[i] = cpu_to_le32(src_ptr[i]);
-		amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
-		amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
 	}
 
 	if (cs_data) {
-		/* clear state block */
-		adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev);
-
-		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
-					      AMDGPU_GEM_DOMAIN_VRAM,
-					      &adev->gfx.rlc.clear_state_obj,
-					      &adev->gfx.rlc.clear_state_gpu_addr,
-					      (void **)&adev->gfx.rlc.cs_ptr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
-			gfx_v7_0_rlc_fini(adev);
+		/* init clear state block */
+		r = amdgpu_gfx_rlc_init_csb(adev);
+		if (r)
 			return r;
-		}
-
-		/* set up the cs buffer */
-		dst_ptr = adev->gfx.rlc.cs_ptr;
-		gfx_v7_0_get_csb_buffer(adev, dst_ptr);
-		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
-		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
 	}
 
 	if (adev->gfx.rlc.cp_table_size) {
-
-		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
-					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
-					      &adev->gfx.rlc.cp_table_obj,
-					      &adev->gfx.rlc.cp_table_gpu_addr,
-					      (void **)&adev->gfx.rlc.cp_table_ptr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
-			gfx_v7_0_rlc_fini(adev);
+		r = amdgpu_gfx_rlc_init_cpt(adev);
+		if (r)
 			return r;
-		}
-
-		gfx_v7_0_init_cp_pg_table(adev);
-
-		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
-		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
-
 	}
 
 	return 0;
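The buffer-object setup removed above moves into shared helpers (amdgpu_gfx_rlc_init_sr/_csb/_cpt) that are not part of this file's diff. The clear-state helper is presumably the old sequence rewritten around the new per-ASIC callbacks, roughly as below; this is a sketch under that assumption, not the literal helper:

int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	int r;

	/* size and allocate the clear state buffer via the per-ASIC callback */
	adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
	r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.rlc.clear_state_obj,
				      &adev->gfx.rlc.clear_state_gpu_addr,
				      (void **)&adev->gfx.rlc.cs_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
		amdgpu_gfx_rlc_fini(adev);
		return r;
	}

	/* fill it with the ASIC-specific clear state packets */
	dst_ptr = adev->gfx.rlc.cs_ptr;
	adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
	amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return 0;
}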
@@ -3446,7 +3378,12 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
 	return orig;
 }
 
-static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
+static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+	return true;
+}
+
+static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev)
 {
 	u32 tmp, i, mask;
 
@@ -3468,7 +3405,7 @@ static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 	}
 }
 
-static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev)
 {
 	u32 tmp;
 
@@ -3545,13 +3482,13 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
 	adev->gfx.rlc_feature_version = le32_to_cpu(
 					hdr->ucode_feature_version);
 
-	gfx_v7_0_rlc_stop(adev);
+	adev->gfx.rlc.funcs->stop(adev);
 
 	/* disable CG */
 	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc;
 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
 
-	gfx_v7_0_rlc_reset(adev);
+	adev->gfx.rlc.funcs->reset(adev);
 
 	gfx_v7_0_init_pg(adev);
 
@@ -3582,7 +3519,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
 	if (adev->asic_type == CHIP_BONAIRE)
 		WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0);
 
-	gfx_v7_0_rlc_start(adev);
+	adev->gfx.rlc.funcs->start(adev);
 
 	return 0;
 }
@@ -3784,72 +3721,12 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
3784 WREG32(mmRLC_PG_CNTL, data); 3721 WREG32(mmRLC_PG_CNTL, data);
3785} 3722}
3786 3723
3787static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev) 3724static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev)
3788{ 3725{
3789 const __le32 *fw_data;
3790 volatile u32 *dst_ptr;
3791 int me, i, max_me = 4;
3792 u32 bo_offset = 0;
3793 u32 table_offset, table_size;
3794
3795 if (adev->asic_type == CHIP_KAVERI) 3726 if (adev->asic_type == CHIP_KAVERI)
3796 max_me = 5; 3727 return 5;
3797 3728 else
3798 if (adev->gfx.rlc.cp_table_ptr == NULL) 3729 return 4;
3799 return;
3800
3801 /* write the cp table buffer */
3802 dst_ptr = adev->gfx.rlc.cp_table_ptr;
3803 for (me = 0; me < max_me; me++) {
3804 if (me == 0) {
3805 const struct gfx_firmware_header_v1_0 *hdr =
3806 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
3807 fw_data = (const __le32 *)
3808 (adev->gfx.ce_fw->data +
3809 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3810 table_offset = le32_to_cpu(hdr->jt_offset);
3811 table_size = le32_to_cpu(hdr->jt_size);
3812 } else if (me == 1) {
3813 const struct gfx_firmware_header_v1_0 *hdr =
3814 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
3815 fw_data = (const __le32 *)
3816 (adev->gfx.pfp_fw->data +
3817 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3818 table_offset = le32_to_cpu(hdr->jt_offset);
3819 table_size = le32_to_cpu(hdr->jt_size);
3820 } else if (me == 2) {
3821 const struct gfx_firmware_header_v1_0 *hdr =
3822 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
3823 fw_data = (const __le32 *)
3824 (adev->gfx.me_fw->data +
3825 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3826 table_offset = le32_to_cpu(hdr->jt_offset);
3827 table_size = le32_to_cpu(hdr->jt_size);
3828 } else if (me == 3) {
3829 const struct gfx_firmware_header_v1_0 *hdr =
3830 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3831 fw_data = (const __le32 *)
3832 (adev->gfx.mec_fw->data +
3833 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3834 table_offset = le32_to_cpu(hdr->jt_offset);
3835 table_size = le32_to_cpu(hdr->jt_size);
3836 } else {
3837 const struct gfx_firmware_header_v1_0 *hdr =
3838 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3839 fw_data = (const __le32 *)
3840 (adev->gfx.mec2_fw->data +
3841 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3842 table_offset = le32_to_cpu(hdr->jt_offset);
3843 table_size = le32_to_cpu(hdr->jt_size);
3844 }
3845
3846 for (i = 0; i < table_size; i ++) {
3847 dst_ptr[bo_offset + i] =
3848 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
3849 }
3850
3851 bo_offset += table_size;
3852 }
3853} 3730}
3854 3731
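Only the jump-table count stays chip-specific after this hunk (gfx_v7_0_cp_pg_table_num above); the per-engine firmware copy that used to live in gfx_v7_0_init_cp_pg_table presumably moves into a shared helper that loops over funcs->get_cp_table_num(). A rough sketch of such a loop, under that assumption (the helper name and the engine-selection logic are guesses, not taken from this diff):

static void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *hdr;
	volatile u32 *dst_ptr = adev->gfx.rlc.cp_table_ptr;
	const __le32 *fw_data;
	u32 bo_offset = 0, table_offset, table_size;
	int max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);
	int me, i;

	for (me = 0; me < max_me; me++) {
		/* pick the firmware image for this micro engine
		 * (CE, PFP, ME, MEC, MEC2) */
		const struct firmware *fw =
			(me == 0) ? adev->gfx.ce_fw :
			(me == 1) ? adev->gfx.pfp_fw :
			(me == 2) ? adev->gfx.me_fw :
			(me == 3) ? adev->gfx.mec_fw : adev->gfx.mec2_fw;

		hdr = (const struct gfx_firmware_header_v1_0 *)fw->data;
		fw_data = (const __le32 *)(fw->data +
			  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		table_offset = le32_to_cpu(hdr->jt_offset);
		table_size = le32_to_cpu(hdr->jt_size);

		/* copy this engine's jump table into the shared cp table BO */
		for (i = 0; i < table_size; i++)
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));

		bo_offset += table_size;
	}
}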
3855static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, 3732static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
@@ -4288,8 +4165,17 @@ static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
 };
 
 static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
-	.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
-	.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
+	.is_rlc_enabled = gfx_v7_0_is_rlc_enabled,
+	.set_safe_mode = gfx_v7_0_set_safe_mode,
+	.unset_safe_mode = gfx_v7_0_unset_safe_mode,
+	.init = gfx_v7_0_rlc_init,
+	.get_csb_size = gfx_v7_0_get_csb_size,
+	.get_csb_buffer = gfx_v7_0_get_csb_buffer,
+	.get_cp_table_num = gfx_v7_0_cp_pg_table_num,
+	.resume = gfx_v7_0_rlc_resume,
+	.stop = gfx_v7_0_rlc_stop,
+	.reset = gfx_v7_0_rlc_reset,
+	.start = gfx_v7_0_rlc_start
 };
 
 static int gfx_v7_0_early_init(void *handle)
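Callers elsewhere in this patch switch from funcs->enter_safe_mode()/exit_safe_mode() to amdgpu_gfx_rlc_enter_safe_mode()/_exit_safe_mode(); those wrappers presumably combine the three new callbacks registered above. A sketch of the assumed wrappers (the in_safe_mode bookkeeping in particular is inferred, not quoted from the patch):

void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
{
	if (adev->gfx.rlc.in_safe_mode)
		return;

	/* if the RLC is not running there is nothing to protect against */
	if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		adev->gfx.rlc.funcs->set_safe_mode(adev);
		adev->gfx.rlc.in_safe_mode = true;
	}
}

void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
{
	if (!adev->gfx.rlc.in_safe_mode)
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		adev->gfx.rlc.funcs->unset_safe_mode(adev);
		adev->gfx.rlc.in_safe_mode = false;
	}
}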
@@ -4540,7 +4426,7 @@ static int gfx_v7_0_sw_init(void *handle)
 		return r;
 	}
 
-	r = gfx_v7_0_rlc_init(adev);
+	r = adev->gfx.rlc.funcs->init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init rlc BOs!\n");
 		return r;
@@ -4604,7 +4490,7 @@ static int gfx_v7_0_sw_fini(void *handle)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
 	gfx_v7_0_cp_compute_fini(adev);
-	gfx_v7_0_rlc_fini(adev);
+	amdgpu_gfx_rlc_fini(adev);
 	gfx_v7_0_mec_fini(adev);
 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
 				&adev->gfx.rlc.clear_state_gpu_addr,
@@ -4627,7 +4513,7 @@ static int gfx_v7_0_hw_init(void *handle)
 	gfx_v7_0_constants_init(adev);
 
 	/* init rlc */
-	r = gfx_v7_0_rlc_resume(adev);
+	r = adev->gfx.rlc.funcs->resume(adev);
 	if (r)
 		return r;
 
@@ -4645,7 +4531,7 @@ static int gfx_v7_0_hw_fini(void *handle)
 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 	gfx_v7_0_cp_enable(adev, false);
-	gfx_v7_0_rlc_stop(adev);
+	adev->gfx.rlc.funcs->stop(adev);
 	gfx_v7_0_fini_pg(adev);
 
 	return 0;
@@ -4730,7 +4616,7 @@ static int gfx_v7_0_soft_reset(void *handle)
 	gfx_v7_0_update_cg(adev, false);
 
 	/* stop the rlc */
-	gfx_v7_0_rlc_stop(adev);
+	adev->gfx.rlc.funcs->stop(adev);
 
 	/* Disable GFX parsing/prefetching */
 	WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
@@ -4959,12 +4845,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void gfx_v7_0_fault(struct amdgpu_device *adev,
+			   struct amdgpu_iv_entry *entry)
+{
+	struct amdgpu_ring *ring;
+	u8 me_id, pipe_id;
+	int i;
+
+	me_id = (entry->ring_id & 0x0c) >> 2;
+	pipe_id = (entry->ring_id & 0x03) >> 0;
+	switch (me_id) {
+	case 0:
+		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
+		break;
+	case 1:
+	case 2:
+		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+			ring = &adev->gfx.compute_ring[i];
+			if ((ring->me == me_id) && (ring->pipe == pipe_id))
+				drm_sched_fault(&ring->sched);
+		}
+		break;
+	}
+}
+
 static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
 				 struct amdgpu_irq_src *source,
 				 struct amdgpu_iv_entry *entry)
 {
 	DRM_ERROR("Illegal register access in command stream\n");
-	schedule_work(&adev->reset_work);
+	gfx_v7_0_fault(adev, entry);
 	return 0;
 }
 
@@ -4974,7 +4884,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
 {
 	DRM_ERROR("Illegal instruction in command stream\n");
 	// XXX soft reset the gfx block only
-	schedule_work(&adev->reset_work);
+	gfx_v7_0_fault(adev, entry);
 	return 0;
 }
 
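The new fault path no longer schedules the device-wide adev->reset_work; it pokes the GPU scheduler attached to the offending ring instead. drm_sched_fault() is a small DRM scheduler entry point; in this kernel generation it presumably just fires the ring's timeout/recovery handler immediately, roughly as below (a sketch, not part of this patch):

void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
	/* run the scheduler's timeout work right away instead of
	 * waiting for the job timeout to expire */
	mod_delayed_work(system_wq, &sched->work_tdr, 0);
}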
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 617b0c8908a3..cb066a8dccd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -54,7 +54,7 @@
 #include "ivsrcid/ivsrcid_vislands30.h"
 
 #define GFX8_NUM_GFX_RINGS     1
-#define GFX8_MEC_HPD_SIZE 2048
+#define GFX8_MEC_HPD_SIZE 4096
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -839,18 +839,14 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
839 int r; 839 int r;
840 840
841 r = amdgpu_gfx_scratch_get(adev, &scratch); 841 r = amdgpu_gfx_scratch_get(adev, &scratch);
842 if (r) { 842 if (r)
843 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
844 return r; 843 return r;
845 } 844
846 WREG32(scratch, 0xCAFEDEAD); 845 WREG32(scratch, 0xCAFEDEAD);
847 r = amdgpu_ring_alloc(ring, 3); 846 r = amdgpu_ring_alloc(ring, 3);
848 if (r) { 847 if (r)
849 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 848 goto error_free_scratch;
850 ring->idx, r); 849
851 amdgpu_gfx_scratch_free(adev, scratch);
852 return r;
853 }
854 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 850 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
855 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 851 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
856 amdgpu_ring_write(ring, 0xDEADBEEF); 852 amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -862,14 +858,11 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
862 break; 858 break;
863 DRM_UDELAY(1); 859 DRM_UDELAY(1);
864 } 860 }
865 if (i < adev->usec_timeout) { 861
866 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", 862 if (i >= adev->usec_timeout)
867 ring->idx, i); 863 r = -ETIMEDOUT;
868 } else { 864
869 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 865error_free_scratch:
870 ring->idx, scratch, tmp);
871 r = -EINVAL;
872 }
873 amdgpu_gfx_scratch_free(adev, scratch); 866 amdgpu_gfx_scratch_free(adev, scratch);
874 return r; 867 return r;
875} 868}
@@ -886,19 +879,16 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
886 long r; 879 long r;
887 880
888 r = amdgpu_device_wb_get(adev, &index); 881 r = amdgpu_device_wb_get(adev, &index);
889 if (r) { 882 if (r)
890 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
891 return r; 883 return r;
892 }
893 884
894 gpu_addr = adev->wb.gpu_addr + (index * 4); 885 gpu_addr = adev->wb.gpu_addr + (index * 4);
895 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 886 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
896 memset(&ib, 0, sizeof(ib)); 887 memset(&ib, 0, sizeof(ib));
897 r = amdgpu_ib_get(adev, NULL, 16, &ib); 888 r = amdgpu_ib_get(adev, NULL, 16, &ib);
898 if (r) { 889 if (r)
899 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
900 goto err1; 890 goto err1;
901 } 891
902 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 892 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
903 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 893 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
904 ib.ptr[2] = lower_32_bits(gpu_addr); 894 ib.ptr[2] = lower_32_bits(gpu_addr);
@@ -912,22 +902,17 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
912 902
913 r = dma_fence_wait_timeout(f, false, timeout); 903 r = dma_fence_wait_timeout(f, false, timeout);
914 if (r == 0) { 904 if (r == 0) {
915 DRM_ERROR("amdgpu: IB test timed out.\n");
916 r = -ETIMEDOUT; 905 r = -ETIMEDOUT;
917 goto err2; 906 goto err2;
918 } else if (r < 0) { 907 } else if (r < 0) {
919 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
920 goto err2; 908 goto err2;
921 } 909 }
922 910
923 tmp = adev->wb.wb[index]; 911 tmp = adev->wb.wb[index];
924 if (tmp == 0xDEADBEEF) { 912 if (tmp == 0xDEADBEEF)
925 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
926 r = 0; 913 r = 0;
927 } else { 914 else
928 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
929 r = -EINVAL; 915 r = -EINVAL;
930 }
931 916
932err2: 917err2:
933 amdgpu_ib_free(adev, &ib, NULL); 918 amdgpu_ib_free(adev, &ib, NULL);
@@ -1298,81 +1283,16 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1298 buffer[count++] = cpu_to_le32(0); 1283 buffer[count++] = cpu_to_le32(0);
1299} 1284}
1300 1285
1301static void cz_init_cp_jump_table(struct amdgpu_device *adev) 1286static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1302{ 1287{
1303 const __le32 *fw_data;
1304 volatile u32 *dst_ptr;
1305 int me, i, max_me = 4;
1306 u32 bo_offset = 0;
1307 u32 table_offset, table_size;
1308
1309 if (adev->asic_type == CHIP_CARRIZO) 1288 if (adev->asic_type == CHIP_CARRIZO)
1310 max_me = 5; 1289 return 5;
1311 1290 else
1312 /* write the cp table buffer */ 1291 return 4;
1313 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1314 for (me = 0; me < max_me; me++) {
1315 if (me == 0) {
1316 const struct gfx_firmware_header_v1_0 *hdr =
1317 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1318 fw_data = (const __le32 *)
1319 (adev->gfx.ce_fw->data +
1320 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1321 table_offset = le32_to_cpu(hdr->jt_offset);
1322 table_size = le32_to_cpu(hdr->jt_size);
1323 } else if (me == 1) {
1324 const struct gfx_firmware_header_v1_0 *hdr =
1325 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1326 fw_data = (const __le32 *)
1327 (adev->gfx.pfp_fw->data +
1328 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1329 table_offset = le32_to_cpu(hdr->jt_offset);
1330 table_size = le32_to_cpu(hdr->jt_size);
1331 } else if (me == 2) {
1332 const struct gfx_firmware_header_v1_0 *hdr =
1333 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1334 fw_data = (const __le32 *)
1335 (adev->gfx.me_fw->data +
1336 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1337 table_offset = le32_to_cpu(hdr->jt_offset);
1338 table_size = le32_to_cpu(hdr->jt_size);
1339 } else if (me == 3) {
1340 const struct gfx_firmware_header_v1_0 *hdr =
1341 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1342 fw_data = (const __le32 *)
1343 (adev->gfx.mec_fw->data +
1344 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1345 table_offset = le32_to_cpu(hdr->jt_offset);
1346 table_size = le32_to_cpu(hdr->jt_size);
1347 } else if (me == 4) {
1348 const struct gfx_firmware_header_v1_0 *hdr =
1349 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1350 fw_data = (const __le32 *)
1351 (adev->gfx.mec2_fw->data +
1352 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1353 table_offset = le32_to_cpu(hdr->jt_offset);
1354 table_size = le32_to_cpu(hdr->jt_size);
1355 }
1356
1357 for (i = 0; i < table_size; i ++) {
1358 dst_ptr[bo_offset + i] =
1359 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1360 }
1361
1362 bo_offset += table_size;
1363 }
1364}
1365
1366static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1367{
1368 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1369 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1370} 1292}
1371 1293
1372static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1294static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1373{ 1295{
1374 volatile u32 *dst_ptr;
1375 u32 dws;
1376 const struct cs_section_def *cs_data; 1296 const struct cs_section_def *cs_data;
1377 int r; 1297 int r;
1378 1298
@@ -1381,44 +1301,18 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1381 cs_data = adev->gfx.rlc.cs_data; 1301 cs_data = adev->gfx.rlc.cs_data;
1382 1302
1383 if (cs_data) { 1303 if (cs_data) {
1384 /* clear state block */ 1304 /* init clear state block */
1385 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1305 r = amdgpu_gfx_rlc_init_csb(adev);
1386 1306 if (r)
1387 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1388 AMDGPU_GEM_DOMAIN_VRAM,
1389 &adev->gfx.rlc.clear_state_obj,
1390 &adev->gfx.rlc.clear_state_gpu_addr,
1391 (void **)&adev->gfx.rlc.cs_ptr);
1392 if (r) {
1393 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1394 gfx_v8_0_rlc_fini(adev);
1395 return r; 1307 return r;
1396 }
1397
1398 /* set up the cs buffer */
1399 dst_ptr = adev->gfx.rlc.cs_ptr;
1400 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1401 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1402 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1403 } 1308 }
1404 1309
1405 if ((adev->asic_type == CHIP_CARRIZO) || 1310 if ((adev->asic_type == CHIP_CARRIZO) ||
1406 (adev->asic_type == CHIP_STONEY)) { 1311 (adev->asic_type == CHIP_STONEY)) {
1407 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1312 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1408 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, 1313 r = amdgpu_gfx_rlc_init_cpt(adev);
1409 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1314 if (r)
1410 &adev->gfx.rlc.cp_table_obj,
1411 &adev->gfx.rlc.cp_table_gpu_addr,
1412 (void **)&adev->gfx.rlc.cp_table_ptr);
1413 if (r) {
1414 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1415 return r; 1315 return r;
1416 }
1417
1418 cz_init_cp_jump_table(adev);
1419
1420 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1421 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1422 } 1316 }
1423 1317
1424 return 0; 1318 return 0;
@@ -1443,7 +1337,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1443 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; 1337 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1444 1338
1445 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1339 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1446 AMDGPU_GEM_DOMAIN_GTT, 1340 AMDGPU_GEM_DOMAIN_VRAM,
1447 &adev->gfx.mec.hpd_eop_obj, 1341 &adev->gfx.mec.hpd_eop_obj,
1448 &adev->gfx.mec.hpd_eop_gpu_addr, 1342 &adev->gfx.mec.hpd_eop_gpu_addr,
1449 (void **)&hpd); 1343 (void **)&hpd);
@@ -1629,7 +1523,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1629 return 0; 1523 return 0;
1630 1524
1631 /* bail if the compute ring is not ready */ 1525 /* bail if the compute ring is not ready */
1632 if (!ring->ready) 1526 if (!ring->sched.ready)
1633 return 0; 1527 return 0;
1634 1528
1635 tmp = RREG32(mmGB_EDC_MODE); 1529 tmp = RREG32(mmGB_EDC_MODE);
@@ -2088,7 +1982,7 @@ static int gfx_v8_0_sw_init(void *handle)
2088 return r; 1982 return r;
2089 } 1983 }
2090 1984
2091 r = gfx_v8_0_rlc_init(adev); 1985 r = adev->gfx.rlc.funcs->init(adev);
2092 if (r) { 1986 if (r) {
2093 DRM_ERROR("Failed to init rlc BOs!\n"); 1987 DRM_ERROR("Failed to init rlc BOs!\n");
2094 return r; 1988 return r;
@@ -2181,7 +2075,7 @@ static int gfx_v8_0_sw_fini(void *handle)
2181 amdgpu_gfx_kiq_fini(adev); 2075 amdgpu_gfx_kiq_fini(adev);
2182 2076
2183 gfx_v8_0_mec_fini(adev); 2077 gfx_v8_0_mec_fini(adev);
2184 gfx_v8_0_rlc_fini(adev); 2078 amdgpu_gfx_rlc_fini(adev);
2185 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2079 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2186 &adev->gfx.rlc.clear_state_gpu_addr, 2080 &adev->gfx.rlc.clear_state_gpu_addr,
2187 (void **)&adev->gfx.rlc.cs_ptr); 2081 (void **)&adev->gfx.rlc.cs_ptr);
@@ -4175,10 +4069,10 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4175 4069
4176static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 4070static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4177{ 4071{
4178 gfx_v8_0_rlc_stop(adev); 4072 adev->gfx.rlc.funcs->stop(adev);
4179 gfx_v8_0_rlc_reset(adev); 4073 adev->gfx.rlc.funcs->reset(adev);
4180 gfx_v8_0_init_pg(adev); 4074 gfx_v8_0_init_pg(adev);
4181 gfx_v8_0_rlc_start(adev); 4075 adev->gfx.rlc.funcs->start(adev);
4182 4076
4183 return 0; 4077 return 0;
4184} 4078}
@@ -4197,7 +4091,7 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4197 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4091 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4198 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4092 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4199 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4093 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4200 adev->gfx.gfx_ring[i].ready = false; 4094 adev->gfx.gfx_ring[i].sched.ready = false;
4201 } 4095 }
4202 WREG32(mmCP_ME_CNTL, tmp); 4096 WREG32(mmCP_ME_CNTL, tmp);
4203 udelay(50); 4097 udelay(50);
@@ -4379,10 +4273,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4379 /* start the ring */ 4273 /* start the ring */
4380 amdgpu_ring_clear_ring(ring); 4274 amdgpu_ring_clear_ring(ring);
4381 gfx_v8_0_cp_gfx_start(adev); 4275 gfx_v8_0_cp_gfx_start(adev);
4382 ring->ready = true; 4276 ring->sched.ready = true;
4383 r = amdgpu_ring_test_ring(ring); 4277 r = amdgpu_ring_test_helper(ring);
4384 if (r)
4385 ring->ready = false;
4386 4278
4387 return r; 4279 return r;
4388} 4280}
@@ -4396,8 +4288,8 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4396 } else { 4288 } else {
4397 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4289 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4398 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4290 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4399 adev->gfx.compute_ring[i].ready = false; 4291 adev->gfx.compute_ring[i].sched.ready = false;
4400 adev->gfx.kiq.ring.ready = false; 4292 adev->gfx.kiq.ring.sched.ready = false;
4401 } 4293 }
4402 udelay(50); 4294 udelay(50);
4403} 4295}
@@ -4473,11 +4365,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4473 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4365 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4474 } 4366 }
4475 4367
4476 r = amdgpu_ring_test_ring(kiq_ring); 4368 r = amdgpu_ring_test_helper(kiq_ring);
4477 if (r) { 4369 if (r)
4478 DRM_ERROR("KCQ enable failed\n"); 4370 DRM_ERROR("KCQ enable failed\n");
4479 kiq_ring->ready = false;
4480 }
4481 return r; 4371 return r;
4482} 4372}
4483 4373
@@ -4781,7 +4671,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4781 amdgpu_bo_kunmap(ring->mqd_obj); 4671 amdgpu_bo_kunmap(ring->mqd_obj);
4782 ring->mqd_ptr = NULL; 4672 ring->mqd_ptr = NULL;
4783 amdgpu_bo_unreserve(ring->mqd_obj); 4673 amdgpu_bo_unreserve(ring->mqd_obj);
4784 ring->ready = true; 4674 ring->sched.ready = true;
4785 return 0; 4675 return 0;
4786} 4676}
4787 4677
@@ -4820,10 +4710,7 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4820 */ 4710 */
4821 for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) { 4711 for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
4822 ring = &adev->gfx.compute_ring[i]; 4712 ring = &adev->gfx.compute_ring[i];
4823 ring->ready = true; 4713 r = amdgpu_ring_test_helper(ring);
4824 r = amdgpu_ring_test_ring(ring);
4825 if (r)
4826 ring->ready = false;
4827 } 4714 }
4828 4715
4829done: 4716done:
@@ -4867,7 +4754,7 @@ static int gfx_v8_0_hw_init(void *handle)
4867 gfx_v8_0_init_golden_registers(adev); 4754 gfx_v8_0_init_golden_registers(adev);
4868 gfx_v8_0_constants_init(adev); 4755 gfx_v8_0_constants_init(adev);
4869 4756
4870 r = gfx_v8_0_rlc_resume(adev); 4757 r = adev->gfx.rlc.funcs->resume(adev);
4871 if (r) 4758 if (r)
4872 return r; 4759 return r;
4873 4760
@@ -4899,7 +4786,7 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4899 amdgpu_ring_write(kiq_ring, 0); 4786 amdgpu_ring_write(kiq_ring, 0);
4900 amdgpu_ring_write(kiq_ring, 0); 4787 amdgpu_ring_write(kiq_ring, 0);
4901 } 4788 }
4902 r = amdgpu_ring_test_ring(kiq_ring); 4789 r = amdgpu_ring_test_helper(kiq_ring);
4903 if (r) 4790 if (r)
4904 DRM_ERROR("KCQ disable failed\n"); 4791 DRM_ERROR("KCQ disable failed\n");
4905 4792
@@ -4973,16 +4860,16 @@ static int gfx_v8_0_hw_fini(void *handle)
4973 pr_debug("For SRIOV client, shouldn't do anything.\n"); 4860 pr_debug("For SRIOV client, shouldn't do anything.\n");
4974 return 0; 4861 return 0;
4975 } 4862 }
4976 adev->gfx.rlc.funcs->enter_safe_mode(adev); 4863 amdgpu_gfx_rlc_enter_safe_mode(adev);
4977 if (!gfx_v8_0_wait_for_idle(adev)) 4864 if (!gfx_v8_0_wait_for_idle(adev))
4978 gfx_v8_0_cp_enable(adev, false); 4865 gfx_v8_0_cp_enable(adev, false);
4979 else 4866 else
4980 pr_err("cp is busy, skip halt cp\n"); 4867 pr_err("cp is busy, skip halt cp\n");
4981 if (!gfx_v8_0_wait_for_rlc_idle(adev)) 4868 if (!gfx_v8_0_wait_for_rlc_idle(adev))
4982 gfx_v8_0_rlc_stop(adev); 4869 adev->gfx.rlc.funcs->stop(adev);
4983 else 4870 else
4984 pr_err("rlc is busy, skip halt rlc\n"); 4871 pr_err("rlc is busy, skip halt rlc\n");
4985 adev->gfx.rlc.funcs->exit_safe_mode(adev); 4872 amdgpu_gfx_rlc_exit_safe_mode(adev);
4986 return 0; 4873 return 0;
4987} 4874}
4988 4875
@@ -5071,7 +4958,7 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
5071 srbm_soft_reset = adev->gfx.srbm_soft_reset; 4958 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5072 4959
5073 /* stop the rlc */ 4960 /* stop the rlc */
5074 gfx_v8_0_rlc_stop(adev); 4961 adev->gfx.rlc.funcs->stop(adev);
5075 4962
5076 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 4963 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5077 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 4964 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
@@ -5197,7 +5084,7 @@ static int gfx_v8_0_post_soft_reset(void *handle)
5197 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5084 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5198 gfx_v8_0_cp_gfx_resume(adev); 5085 gfx_v8_0_cp_gfx_resume(adev);
5199 5086
5200 gfx_v8_0_rlc_start(adev); 5087 adev->gfx.rlc.funcs->start(adev);
5201 5088
5202 return 0; 5089 return 0;
5203} 5090}
@@ -5445,7 +5332,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
5445 AMD_PG_SUPPORT_RLC_SMU_HS | 5332 AMD_PG_SUPPORT_RLC_SMU_HS |
5446 AMD_PG_SUPPORT_CP | 5333 AMD_PG_SUPPORT_CP |
5447 AMD_PG_SUPPORT_GFX_DMG)) 5334 AMD_PG_SUPPORT_GFX_DMG))
5448 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5335 amdgpu_gfx_rlc_enter_safe_mode(adev);
5449 switch (adev->asic_type) { 5336 switch (adev->asic_type) {
5450 case CHIP_CARRIZO: 5337 case CHIP_CARRIZO:
5451 case CHIP_STONEY: 5338 case CHIP_STONEY:
@@ -5499,7 +5386,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
5499 AMD_PG_SUPPORT_RLC_SMU_HS | 5386 AMD_PG_SUPPORT_RLC_SMU_HS |
5500 AMD_PG_SUPPORT_CP | 5387 AMD_PG_SUPPORT_CP |
5501 AMD_PG_SUPPORT_GFX_DMG)) 5388 AMD_PG_SUPPORT_GFX_DMG))
5502 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5389 amdgpu_gfx_rlc_exit_safe_mode(adev);
5503 return 0; 5390 return 0;
5504} 5391}
5505 5392
@@ -5593,57 +5480,53 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5593#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5480#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5594#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5481#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5595 5482
5596static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5483static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5597{ 5484{
5598 u32 data; 5485 uint32_t rlc_setting;
5599 unsigned i;
5600 5486
5601 data = RREG32(mmRLC_CNTL); 5487 rlc_setting = RREG32(mmRLC_CNTL);
5602 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5488 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5603 return; 5489 return false;
5604 5490
5605 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5491 return true;
5606 data |= RLC_SAFE_MODE__CMD_MASK; 5492}
5607 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5608 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5609 WREG32(mmRLC_SAFE_MODE, data);
5610 5493
5611 for (i = 0; i < adev->usec_timeout; i++) { 5494static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5612 if ((RREG32(mmRLC_GPM_STAT) & 5495{
5613 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5496 uint32_t data;
5614 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5497 unsigned i;
5615 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5498 data = RREG32(mmRLC_CNTL);
5616 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5499 data |= RLC_SAFE_MODE__CMD_MASK;
5617 break; 5500 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5618 udelay(1); 5501 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5619 } 5502 WREG32(mmRLC_SAFE_MODE, data);
5620 5503
5621 for (i = 0; i < adev->usec_timeout; i++) { 5504 /* wait for RLC_SAFE_MODE */
5622 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5505 for (i = 0; i < adev->usec_timeout; i++) {
5623 break; 5506 if ((RREG32(mmRLC_GPM_STAT) &
5624 udelay(1); 5507 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5625 } 5508 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5626 adev->gfx.rlc.in_safe_mode = true; 5509 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5510 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5511 break;
5512 udelay(1);
5513 }
5514 for (i = 0; i < adev->usec_timeout; i++) {
5515 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5516 break;
5517 udelay(1);
5627 } 5518 }
5628} 5519}
5629 5520
5630static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5521static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5631{ 5522{
5632 u32 data = 0; 5523 uint32_t data;
5633 unsigned i; 5524 unsigned i;
5634 5525
5635 data = RREG32(mmRLC_CNTL); 5526 data = RREG32(mmRLC_CNTL);
5636 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5527 data |= RLC_SAFE_MODE__CMD_MASK;
5637 return; 5528 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5638 5529 WREG32(mmRLC_SAFE_MODE, data);
5639 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5640 if (adev->gfx.rlc.in_safe_mode) {
5641 data |= RLC_SAFE_MODE__CMD_MASK;
5642 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5643 WREG32(mmRLC_SAFE_MODE, data);
5644 adev->gfx.rlc.in_safe_mode = false;
5645 }
5646 }
5647 5530
5648 for (i = 0; i < adev->usec_timeout; i++) { 5531 for (i = 0; i < adev->usec_timeout; i++) {
5649 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5532 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
@@ -5653,8 +5536,17 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
-	.enter_safe_mode = iceland_enter_rlc_safe_mode,
-	.exit_safe_mode = iceland_exit_rlc_safe_mode
+	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
+	.set_safe_mode = gfx_v8_0_set_safe_mode,
+	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
+	.init = gfx_v8_0_rlc_init,
+	.get_csb_size = gfx_v8_0_get_csb_size,
+	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
+	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
+	.resume = gfx_v8_0_rlc_resume,
+	.stop = gfx_v8_0_rlc_stop,
+	.reset = gfx_v8_0_rlc_reset,
+	.start = gfx_v8_0_rlc_start
 };
5659 5551
5660static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5552static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -5662,7 +5554,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
5662{ 5554{
5663 uint32_t temp, data; 5555 uint32_t temp, data;
5664 5556
5665 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5557 amdgpu_gfx_rlc_enter_safe_mode(adev);
5666 5558
5667 /* It is disabled by HW by default */ 5559 /* It is disabled by HW by default */
5668 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5560 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
@@ -5758,7 +5650,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
5758 gfx_v8_0_wait_for_rlc_serdes(adev); 5650 gfx_v8_0_wait_for_rlc_serdes(adev);
5759 } 5651 }
5760 5652
5761 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5653 amdgpu_gfx_rlc_exit_safe_mode(adev);
5762} 5654}
5763 5655
5764static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5656static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5768,7 +5660,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
5768 5660
5769 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5661 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5770 5662
5771 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5663 amdgpu_gfx_rlc_enter_safe_mode(adev);
5772 5664
5773 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5665 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5774 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5666 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
@@ -5851,7 +5743,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
5851 5743
5852 gfx_v8_0_wait_for_rlc_serdes(adev); 5744 gfx_v8_0_wait_for_rlc_serdes(adev);
5853 5745
5854 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5746 amdgpu_gfx_rlc_exit_safe_mode(adev);
5855} 5747}
5856static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5748static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5857 bool enable) 5749 bool enable)
@@ -6131,9 +6023,11 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6131} 6023}
6132 6024
6133static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6025static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6134 struct amdgpu_ib *ib, 6026 struct amdgpu_job *job,
6135 unsigned vmid, bool ctx_switch) 6027 struct amdgpu_ib *ib,
6028 bool ctx_switch)
6136{ 6029{
6030 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6137 u32 header, control = 0; 6031 u32 header, control = 0;
6138 6032
6139 if (ib->flags & AMDGPU_IB_FLAG_CE) 6033 if (ib->flags & AMDGPU_IB_FLAG_CE)
@@ -6161,9 +6055,11 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6161} 6055}
6162 6056
6163static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6057static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6058 struct amdgpu_job *job,
6164 struct amdgpu_ib *ib, 6059 struct amdgpu_ib *ib,
6165 unsigned vmid, bool ctx_switch) 6060 bool ctx_switch)
6166{ 6061{
6062 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6167 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6063 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6168 6064
6169 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6065 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
@@ -6738,12 +6634,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6738 return 0; 6634 return 0;
6739} 6635}
6740 6636
6637static void gfx_v8_0_fault(struct amdgpu_device *adev,
6638 struct amdgpu_iv_entry *entry)
6639{
6640 u8 me_id, pipe_id, queue_id;
6641 struct amdgpu_ring *ring;
6642 int i;
6643
6644 me_id = (entry->ring_id & 0x0c) >> 2;
6645 pipe_id = (entry->ring_id & 0x03) >> 0;
6646 queue_id = (entry->ring_id & 0x70) >> 4;
6647
6648 switch (me_id) {
6649 case 0:
6650 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6651 break;
6652 case 1:
6653 case 2:
6654 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6655 ring = &adev->gfx.compute_ring[i];
6656 if (ring->me == me_id && ring->pipe == pipe_id &&
6657 ring->queue == queue_id)
6658 drm_sched_fault(&ring->sched);
6659 }
6660 break;
6661 }
6662}
6663
6741static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6664static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6742 struct amdgpu_irq_src *source, 6665 struct amdgpu_irq_src *source,
6743 struct amdgpu_iv_entry *entry) 6666 struct amdgpu_iv_entry *entry)
6744{ 6667{
6745 DRM_ERROR("Illegal register access in command stream\n"); 6668 DRM_ERROR("Illegal register access in command stream\n");
6746 schedule_work(&adev->reset_work); 6669 gfx_v8_0_fault(adev, entry);
6747 return 0; 6670 return 0;
6748} 6671}
6749 6672
@@ -6752,7 +6675,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6752 struct amdgpu_iv_entry *entry) 6675 struct amdgpu_iv_entry *entry)
6753{ 6676{
6754 DRM_ERROR("Illegal instruction in command stream\n"); 6677 DRM_ERROR("Illegal instruction in command stream\n");
6755 schedule_work(&adev->reset_work); 6678 gfx_v8_0_fault(adev, entry);
6756 return 0; 6679 return 0;
6757} 6680}
6758 6681
@@ -6976,10 +6899,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6976 17 + /* gfx_v8_0_ring_emit_vm_flush */ 6899 17 + /* gfx_v8_0_ring_emit_vm_flush */
6977 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6900 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6978 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 6901 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6979 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6980 .emit_fence = gfx_v8_0_ring_emit_fence_kiq, 6902 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6981 .test_ring = gfx_v8_0_ring_test_ring, 6903 .test_ring = gfx_v8_0_ring_test_ring,
6982 .test_ib = gfx_v8_0_ring_test_ib,
6983 .insert_nop = amdgpu_ring_insert_nop, 6904 .insert_nop = amdgpu_ring_insert_nop,
6984 .pad_ib = amdgpu_ring_generic_pad_ib, 6905 .pad_ib = amdgpu_ring_generic_pad_ib,
6985 .emit_rreg = gfx_v8_0_ring_emit_rreg, 6906 .emit_rreg = gfx_v8_0_ring_emit_rreg,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6d7baf59d6e1..c27caa144c57 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -41,7 +41,7 @@
41#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" 41#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
42 42
43#define GFX9_NUM_GFX_RINGS 1 43#define GFX9_NUM_GFX_RINGS 1
44#define GFX9_MEC_HPD_SIZE 2048 44#define GFX9_MEC_HPD_SIZE 4096
45#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 45#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
46#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 46#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
47 47
@@ -396,18 +396,14 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
396 int r; 396 int r;
397 397
398 r = amdgpu_gfx_scratch_get(adev, &scratch); 398 r = amdgpu_gfx_scratch_get(adev, &scratch);
399 if (r) { 399 if (r)
400 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
401 return r; 400 return r;
402 } 401
403 WREG32(scratch, 0xCAFEDEAD); 402 WREG32(scratch, 0xCAFEDEAD);
404 r = amdgpu_ring_alloc(ring, 3); 403 r = amdgpu_ring_alloc(ring, 3);
405 if (r) { 404 if (r)
406 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 405 goto error_free_scratch;
407 ring->idx, r); 406
408 amdgpu_gfx_scratch_free(adev, scratch);
409 return r;
410 }
411 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 407 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
412 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 408 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
413 amdgpu_ring_write(ring, 0xDEADBEEF); 409 amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -419,14 +415,11 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
419 break; 415 break;
420 DRM_UDELAY(1); 416 DRM_UDELAY(1);
421 } 417 }
422 if (i < adev->usec_timeout) { 418
423 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", 419 if (i >= adev->usec_timeout)
424 ring->idx, i); 420 r = -ETIMEDOUT;
425 } else { 421
426 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 422error_free_scratch:
427 ring->idx, scratch, tmp);
428 r = -EINVAL;
429 }
430 amdgpu_gfx_scratch_free(adev, scratch); 423 amdgpu_gfx_scratch_free(adev, scratch);
431 return r; 424 return r;
432} 425}
@@ -443,19 +436,16 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
443 long r; 436 long r;
444 437
445 r = amdgpu_device_wb_get(adev, &index); 438 r = amdgpu_device_wb_get(adev, &index);
446 if (r) { 439 if (r)
447 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
448 return r; 440 return r;
449 }
450 441
451 gpu_addr = adev->wb.gpu_addr + (index * 4); 442 gpu_addr = adev->wb.gpu_addr + (index * 4);
452 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 443 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
453 memset(&ib, 0, sizeof(ib)); 444 memset(&ib, 0, sizeof(ib));
454 r = amdgpu_ib_get(adev, NULL, 16, &ib); 445 r = amdgpu_ib_get(adev, NULL, 16, &ib);
455 if (r) { 446 if (r)
456 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
457 goto err1; 447 goto err1;
458 } 448
459 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 449 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
460 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 450 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
461 ib.ptr[2] = lower_32_bits(gpu_addr); 451 ib.ptr[2] = lower_32_bits(gpu_addr);
@@ -469,22 +459,17 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
469 459
470 r = dma_fence_wait_timeout(f, false, timeout); 460 r = dma_fence_wait_timeout(f, false, timeout);
471 if (r == 0) { 461 if (r == 0) {
472 DRM_ERROR("amdgpu: IB test timed out.\n"); 462 r = -ETIMEDOUT;
473 r = -ETIMEDOUT; 463 goto err2;
474 goto err2;
475 } else if (r < 0) { 464 } else if (r < 0) {
476 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 465 goto err2;
477 goto err2;
478 } 466 }
479 467
480 tmp = adev->wb.wb[index]; 468 tmp = adev->wb.wb[index];
481 if (tmp == 0xDEADBEEF) { 469 if (tmp == 0xDEADBEEF)
482 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 470 r = 0;
483 r = 0; 471 else
484 } else { 472 r = -EINVAL;
485 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
486 r = -EINVAL;
487 }
488 473
489err2: 474err2:
490 amdgpu_ib_free(adev, &ib, NULL); 475 amdgpu_ib_free(adev, &ib, NULL);
@@ -1065,85 +1050,13 @@ static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1065 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1050 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1066} 1051}
1067 1052
1068static void rv_init_cp_jump_table(struct amdgpu_device *adev) 1053static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1069{
1070 const __le32 *fw_data;
1071 volatile u32 *dst_ptr;
1072 int me, i, max_me = 5;
1073 u32 bo_offset = 0;
1074 u32 table_offset, table_size;
1075
1076 /* write the cp table buffer */
1077 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1078 for (me = 0; me < max_me; me++) {
1079 if (me == 0) {
1080 const struct gfx_firmware_header_v1_0 *hdr =
1081 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1082 fw_data = (const __le32 *)
1083 (adev->gfx.ce_fw->data +
1084 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1085 table_offset = le32_to_cpu(hdr->jt_offset);
1086 table_size = le32_to_cpu(hdr->jt_size);
1087 } else if (me == 1) {
1088 const struct gfx_firmware_header_v1_0 *hdr =
1089 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1090 fw_data = (const __le32 *)
1091 (adev->gfx.pfp_fw->data +
1092 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1093 table_offset = le32_to_cpu(hdr->jt_offset);
1094 table_size = le32_to_cpu(hdr->jt_size);
1095 } else if (me == 2) {
1096 const struct gfx_firmware_header_v1_0 *hdr =
1097 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1098 fw_data = (const __le32 *)
1099 (adev->gfx.me_fw->data +
1100 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1101 table_offset = le32_to_cpu(hdr->jt_offset);
1102 table_size = le32_to_cpu(hdr->jt_size);
1103 } else if (me == 3) {
1104 const struct gfx_firmware_header_v1_0 *hdr =
1105 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1106 fw_data = (const __le32 *)
1107 (adev->gfx.mec_fw->data +
1108 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1109 table_offset = le32_to_cpu(hdr->jt_offset);
1110 table_size = le32_to_cpu(hdr->jt_size);
1111 } else if (me == 4) {
1112 const struct gfx_firmware_header_v1_0 *hdr =
1113 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1114 fw_data = (const __le32 *)
1115 (adev->gfx.mec2_fw->data +
1116 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1117 table_offset = le32_to_cpu(hdr->jt_offset);
1118 table_size = le32_to_cpu(hdr->jt_size);
1119 }
1120
1121 for (i = 0; i < table_size; i ++) {
1122 dst_ptr[bo_offset + i] =
1123 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1124 }
1125
1126 bo_offset += table_size;
1127 }
1128}
1129
1130static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
1131{ 1054{
1132 /* clear state block */ 1055 return 5;
1133 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
1134 &adev->gfx.rlc.clear_state_gpu_addr,
1135 (void **)&adev->gfx.rlc.cs_ptr);
1136
1137 /* jump table block */
1138 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1139 &adev->gfx.rlc.cp_table_gpu_addr,
1140 (void **)&adev->gfx.rlc.cp_table_ptr);
1141} 1056}
1142 1057
1143static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1058static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1144{ 1059{
1145 volatile u32 *dst_ptr;
1146 u32 dws;
1147 const struct cs_section_def *cs_data; 1060 const struct cs_section_def *cs_data;
1148 int r; 1061 int r;
1149 1062
@@ -1152,45 +1065,18 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1152 cs_data = adev->gfx.rlc.cs_data; 1065 cs_data = adev->gfx.rlc.cs_data;
1153 1066
1154 if (cs_data) { 1067 if (cs_data) {
1155 /* clear state block */ 1068 /* init clear state block */
1156 adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); 1069 r = amdgpu_gfx_rlc_init_csb(adev);
1157 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, 1070 if (r)
1158 AMDGPU_GEM_DOMAIN_VRAM,
1159 &adev->gfx.rlc.clear_state_obj,
1160 &adev->gfx.rlc.clear_state_gpu_addr,
1161 (void **)&adev->gfx.rlc.cs_ptr);
1162 if (r) {
1163 dev_err(adev->dev, "(%d) failed to create rlc csb bo\n",
1164 r);
1165 gfx_v9_0_rlc_fini(adev);
1166 return r; 1071 return r;
1167 }
1168 /* set up the cs buffer */
1169 dst_ptr = adev->gfx.rlc.cs_ptr;
1170 gfx_v9_0_get_csb_buffer(adev, dst_ptr);
1171 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1172 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1173 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1174 } 1072 }
1175 1073
1176 if (adev->asic_type == CHIP_RAVEN) { 1074 if (adev->asic_type == CHIP_RAVEN) {
1177 /* TODO: double check the cp_table_size for RV */ 1075 /* TODO: double check the cp_table_size for RV */
1178 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1076 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1179 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, 1077 r = amdgpu_gfx_rlc_init_cpt(adev);
1180 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1078 if (r)
1181 &adev->gfx.rlc.cp_table_obj,
1182 &adev->gfx.rlc.cp_table_gpu_addr,
1183 (void **)&adev->gfx.rlc.cp_table_ptr);
1184 if (r) {
1185 dev_err(adev->dev,
1186 "(%d) failed to create cp table bo\n", r);
1187 gfx_v9_0_rlc_fini(adev);
1188 return r; 1079 return r;
1189 }
1190
1191 rv_init_cp_jump_table(adev);
1192 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1193 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1194 } 1080 }
1195 1081
1196 switch (adev->asic_type) { 1082 switch (adev->asic_type) {
@@ -1264,7 +1150,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1264 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1150 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1265 1151
1266 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1152 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1267 AMDGPU_GEM_DOMAIN_GTT, 1153 AMDGPU_GEM_DOMAIN_VRAM,
1268 &adev->gfx.mec.hpd_eop_obj, 1154 &adev->gfx.mec.hpd_eop_obj,
1269 &adev->gfx.mec.hpd_eop_gpu_addr, 1155 &adev->gfx.mec.hpd_eop_gpu_addr,
1270 (void **)&hpd); 1156 (void **)&hpd);
@@ -1635,8 +1521,8 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	/* Clear GDS reserved memory */
 	r = amdgpu_ring_alloc(ring, 17);
 	if (r) {
-		DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
-			  ring->idx, r);
+		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
+			  ring->name, r);
 		return r;
 	}
 
@@ -1748,7 +1634,7 @@ static int gfx_v9_0_sw_init(void *handle)
 		return r;
 	}
 
-	r = gfx_v9_0_rlc_init(adev);
+	r = adev->gfx.rlc.funcs->init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init rlc BOs!\n");
 		return r;
@@ -2498,12 +2384,12 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
 		return 0;
 	}
 
-	gfx_v9_0_rlc_stop(adev);
+	adev->gfx.rlc.funcs->stop(adev);
 
 	/* disable CG */
 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
 
-	gfx_v9_0_rlc_reset(adev);
+	adev->gfx.rlc.funcs->reset(adev);
 
 	gfx_v9_0_init_pg(adev);
 
@@ -2514,15 +2400,24 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2514 return r; 2400 return r;
2515 } 2401 }
2516 2402
2517 if (adev->asic_type == CHIP_RAVEN || 2403 switch (adev->asic_type) {
2518 adev->asic_type == CHIP_VEGA20) { 2404 case CHIP_RAVEN:
2519 if (amdgpu_lbpw != 0) 2405 if (amdgpu_lbpw == 0)
2406 gfx_v9_0_enable_lbpw(adev, false);
2407 else
2408 gfx_v9_0_enable_lbpw(adev, true);
2409 break;
2410 case CHIP_VEGA20:
2411 if (amdgpu_lbpw > 0)
2520 gfx_v9_0_enable_lbpw(adev, true); 2412 gfx_v9_0_enable_lbpw(adev, true);
2521 else 2413 else
2522 gfx_v9_0_enable_lbpw(adev, false); 2414 gfx_v9_0_enable_lbpw(adev, false);
2415 break;
2416 default:
2417 break;
2523 } 2418 }
2524 2419
2525 gfx_v9_0_rlc_start(adev); 2420 adev->gfx.rlc.funcs->start(adev);
2526 2421
2527 return 0; 2422 return 0;
2528} 2423}
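The LBPW switch above keys off the amdgpu_lbpw module parameter: Raven enables LBPW for anything other than an explicit 0 (so -1/auto turns it on), while Vega20 only enables it for an explicit positive value. For reference, the parameter is declared in amdgpu_drv.c roughly as follows (quoted from memory, not part of this diff):

MODULE_PARM_DESC(lbpw,
	"Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(lbpw, amdgpu_lbpw, int, 0444);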
@@ -2537,7 +2432,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2537 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 2432 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2538 if (!enable) { 2433 if (!enable) {
2539 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2434 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2540 adev->gfx.gfx_ring[i].ready = false; 2435 adev->gfx.gfx_ring[i].sched.ready = false;
2541 } 2436 }
2542 WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); 2437 WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
2543 udelay(50); 2438 udelay(50);
@@ -2727,7 +2622,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2727 2622
2728 /* start the ring */ 2623 /* start the ring */
2729 gfx_v9_0_cp_gfx_start(adev); 2624 gfx_v9_0_cp_gfx_start(adev);
2730 ring->ready = true; 2625 ring->sched.ready = true;
2731 2626
2732 return 0; 2627 return 0;
2733} 2628}
@@ -2742,8 +2637,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2742 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 2637 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
2743 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2638 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2744 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2639 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2745 adev->gfx.compute_ring[i].ready = false; 2640 adev->gfx.compute_ring[i].sched.ready = false;
2746 adev->gfx.kiq.ring.ready = false; 2641 adev->gfx.kiq.ring.sched.ready = false;
2747 } 2642 }
2748 udelay(50); 2643 udelay(50);
2749} 2644}
@@ -2866,11 +2761,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2866 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2761 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2867 } 2762 }
2868 2763
2869 r = amdgpu_ring_test_ring(kiq_ring); 2764 r = amdgpu_ring_test_helper(kiq_ring);
2870 if (r) { 2765 if (r)
2871 DRM_ERROR("KCQ enable failed\n"); 2766 DRM_ERROR("KCQ enable failed\n");
2872 kiq_ring->ready = false;
2873 }
2874 2767
2875 return r; 2768 return r;
2876} 2769}
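amdgpu_ring_test_helper() replaces the open-coded test-plus-ready bookkeeping here and in the resume and disable paths below. A plausible sketch of the helper (the actual implementation is added to amdgpu_ring.c by this series):

int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_ERROR("ring %s test failed (%d)\n", ring->name, r);

	/* the scheduler only accepts jobs on rings that passed the test */
	ring->sched.ready = !r;
	return r;
}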
@@ -3249,7 +3142,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3249 amdgpu_bo_kunmap(ring->mqd_obj); 3142 amdgpu_bo_kunmap(ring->mqd_obj);
3250 ring->mqd_ptr = NULL; 3143 ring->mqd_ptr = NULL;
3251 amdgpu_bo_unreserve(ring->mqd_obj); 3144 amdgpu_bo_unreserve(ring->mqd_obj);
3252 ring->ready = true; 3145 ring->sched.ready = true;
3253 return 0; 3146 return 0;
3254} 3147}
3255 3148
@@ -3314,19 +3207,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3314 return r; 3207 return r;
3315 3208
3316 ring = &adev->gfx.gfx_ring[0]; 3209 ring = &adev->gfx.gfx_ring[0];
3317 r = amdgpu_ring_test_ring(ring); 3210 r = amdgpu_ring_test_helper(ring);
3318 if (r) { 3211 if (r)
3319 ring->ready = false;
3320 return r; 3212 return r;
3321 }
3322 3213
3323 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3214 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3324 ring = &adev->gfx.compute_ring[i]; 3215 ring = &adev->gfx.compute_ring[i];
3325 3216 amdgpu_ring_test_helper(ring);
3326 ring->ready = true;
3327 r = amdgpu_ring_test_ring(ring);
3328 if (r)
3329 ring->ready = false;
3330 } 3217 }
3331 3218
3332 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3219 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
@@ -3353,7 +3240,7 @@ static int gfx_v9_0_hw_init(void *handle)
3353 if (r) 3240 if (r)
3354 return r; 3241 return r;
3355 3242
3356 r = gfx_v9_0_rlc_resume(adev); 3243 r = adev->gfx.rlc.funcs->resume(adev);
3357 if (r) 3244 if (r)
3358 return r; 3245 return r;
3359 3246
@@ -3391,7 +3278,7 @@ static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3391 amdgpu_ring_write(kiq_ring, 0); 3278 amdgpu_ring_write(kiq_ring, 0);
3392 amdgpu_ring_write(kiq_ring, 0); 3279 amdgpu_ring_write(kiq_ring, 0);
3393 } 3280 }
3394 r = amdgpu_ring_test_ring(kiq_ring); 3281 r = amdgpu_ring_test_helper(kiq_ring);
3395 if (r) 3282 if (r)
3396 DRM_ERROR("KCQ disable failed\n"); 3283 DRM_ERROR("KCQ disable failed\n");
3397 3284
@@ -3433,7 +3320,7 @@ static int gfx_v9_0_hw_fini(void *handle)
3433 } 3320 }
3434 3321
3435 gfx_v9_0_cp_enable(adev, false); 3322 gfx_v9_0_cp_enable(adev, false);
3436 gfx_v9_0_rlc_stop(adev); 3323 adev->gfx.rlc.funcs->stop(adev);
3437 3324
3438 gfx_v9_0_csb_vram_unpin(adev); 3325 gfx_v9_0_csb_vram_unpin(adev);
3439 3326
@@ -3508,7 +3395,7 @@ static int gfx_v9_0_soft_reset(void *handle)
3508 3395
3509 if (grbm_soft_reset) { 3396 if (grbm_soft_reset) {
3510 /* stop the rlc */ 3397 /* stop the rlc */
3511 gfx_v9_0_rlc_stop(adev); 3398 adev->gfx.rlc.funcs->stop(adev);
3512 3399
3513 /* Disable GFX parsing/prefetching */ 3400 /* Disable GFX parsing/prefetching */
3514 gfx_v9_0_cp_gfx_enable(adev, false); 3401 gfx_v9_0_cp_gfx_enable(adev, false);
@@ -3607,64 +3494,47 @@ static int gfx_v9_0_late_init(void *handle)
3607 return 0; 3494 return 0;
3608} 3495}
3609 3496
3610static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) 3497static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3611{ 3498{
3612 uint32_t rlc_setting, data; 3499 uint32_t rlc_setting;
3613 unsigned i;
3614
3615 if (adev->gfx.rlc.in_safe_mode)
3616 return;
3617 3500
3618 /* if RLC is not enabled, do nothing */ 3501 /* if RLC is not enabled, do nothing */
3619 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3502 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3620 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3503 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3621 return; 3504 return false;
3622
3623 if (adev->cg_flags &
3624 (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
3625 AMD_CG_SUPPORT_GFX_3D_CGCG)) {
3626 data = RLC_SAFE_MODE__CMD_MASK;
3627 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3628 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3629 3505
3630 /* wait for RLC_SAFE_MODE */ 3506 return true;
3631 for (i = 0; i < adev->usec_timeout; i++) {
3632 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3633 break;
3634 udelay(1);
3635 }
3636 adev->gfx.rlc.in_safe_mode = true;
3637 }
3638} 3507}
3639 3508
3640static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) 3509static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3641{ 3510{
3642 uint32_t rlc_setting, data; 3511 uint32_t data;
3643 3512 unsigned i;
3644 if (!adev->gfx.rlc.in_safe_mode)
3645 return;
3646 3513
3647 /* if RLC is not enabled, do nothing */ 3514 data = RLC_SAFE_MODE__CMD_MASK;
3648 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3515 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3649 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3516 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3650 return;
3651 3517
3652 if (adev->cg_flags & 3518 /* wait for RLC_SAFE_MODE */
3653 (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 3519 for (i = 0; i < adev->usec_timeout; i++) {
3654 /* 3520 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3655 * Try to exit safe mode only if it is already in safe 3521 break;
3656 * mode. 3522 udelay(1);
3657 */
3658 data = RLC_SAFE_MODE__CMD_MASK;
3659 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3660 adev->gfx.rlc.in_safe_mode = false;
3661 } 3523 }
3662} 3524}
3663 3525
3526static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3527{
3528 uint32_t data;
3529
3530 data = RLC_SAFE_MODE__CMD_MASK;
3531 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3532}
3533
3664static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 3534static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3665 bool enable) 3535 bool enable)
3666{ 3536{
3667 gfx_v9_0_enter_rlc_safe_mode(adev); 3537 amdgpu_gfx_rlc_enter_safe_mode(adev);
3668 3538
3669 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 3539 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3670 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 3540 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -3675,7 +3545,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3675 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 3545 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3676 } 3546 }
3677 3547
3678 gfx_v9_0_exit_rlc_safe_mode(adev); 3548 amdgpu_gfx_rlc_exit_safe_mode(adev);
3679} 3549}
3680 3550
3681static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 3551static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -3773,7 +3643,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
3773{ 3643{
3774 uint32_t data, def; 3644 uint32_t data, def;
3775 3645
3776 adev->gfx.rlc.funcs->enter_safe_mode(adev); 3646 amdgpu_gfx_rlc_enter_safe_mode(adev);
3777 3647
3778 /* Enable 3D CGCG/CGLS */ 3648 /* Enable 3D CGCG/CGLS */
3779 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 3649 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
@@ -3813,7 +3683,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
3813 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 3683 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
3814 } 3684 }
3815 3685
3816 adev->gfx.rlc.funcs->exit_safe_mode(adev); 3686 amdgpu_gfx_rlc_exit_safe_mode(adev);
3817} 3687}
3818 3688
3819static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 3689static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -3821,7 +3691,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
3821{ 3691{
3822 uint32_t def, data; 3692 uint32_t def, data;
3823 3693
3824 adev->gfx.rlc.funcs->enter_safe_mode(adev); 3694 amdgpu_gfx_rlc_enter_safe_mode(adev);
3825 3695
3826 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 3696 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
3827 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3697 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
@@ -3861,7 +3731,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
3861 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 3731 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
3862 } 3732 }
3863 3733
3864 adev->gfx.rlc.funcs->exit_safe_mode(adev); 3734 amdgpu_gfx_rlc_exit_safe_mode(adev);
3865} 3735}
3866 3736
3867static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 3737static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
@@ -3890,8 +3760,17 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
3890} 3760}
3891 3761
3892static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 3762static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
3893 .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode, 3763 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
3894 .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode 3764 .set_safe_mode = gfx_v9_0_set_safe_mode,
3765 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
3766 .init = gfx_v9_0_rlc_init,
3767 .get_csb_size = gfx_v9_0_get_csb_size,
3768 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
3769 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
3770 .resume = gfx_v9_0_rlc_resume,
3771 .stop = gfx_v9_0_rlc_stop,
3772 .reset = gfx_v9_0_rlc_reset,
3773 .start = gfx_v9_0_rlc_start
3895}; 3774};
3896 3775
3897static int gfx_v9_0_set_powergating_state(void *handle, 3776static int gfx_v9_0_set_powergating_state(void *handle,
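With the per-chip enter/exit helpers gone, the common amdgpu_gfx_rlc_enter_safe_mode() drives the new callbacks registered above. A rough sketch of how that wrapper is expected to combine them (see amdgpu_rlc.c for the authoritative version):

void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
{
	if (adev->gfx.rlc.in_safe_mode)
		return;

	/* if RLC is not enabled, do nothing */
	if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
		return;

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
		adev->gfx.rlc.funcs->set_safe_mode(adev);
		adev->gfx.rlc.in_safe_mode = true;
	}
}

amdgpu_gfx_rlc_exit_safe_mode() is assumed to mirror this using the unset_safe_mode callback.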
@@ -4072,9 +3951,11 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4072} 3951}
4073 3952
4074static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 3953static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4075 struct amdgpu_ib *ib, 3954 struct amdgpu_job *job,
4076 unsigned vmid, bool ctx_switch) 3955 struct amdgpu_ib *ib,
3956 bool ctx_switch)
4077{ 3957{
3958 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4078 u32 header, control = 0; 3959 u32 header, control = 0;
4079 3960
4080 if (ib->flags & AMDGPU_IB_FLAG_CE) 3961 if (ib->flags & AMDGPU_IB_FLAG_CE)
@@ -4103,20 +3984,22 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4103} 3984}
4104 3985
4105static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 3986static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4106 struct amdgpu_ib *ib, 3987 struct amdgpu_job *job,
4107 unsigned vmid, bool ctx_switch) 3988 struct amdgpu_ib *ib,
3989 bool ctx_switch)
4108{ 3990{
4109 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 3991 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
3992 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4110 3993
4111 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 3994 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4112 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 3995 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4113 amdgpu_ring_write(ring, 3996 amdgpu_ring_write(ring,
4114#ifdef __BIG_ENDIAN 3997#ifdef __BIG_ENDIAN
4115 (2 << 0) | 3998 (2 << 0) |
4116#endif 3999#endif
4117 lower_32_bits(ib->gpu_addr)); 4000 lower_32_bits(ib->gpu_addr));
4118 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4001 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4119 amdgpu_ring_write(ring, control); 4002 amdgpu_ring_write(ring, control);
4120} 4003}
4121 4004
4122static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4005static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
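Both emit_ib callbacks now take the owning amdgpu_job and derive the VMID from it, so internal submissions without a job (IB tests, for instance) can fall back to VMID 0. AMDGPU_JOB_GET_VMID() is presumably just that guard:

/* amdgpu_job.h, assumed definition (not shown in this diff) */
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)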
@@ -4695,12 +4578,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
4695 return 0; 4578 return 0;
4696} 4579}
4697 4580
4581static void gfx_v9_0_fault(struct amdgpu_device *adev,
4582 struct amdgpu_iv_entry *entry)
4583{
4584 u8 me_id, pipe_id, queue_id;
4585 struct amdgpu_ring *ring;
4586 int i;
4587
4588 me_id = (entry->ring_id & 0x0c) >> 2;
4589 pipe_id = (entry->ring_id & 0x03) >> 0;
4590 queue_id = (entry->ring_id & 0x70) >> 4;
4591
4592 switch (me_id) {
4593 case 0:
4594 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
4595 break;
4596 case 1:
4597 case 2:
4598 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4599 ring = &adev->gfx.compute_ring[i];
4600 if (ring->me == me_id && ring->pipe == pipe_id &&
4601 ring->queue == queue_id)
4602 drm_sched_fault(&ring->sched);
4603 }
4604 break;
4605 }
4606}
4607
4698static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 4608static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
4699 struct amdgpu_irq_src *source, 4609 struct amdgpu_irq_src *source,
4700 struct amdgpu_iv_entry *entry) 4610 struct amdgpu_iv_entry *entry)
4701{ 4611{
4702 DRM_ERROR("Illegal register access in command stream\n"); 4612 DRM_ERROR("Illegal register access in command stream\n");
4703 schedule_work(&adev->reset_work); 4613 gfx_v9_0_fault(adev, entry);
4704 return 0; 4614 return 0;
4705} 4615}
4706 4616
@@ -4709,7 +4619,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
4709 struct amdgpu_iv_entry *entry) 4619 struct amdgpu_iv_entry *entry)
4710{ 4620{
4711 DRM_ERROR("Illegal instruction in command stream\n"); 4621 DRM_ERROR("Illegal instruction in command stream\n");
4712 schedule_work(&adev->reset_work); 4622 gfx_v9_0_fault(adev, entry);
4713 return 0; 4623 return 0;
4714} 4624}
4715 4625
@@ -4836,10 +4746,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
4836 2 + /* gfx_v9_0_ring_emit_vm_flush */ 4746 2 + /* gfx_v9_0_ring_emit_vm_flush */
4837 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 4747 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
4838 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ 4748 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
4839 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
4840 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 4749 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
4841 .test_ring = gfx_v9_0_ring_test_ring, 4750 .test_ring = gfx_v9_0_ring_test_ring,
4842 .test_ib = gfx_v9_0_ring_test_ib,
4843 .insert_nop = amdgpu_ring_insert_nop, 4751 .insert_nop = amdgpu_ring_insert_nop,
4844 .pad_ib = amdgpu_ring_generic_pad_ib, 4752 .pad_ib = amdgpu_ring_generic_pad_ib,
4845 .emit_rreg = gfx_v9_0_ring_emit_rreg, 4753 .emit_rreg = gfx_v9_0_ring_emit_rreg,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index ceb7847b504f..f5edddf3b29d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -35,20 +35,25 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
35 return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; 35 return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;
36} 36}
37 37
38static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) 38void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
39 uint64_t page_table_base)
39{ 40{
40 uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); 41 /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
42 int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
43 - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
41 44
42 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, 45 WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
43 lower_32_bits(value)); 46 offset * vmid, lower_32_bits(page_table_base));
44 47
45 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, 48 WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
46 upper_32_bits(value)); 49 offset * vmid, upper_32_bits(page_table_base));
47} 50}
48 51
49static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) 52static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
50{ 53{
51 gfxhub_v1_0_init_gart_pt_regs(adev); 54 uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
55
56 gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
52 57
53 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, 58 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
54 (u32)(adev->gmc.gart_start >> 12)); 59 (u32)(adev->gmc.gart_start >> 12));
@@ -72,7 +77,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
72 77
73 /* Program the system aperture low logical page number. */ 78 /* Program the system aperture low logical page number. */
74 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 79 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
75 min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18); 80 min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
76 81
77 if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) 82 if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
78 /* 83 /*
@@ -82,11 +87,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
82 * to get rid of the VM fault and hardware hang. 87 * to get rid of the VM fault and hardware hang.
83 */ 88 */
84 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 89 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
85 max((adev->gmc.vram_end >> 18) + 0x1, 90 max((adev->gmc.fb_end >> 18) + 0x1,
86 adev->gmc.agp_end >> 18)); 91 adev->gmc.agp_end >> 18));
87 else 92 else
88 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 93 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
89 max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); 94 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
90 95
91 /* Set default page address. */ 96 /* Set default page address. */
92 value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start 97 value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
index 206e29cad753..92d3a70cd9b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h
@@ -30,5 +30,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
30 bool value); 30 bool value);
31void gfxhub_v1_0_init(struct amdgpu_device *adev); 31void gfxhub_v1_0_init(struct amdgpu_device *adev);
32u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); 32u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev);
33void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
34 uint64_t page_table_base);
33 35
34#endif 36#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index e1c2b4e9c7b2..2821d1d846e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -358,7 +358,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
358 return 0; 358 return 0;
359} 359}
360 360
361static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) 361static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
362 uint32_t vmid, uint32_t flush_type)
362{ 363{
363 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); 364 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
364} 365}
@@ -580,7 +581,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
580 else 581 else
581 gmc_v6_0_set_fault_enable_default(adev, true); 582 gmc_v6_0_set_fault_enable_default(adev, true);
582 583
583 gmc_v6_0_flush_gpu_tlb(adev, 0); 584 gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
584 dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", 585 dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
585 (unsigned)(adev->gmc.gart_size >> 20), 586 (unsigned)(adev->gmc.gart_size >> 20),
586 (unsigned long long)table_addr); 587 (unsigned long long)table_addr);
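gmc_v6_0 is the first of several GMC generations in this series whose flush_gpu_tlb() gains a flush_type argument; the pre-GMC9 parts simply ignore it. The matching callback in struct amdgpu_gmc_funcs (amdgpu_gmc.h, changed elsewhere in this diffstat) is assumed to become:

	/* flush the vm tlb via mmio */
	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
			      uint32_t flush_type);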
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 910c4ce19cb3..761dcfb2fec0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
430 * 430 *
431 * Flush the TLB for the requested page table (CIK). 431 * Flush the TLB for the requested page table (CIK).
432 */ 432 */
433static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) 433static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
434 uint32_t vmid, uint32_t flush_type)
434{ 435{
435 /* bits 0-15 are the VM contexts0-15 */ 436 /* bits 0-15 are the VM contexts0-15 */
436 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); 437 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -698,7 +699,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
698 WREG32(mmCHUB_CONTROL, tmp); 699 WREG32(mmCHUB_CONTROL, tmp);
699 } 700 }
700 701
701 gmc_v7_0_flush_gpu_tlb(adev, 0); 702 gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
702 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 703 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
703 (unsigned)(adev->gmc.gart_size >> 20), 704 (unsigned)(adev->gmc.gart_size >> 20),
704 (unsigned long long)table_addr); 705 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1d3265c97b70..531aaf377592 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -611,7 +611,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
611 * Flush the TLB for the requested page table (CIK). 611 * Flush the TLB for the requested page table (CIK).
612 */ 612 */
613static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, 613static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
614 uint32_t vmid) 614 uint32_t vmid, uint32_t flush_type)
615{ 615{
616 /* bits 0-15 are the VM contexts0-15 */ 616 /* bits 0-15 are the VM contexts0-15 */
617 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); 617 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
@@ -920,7 +920,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
920 else 920 else
921 gmc_v8_0_set_fault_enable_default(adev, true); 921 gmc_v8_0_set_fault_enable_default(adev, true);
922 922
923 gmc_v8_0_flush_gpu_tlb(adev, 0); 923 gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
924 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 924 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
925 (unsigned)(adev->gmc.gart_size >> 20), 925 (unsigned)(adev->gmc.gart_size >> 20),
926 (unsigned long long)table_addr); 926 (unsigned long long)table_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f35d7a554ad5..811231e4ec53 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -293,14 +293,14 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
293 adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; 293 adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
294} 294}
295 295
296static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) 296static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
297 uint32_t flush_type)
297{ 298{
298 u32 req = 0; 299 u32 req = 0;
299 300
300 /* invalidate using legacy mode on vmid*/
301 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, 301 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
302 PER_VMID_INVALIDATE_REQ, 1 << vmid); 302 PER_VMID_INVALIDATE_REQ, 1 << vmid);
303 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); 303 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
304 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); 304 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
305 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); 305 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
306 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); 306 req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
@@ -312,48 +312,6 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
312 return req; 312 return req;
313} 313}
314 314
315static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
316 uint32_t reg0, uint32_t reg1,
317 uint32_t ref, uint32_t mask)
318{
319 signed long r, cnt = 0;
320 unsigned long flags;
321 uint32_t seq;
322 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
323 struct amdgpu_ring *ring = &kiq->ring;
324
325 spin_lock_irqsave(&kiq->ring_lock, flags);
326
327 amdgpu_ring_alloc(ring, 32);
328 amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
329 ref, mask);
330 amdgpu_fence_emit_polling(ring, &seq);
331 amdgpu_ring_commit(ring);
332 spin_unlock_irqrestore(&kiq->ring_lock, flags);
333
334 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
335
336 /* don't wait anymore for IRQ context */
337 if (r < 1 && in_interrupt())
338 goto failed_kiq;
339
340 might_sleep();
341
342 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
343 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
344 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
345 }
346
347 if (cnt > MAX_KIQ_REG_TRY)
348 goto failed_kiq;
349
350 return 0;
351
352failed_kiq:
353 pr_err("failed to invalidate tlb with kiq\n");
354 return r;
355}
356
357/* 315/*
358 * GART 316 * GART
359 * VMID 0 is the physical GPU addresses as used by the kernel. 317 * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -362,64 +320,47 @@ failed_kiq:
362 */ 320 */
363 321
364/** 322/**
365 * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback 323 * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type
366 * 324 *
367 * @adev: amdgpu_device pointer 325 * @adev: amdgpu_device pointer
368 * @vmid: vm instance to flush 326 * @vmid: vm instance to flush
327 * @flush_type: the flush type
369 * 328 *
370 * Flush the TLB for the requested page table. 329 * Flush the TLB for the requested page table using certain type.
371 */ 330 */
372static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, 331static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
373 uint32_t vmid) 332 uint32_t vmid, uint32_t flush_type)
374{ 333{
375 /* Use register 17 for GART */
376 const unsigned eng = 17; 334 const unsigned eng = 17;
377 unsigned i, j; 335 unsigned i, j;
378 int r;
379 336
380 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 337 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
381 struct amdgpu_vmhub *hub = &adev->vmhub[i]; 338 struct amdgpu_vmhub *hub = &adev->vmhub[i];
382 u32 tmp = gmc_v9_0_get_invalidate_req(vmid); 339 u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
383
384 if (adev->gfx.kiq.ring.ready &&
385 (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
386 !adev->in_gpu_reset) {
387 r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
388 hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
389 if (!r)
390 continue;
391 }
392 340
393 spin_lock(&adev->gmc.invalidate_lock); 341 if (i == AMDGPU_GFXHUB && !adev->in_gpu_reset &&
342 adev->gfx.kiq.ring.sched.ready &&
343 (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
344 uint32_t req = hub->vm_inv_eng0_req + eng;
345 uint32_t ack = hub->vm_inv_eng0_ack + eng;
394 346
395 WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); 347 amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
396 348 1 << vmid);
397 /* Busy wait for ACK.*/
398 for (j = 0; j < 100; j++) {
399 tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
400 tmp &= 1 << vmid;
401 if (tmp)
402 break;
403 cpu_relax();
404 }
405 if (j < 100) {
406 spin_unlock(&adev->gmc.invalidate_lock);
407 continue; 349 continue;
408 } 350 }
409 351
410 /* Wait for ACK with a delay.*/ 352 spin_lock(&adev->gmc.invalidate_lock);
353 WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
411 for (j = 0; j < adev->usec_timeout; j++) { 354 for (j = 0; j < adev->usec_timeout; j++) {
412 tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); 355 tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
413 tmp &= 1 << vmid; 356 if (tmp & (1 << vmid))
414 if (tmp)
415 break; 357 break;
416 udelay(1); 358 udelay(1);
417 } 359 }
418 if (j < adev->usec_timeout) {
419 spin_unlock(&adev->gmc.invalidate_lock);
420 continue;
421 }
422 spin_unlock(&adev->gmc.invalidate_lock); 360 spin_unlock(&adev->gmc.invalidate_lock);
361 if (j < adev->usec_timeout)
362 continue;
363
423 DRM_ERROR("Timeout waiting for VM flush ACK!\n"); 364 DRM_ERROR("Timeout waiting for VM flush ACK!\n");
424 } 365 }
425} 366}
@@ -429,7 +370,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
429{ 370{
430 struct amdgpu_device *adev = ring->adev; 371 struct amdgpu_device *adev = ring->adev;
431 struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; 372 struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
432 uint32_t req = gmc_v9_0_get_invalidate_req(vmid); 373 uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
433 unsigned eng = ring->vm_inv_eng; 374 unsigned eng = ring->vm_inv_eng;
434 375
435 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), 376 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
@@ -739,9 +680,8 @@ static int gmc_v9_0_late_init(void *handle)
739 unsigned vmhub = ring->funcs->vmhub; 680 unsigned vmhub = ring->funcs->vmhub;
740 681
741 ring->vm_inv_eng = vm_inv_eng[vmhub]++; 682 ring->vm_inv_eng = vm_inv_eng[vmhub]++;
742 dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", 683 dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
743 ring->idx, ring->name, ring->vm_inv_eng, 684 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
744 ring->funcs->vmhub);
745 } 685 }
746 686
747 /* Engine 16 is used for KFD and 17 for GART flushes */ 687 /* Engine 16 is used for KFD and 17 for GART flushes */
@@ -1122,7 +1062,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
1122 1062
1123 gfxhub_v1_0_set_fault_enable_default(adev, value); 1063 gfxhub_v1_0_set_fault_enable_default(adev, value);
1124 mmhub_v1_0_set_fault_enable_default(adev, value); 1064 mmhub_v1_0_set_fault_enable_default(adev, value);
1125 gmc_v9_0_flush_gpu_tlb(adev, 0); 1065 gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
1126 1066
1127 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 1067 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
1128 (unsigned)(adev->gmc.gart_size >> 20), 1068 (unsigned)(adev->gmc.gart_size >> 20),
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index d0e478f43443..0c9a2c03504e 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -508,19 +508,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable)
508 pi->caps_db_ramping || 508 pi->caps_db_ramping ||
509 pi->caps_td_ramping || 509 pi->caps_td_ramping ||
510 pi->caps_tcp_ramping) { 510 pi->caps_tcp_ramping) {
511 adev->gfx.rlc.funcs->enter_safe_mode(adev); 511 amdgpu_gfx_rlc_enter_safe_mode(adev);
512 512
513 if (enable) { 513 if (enable) {
514 ret = kv_program_pt_config_registers(adev, didt_config_kv); 514 ret = kv_program_pt_config_registers(adev, didt_config_kv);
515 if (ret) { 515 if (ret) {
516 adev->gfx.rlc.funcs->exit_safe_mode(adev); 516 amdgpu_gfx_rlc_exit_safe_mode(adev);
517 return ret; 517 return ret;
518 } 518 }
519 } 519 }
520 520
521 kv_do_enable_didt(adev, enable); 521 kv_do_enable_didt(adev, enable);
522 522
523 adev->gfx.rlc.funcs->exit_safe_mode(adev); 523 amdgpu_gfx_rlc_exit_safe_mode(adev);
524 } 524 }
525 525
526 return 0; 526 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index fd23ba1226a5..d0d966d6080a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -52,20 +52,25 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
52 return base; 52 return base;
53} 53}
54 54
55static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) 55void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
56 uint64_t page_table_base)
56{ 57{
57 uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); 58 /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */
59 int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32
60 - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
58 61
59 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, 62 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
60 lower_32_bits(value)); 63 offset * vmid, lower_32_bits(page_table_base));
61 64
62 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, 65 WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
63 upper_32_bits(value)); 66 offset * vmid, upper_32_bits(page_table_base));
64} 67}
65 68
66static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) 69static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
67{ 70{
68 mmhub_v1_0_init_gart_pt_regs(adev); 71 uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
72
73 mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
69 74
70 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, 75 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
71 (u32)(adev->gmc.gart_start >> 12)); 76 (u32)(adev->gmc.gart_start >> 12));
@@ -90,7 +95,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
90 95
91 /* Program the system aperture low logical page number. */ 96 /* Program the system aperture low logical page number. */
92 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 97 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
93 min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18); 98 min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
94 99
95 if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) 100 if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
96 /* 101 /*
@@ -100,11 +105,11 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
100 * to get rid of the VM fault and hardware hang. 105 * to get rid of the VM fault and hardware hang.
101 */ 106 */
102 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 107 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
103 max((adev->gmc.vram_end >> 18) + 0x1, 108 max((adev->gmc.fb_end >> 18) + 0x1,
104 adev->gmc.agp_end >> 18)); 109 adev->gmc.agp_end >> 18));
105 else 110 else
106 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 111 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
107 max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); 112 max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
108 113
109 /* Set default page address. */ 114 /* Set default page address. */
110 value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + 115 value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
index bef3d0c0c117..0de0fdf98c00 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
@@ -34,5 +34,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
34void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); 34void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
35void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, 35void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
36 bool enable); 36 bool enable);
37void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
38 uint64_t page_table_base);
37 39
38#endif 40#endif
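gfxhub_v1_0_setup_vm_pt_regs() and mmhub_v1_0_setup_vm_pt_regs() are exported so callers outside the VMID-0 GART setup can point any VMID at a page-table base on both hubs. A purely illustrative caller (function name hypothetical):

static void example_program_vmid_pt(struct amdgpu_device *adev,
				    unsigned int vmid, uint64_t pd_addr)
{
	/* program the same page directory on the GFX and MM hubs */
	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, pd_addr);
	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, pd_addr);
}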
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 3f3fac2d50cd..e5dd052d9e06 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -34,6 +34,7 @@
34#include "nbio/nbio_7_4_offset.h" 34#include "nbio/nbio_7_4_offset.h"
35 35
36MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); 36MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
37MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
37 38
38/* address block */ 39/* address block */
39#define smnMP1_FIRMWARE_FLAGS 0x3010024 40#define smnMP1_FIRMWARE_FLAGS 0x3010024
@@ -98,7 +99,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
98 const char *chip_name; 99 const char *chip_name;
99 char fw_name[30]; 100 char fw_name[30];
100 int err = 0; 101 int err = 0;
101 const struct psp_firmware_header_v1_0 *hdr; 102 const struct psp_firmware_header_v1_0 *sos_hdr;
103 const struct ta_firmware_header_v1_0 *ta_hdr;
102 104
103 DRM_DEBUG("\n"); 105 DRM_DEBUG("\n");
104 106
@@ -119,16 +121,32 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
119 if (err) 121 if (err)
120 goto out; 122 goto out;
121 123
122 hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; 124 sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
123 adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version); 125 adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
124 adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version); 126 adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
125 adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes); 127 adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
126 adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) - 128 adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) -
127 le32_to_cpu(hdr->sos_size_bytes); 129 le32_to_cpu(sos_hdr->sos_size_bytes);
128 adev->psp.sys_start_addr = (uint8_t *)hdr + 130 adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
129 le32_to_cpu(hdr->header.ucode_array_offset_bytes); 131 le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
130 adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + 132 adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
131 le32_to_cpu(hdr->sos_offset_bytes); 133 le32_to_cpu(sos_hdr->sos_offset_bytes);
134
135 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
136 err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
137 if (err)
138 goto out;
139
140 err = amdgpu_ucode_validate(adev->psp.ta_fw);
141 if (err)
142 goto out;
143
144 ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
145 adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
146 adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
147 adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
148 le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
149
132 return 0; 150 return 0;
133out: 151out:
134 if (err) { 152 if (err) {
@@ -167,7 +185,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
167 /* Copy PSP System Driver binary to memory */ 185 /* Copy PSP System Driver binary to memory */
168 memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); 186 memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
169 187
170 /* Provide the sys driver to bootrom */ 188 /* Provide the sys driver to bootloader */
171 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, 189 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
172 (uint32_t)(psp->fw_pri_mc_addr >> 20)); 190 (uint32_t)(psp->fw_pri_mc_addr >> 20));
173 psp_gfxdrv_command_reg = 1 << 16; 191 psp_gfxdrv_command_reg = 1 << 16;
@@ -208,7 +226,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
208 /* Copy Secure OS binary to PSP memory */ 226 /* Copy Secure OS binary to PSP memory */
209 memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); 227 memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
210 228
211 /* Provide the PSP secure OS to bootrom */ 229 /* Provide the PSP secure OS to bootloader */
212 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, 230 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
213 (uint32_t)(psp->fw_pri_mc_addr >> 20)); 231 (uint32_t)(psp->fw_pri_mc_addr >> 20));
214 psp_gfxdrv_command_reg = 2 << 16; 232 psp_gfxdrv_command_reg = 2 << 16;
@@ -552,24 +570,110 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
552static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp, 570static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp,
553 int number_devices, struct psp_xgmi_topology_info *topology) 571 int number_devices, struct psp_xgmi_topology_info *topology)
554{ 572{
573 struct ta_xgmi_shared_memory *xgmi_cmd;
574 struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
575 struct ta_xgmi_cmd_get_topology_info_output *topology_info_output;
576 int i;
577 int ret;
578
579 if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
580 return -EINVAL;
581
582 xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
583 memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
584
585 /* Fill in the shared memory with topology information as input */
586 topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
587 xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO;
588 topology_info_input->num_nodes = number_devices;
589
590 for (i = 0; i < topology_info_input->num_nodes; i++) {
591 topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
592 topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
593 topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
594 topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
595 }
596
597 /* Invoke xgmi ta to get the topology information */
598 ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO);
599 if (ret)
600 return ret;
601
602 /* Read the output topology information from the shared memory */
603 topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
604 topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
605 for (i = 0; i < topology->num_nodes; i++) {
606 topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
607 topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
608 topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled;
609 topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine;
610 }
611
555 return 0; 612 return 0;
556} 613}
557 614
558static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp, 615static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp,
559 int number_devices, struct psp_xgmi_topology_info *topology) 616 int number_devices, struct psp_xgmi_topology_info *topology)
560{ 617{
561 return 0; 618 struct ta_xgmi_shared_memory *xgmi_cmd;
619 struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
620 int i;
621
622 if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
623 return -EINVAL;
624
625 xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
626 memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
627
628 topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
629 xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO;
630 topology_info_input->num_nodes = number_devices;
631
632 for (i = 0; i < topology_info_input->num_nodes; i++) {
633 topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
634 topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
635 topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
636 topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
637 }
638
639 /* Invoke xgmi ta to set topology information */
640 return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
562} 641}
563 642
564static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp) 643static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp)
565{ 644{
566 u64 hive_id = 0; 645 struct ta_xgmi_shared_memory *xgmi_cmd;
646 int ret;
647
648 xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
649 memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
650
651 xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
652
653 /* Invoke xgmi ta to get hive id */
654 ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
655 if (ret)
656 return 0;
657 else
658 return xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
659}
660
661static u64 psp_v11_0_xgmi_get_node_id(struct psp_context *psp)
662{
663 struct ta_xgmi_shared_memory *xgmi_cmd;
664 int ret;
665
666 xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf;
667 memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
567 668
568 /* Remove me when we can get correct hive_id through PSP */ 669 xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
569 if (psp->adev->gmc.xgmi.num_physical_nodes)
570 hive_id = 0x123456789abcdef;
571 670
572 return hive_id; 671 /* Invoke xgmi ta to get the node id */
672 ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
673 if (ret)
674 return 0;
675 else
676 return xgmi_cmd->xgmi_out_message.get_node_id.node_id;
573} 677}
574 678
575static const struct psp_funcs psp_v11_0_funcs = { 679static const struct psp_funcs psp_v11_0_funcs = {
@@ -587,6 +691,7 @@ static const struct psp_funcs psp_v11_0_funcs = {
587 .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info, 691 .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
588 .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info, 692 .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info,
589 .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id, 693 .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id,
694 .xgmi_get_node_id = psp_v11_0_xgmi_get_node_id,
590}; 695};
591 696
592void psp_v11_0_set_psp_funcs(struct psp_context *psp) 697void psp_v11_0_set_psp_funcs(struct psp_context *psp)
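Each XGMI operation now follows the same TA pattern: clear the shared buffer, fill in the command id plus input, call psp_xgmi_invoke(), and read the result back from xgmi_out_message. A hedged sketch of a consumer reading the ids through the psp_funcs entries registered above (the wrapper name is hypothetical; the real consumer is amdgpu_xgmi.c):

static void example_read_xgmi_ids(struct psp_context *psp)
{
	u64 hive_id = psp->funcs->xgmi_get_hive_id(psp);
	u64 node_id = psp->funcs->xgmi_get_node_id(psp);

	DRM_INFO("xgmi hive 0x%llx, node 0x%llx\n", hive_id, node_id);
}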
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index e1ebf770c303..9cea0bbe4525 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -194,7 +194,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
194 /* Copy PSP System Driver binary to memory */ 194 /* Copy PSP System Driver binary to memory */
195 memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); 195 memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
196 196
197 /* Provide the sys driver to bootrom */ 197 /* Provide the sys driver to bootloader */
198 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, 198 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
199 (uint32_t)(psp->fw_pri_mc_addr >> 20)); 199 (uint32_t)(psp->fw_pri_mc_addr >> 20));
200 psp_gfxdrv_command_reg = 1 << 16; 200 psp_gfxdrv_command_reg = 1 << 16;
@@ -254,7 +254,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
254 /* Copy Secure OS binary to PSP memory */ 254 /* Copy Secure OS binary to PSP memory */
255 memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); 255 memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
256 256
257 /* Provide the PSP secure OS to bootrom */ 257 /* Provide the PSP secure OS to bootloader */
258 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, 258 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
259 (uint32_t)(psp->fw_pri_mc_addr >> 20)); 259 (uint32_t)(psp->fw_pri_mc_addr >> 20));
260 psp_gfxdrv_command_reg = 2 << 16; 260 psp_gfxdrv_command_reg = 2 << 16;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 2d4770e173dd..9f3cb2aec7c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
225 225
226static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 226static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
227{ 227{
228 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 228 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
229 int i; 229 int i;
230 230
231 for (i = 0; i < count; i++) 231 for (i = 0; i < count; i++)
@@ -245,9 +245,12 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
245 * Schedule an IB in the DMA ring (VI). 245 * Schedule an IB in the DMA ring (VI).
246 */ 246 */
247static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, 247static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
248 struct amdgpu_job *job,
248 struct amdgpu_ib *ib, 249 struct amdgpu_ib *ib,
249 unsigned vmid, bool ctx_switch) 250 bool ctx_switch)
250{ 251{
252 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
253
251 /* IB packet must end on a 8 DW boundary */ 254 /* IB packet must end on a 8 DW boundary */
252 sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 255 sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
253 256
@@ -349,8 +352,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
349 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); 352 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
350 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 353 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
351 } 354 }
352 sdma0->ready = false; 355 sdma0->sched.ready = false;
353 sdma1->ready = false; 356 sdma1->sched.ready = false;
354} 357}
355 358
356/** 359/**
@@ -471,17 +474,15 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
471 /* enable DMA IBs */ 474 /* enable DMA IBs */
472 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 475 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
473 476
474 ring->ready = true; 477 ring->sched.ready = true;
475 } 478 }
476 479
477 sdma_v2_4_enable(adev, true); 480 sdma_v2_4_enable(adev, true);
478 for (i = 0; i < adev->sdma.num_instances; i++) { 481 for (i = 0; i < adev->sdma.num_instances; i++) {
479 ring = &adev->sdma.instance[i].ring; 482 ring = &adev->sdma.instance[i].ring;
480 r = amdgpu_ring_test_ring(ring); 483 r = amdgpu_ring_test_helper(ring);
481 if (r) { 484 if (r)
482 ring->ready = false;
483 return r; 485 return r;
484 }
485 486
486 if (adev->mman.buffer_funcs_ring == ring) 487 if (adev->mman.buffer_funcs_ring == ring)
487 amdgpu_ttm_set_buffer_funcs_status(adev, true); 488 amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -550,21 +551,16 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
550 u64 gpu_addr; 551 u64 gpu_addr;
551 552
552 r = amdgpu_device_wb_get(adev, &index); 553 r = amdgpu_device_wb_get(adev, &index);
553 if (r) { 554 if (r)
554 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
555 return r; 555 return r;
556 }
557 556
558 gpu_addr = adev->wb.gpu_addr + (index * 4); 557 gpu_addr = adev->wb.gpu_addr + (index * 4);
559 tmp = 0xCAFEDEAD; 558 tmp = 0xCAFEDEAD;
560 adev->wb.wb[index] = cpu_to_le32(tmp); 559 adev->wb.wb[index] = cpu_to_le32(tmp);
561 560
562 r = amdgpu_ring_alloc(ring, 5); 561 r = amdgpu_ring_alloc(ring, 5);
563 if (r) { 562 if (r)
564 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 563 goto error_free_wb;
565 amdgpu_device_wb_free(adev, index);
566 return r;
567 }
568 564
569 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 565 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
570 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); 566 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -581,15 +577,11 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
581 DRM_UDELAY(1); 577 DRM_UDELAY(1);
582 } 578 }
583 579
584 if (i < adev->usec_timeout) { 580 if (i >= adev->usec_timeout)
585 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); 581 r = -ETIMEDOUT;
586 } else {
587 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
588 ring->idx, tmp);
589 r = -EINVAL;
590 }
591 amdgpu_device_wb_free(adev, index);
592 582
583error_free_wb:
584 amdgpu_device_wb_free(adev, index);
593 return r; 585 return r;
594} 586}
595 587
@@ -612,20 +604,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
612 long r; 604 long r;
613 605
614 r = amdgpu_device_wb_get(adev, &index); 606 r = amdgpu_device_wb_get(adev, &index);
615 if (r) { 607 if (r)
616 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
617 return r; 608 return r;
618 }
619 609
620 gpu_addr = adev->wb.gpu_addr + (index * 4); 610 gpu_addr = adev->wb.gpu_addr + (index * 4);
621 tmp = 0xCAFEDEAD; 611 tmp = 0xCAFEDEAD;
622 adev->wb.wb[index] = cpu_to_le32(tmp); 612 adev->wb.wb[index] = cpu_to_le32(tmp);
623 memset(&ib, 0, sizeof(ib)); 613 memset(&ib, 0, sizeof(ib));
624 r = amdgpu_ib_get(adev, NULL, 256, &ib); 614 r = amdgpu_ib_get(adev, NULL, 256, &ib);
625 if (r) { 615 if (r)
626 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
627 goto err0; 616 goto err0;
628 }
629 617
630 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 618 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
631 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 619 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@@ -644,21 +632,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
644 632
645 r = dma_fence_wait_timeout(f, false, timeout); 633 r = dma_fence_wait_timeout(f, false, timeout);
646 if (r == 0) { 634 if (r == 0) {
647 DRM_ERROR("amdgpu: IB test timed out\n");
648 r = -ETIMEDOUT; 635 r = -ETIMEDOUT;
649 goto err1; 636 goto err1;
650 } else if (r < 0) { 637 } else if (r < 0) {
651 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
652 goto err1; 638 goto err1;
653 } 639 }
654 tmp = le32_to_cpu(adev->wb.wb[index]); 640 tmp = le32_to_cpu(adev->wb.wb[index]);
655 if (tmp == 0xDEADBEEF) { 641 if (tmp == 0xDEADBEEF)
656 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
657 r = 0; 642 r = 0;
658 } else { 643 else
659 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
660 r = -EINVAL; 644 r = -EINVAL;
661 }
662 645
663err1: 646err1:
664 amdgpu_ib_free(adev, &ib, NULL); 647 amdgpu_ib_free(adev, &ib, NULL);
@@ -760,7 +743,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
760 */ 743 */
761static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 744static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
762{ 745{
763 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 746 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
764 u32 pad_count; 747 u32 pad_count;
765 int i; 748 int i;
766 749
@@ -1105,8 +1088,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
1105 struct amdgpu_irq_src *source, 1088 struct amdgpu_irq_src *source,
1106 struct amdgpu_iv_entry *entry) 1089 struct amdgpu_iv_entry *entry)
1107{ 1090{
1091 u8 instance_id, queue_id;
1092
1108 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 1093 DRM_ERROR("Illegal instruction in SDMA command stream\n");
1109 schedule_work(&adev->reset_work); 1094 instance_id = (entry->ring_id & 0x3) >> 0;
1095 queue_id = (entry->ring_id & 0xc) >> 2;
1096
1097 if (instance_id <= 1 && queue_id == 0)
1098 drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
1110 return 0; 1099 return 0;
1111} 1100}
1112 1101
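The illegal-instruction handler above no longer schedules a driver-wide reset; it decodes which SDMA instance and queue raised the interrupt and, for the gfx queue of instance 0 or 1, reports the fault to the GPU scheduler with drm_sched_fault(), which lets the scheduler's own recovery path deal with the bad job. A minimal, self-contained sketch of the ring_id decode follows; the struct, function and printed example are illustrative only, while the field packing (2-bit instance, 2-bit queue) is taken from the hunk above.

#include <stdint.h>
#include <stdio.h>

struct sdma_fault {
        unsigned int instance_id;       /* bits [1:0] of the IV entry's ring_id */
        unsigned int queue_id;          /* bits [3:2] of the IV entry's ring_id */
};

static struct sdma_fault decode_ring_id(uint32_t ring_id)
{
        struct sdma_fault f;

        f.instance_id = ring_id & 0x3;
        f.queue_id = (ring_id & 0xc) >> 2;
        return f;
}

int main(void)
{
        /* ring_id 0x1 -> instance 1, queue 0: this is the case the patch
         * forwards to drm_sched_fault(); other queues are ignored. */
        struct sdma_fault f = decode_ring_id(0x1);

        printf("instance %u, queue %u\n", f.instance_id, f.queue_id);
        return 0;
}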
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 6fb3edaba0ec..b6a25f92d566 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -399,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
399 399
400static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 400static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
401{ 401{
402 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 402 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
403 int i; 403 int i;
404 404
405 for (i = 0; i < count; i++) 405 for (i = 0; i < count; i++)
@@ -419,9 +419,12 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
419 * Schedule an IB in the DMA ring (VI). 419 * Schedule an IB in the DMA ring (VI).
420 */ 420 */
421static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, 421static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
422 struct amdgpu_job *job,
422 struct amdgpu_ib *ib, 423 struct amdgpu_ib *ib,
423 unsigned vmid, bool ctx_switch) 424 bool ctx_switch)
424{ 425{
426 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
427
425 /* IB packet must end on a 8 DW boundary */ 428 /* IB packet must end on a 8 DW boundary */
426 sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 429 sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
427 430
@@ -523,8 +526,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
523 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); 526 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
524 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 527 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
525 } 528 }
526 sdma0->ready = false; 529 sdma0->sched.ready = false;
527 sdma1->ready = false; 530 sdma1->sched.ready = false;
528} 531}
529 532
530/** 533/**
@@ -739,7 +742,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
739 /* enable DMA IBs */ 742 /* enable DMA IBs */
740 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); 743 WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
741 744
742 ring->ready = true; 745 ring->sched.ready = true;
743 } 746 }
744 747
745 /* unhalt the MEs */ 748 /* unhalt the MEs */
@@ -749,11 +752,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
749 752
750 for (i = 0; i < adev->sdma.num_instances; i++) { 753 for (i = 0; i < adev->sdma.num_instances; i++) {
751 ring = &adev->sdma.instance[i].ring; 754 ring = &adev->sdma.instance[i].ring;
752 r = amdgpu_ring_test_ring(ring); 755 r = amdgpu_ring_test_helper(ring);
753 if (r) { 756 if (r)
754 ring->ready = false;
755 return r; 757 return r;
756 }
757 758
758 if (adev->mman.buffer_funcs_ring == ring) 759 if (adev->mman.buffer_funcs_ring == ring)
759 amdgpu_ttm_set_buffer_funcs_status(adev, true); 760 amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -822,21 +823,16 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
822 u64 gpu_addr; 823 u64 gpu_addr;
823 824
824 r = amdgpu_device_wb_get(adev, &index); 825 r = amdgpu_device_wb_get(adev, &index);
825 if (r) { 826 if (r)
826 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
827 return r; 827 return r;
828 }
829 828
830 gpu_addr = adev->wb.gpu_addr + (index * 4); 829 gpu_addr = adev->wb.gpu_addr + (index * 4);
831 tmp = 0xCAFEDEAD; 830 tmp = 0xCAFEDEAD;
832 adev->wb.wb[index] = cpu_to_le32(tmp); 831 adev->wb.wb[index] = cpu_to_le32(tmp);
833 832
834 r = amdgpu_ring_alloc(ring, 5); 833 r = amdgpu_ring_alloc(ring, 5);
835 if (r) { 834 if (r)
836 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 835 goto error_free_wb;
837 amdgpu_device_wb_free(adev, index);
838 return r;
839 }
840 836
841 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 837 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
842 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); 838 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -853,15 +849,11 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
853 DRM_UDELAY(1); 849 DRM_UDELAY(1);
854 } 850 }
855 851
856 if (i < adev->usec_timeout) { 852 if (i >= adev->usec_timeout)
857 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); 853 r = -ETIMEDOUT;
858 } else {
859 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
860 ring->idx, tmp);
861 r = -EINVAL;
862 }
863 amdgpu_device_wb_free(adev, index);
864 854
855error_free_wb:
856 amdgpu_device_wb_free(adev, index);
865 return r; 857 return r;
866} 858}
867 859
@@ -884,20 +876,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
884 long r; 876 long r;
885 877
886 r = amdgpu_device_wb_get(adev, &index); 878 r = amdgpu_device_wb_get(adev, &index);
887 if (r) { 879 if (r)
888 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
889 return r; 880 return r;
890 }
891 881
892 gpu_addr = adev->wb.gpu_addr + (index * 4); 882 gpu_addr = adev->wb.gpu_addr + (index * 4);
893 tmp = 0xCAFEDEAD; 883 tmp = 0xCAFEDEAD;
894 adev->wb.wb[index] = cpu_to_le32(tmp); 884 adev->wb.wb[index] = cpu_to_le32(tmp);
895 memset(&ib, 0, sizeof(ib)); 885 memset(&ib, 0, sizeof(ib));
896 r = amdgpu_ib_get(adev, NULL, 256, &ib); 886 r = amdgpu_ib_get(adev, NULL, 256, &ib);
897 if (r) { 887 if (r)
898 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
899 goto err0; 888 goto err0;
900 }
901 889
902 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 890 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
903 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 891 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@@ -916,21 +904,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
916 904
917 r = dma_fence_wait_timeout(f, false, timeout); 905 r = dma_fence_wait_timeout(f, false, timeout);
918 if (r == 0) { 906 if (r == 0) {
919 DRM_ERROR("amdgpu: IB test timed out\n");
920 r = -ETIMEDOUT; 907 r = -ETIMEDOUT;
921 goto err1; 908 goto err1;
922 } else if (r < 0) { 909 } else if (r < 0) {
923 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
924 goto err1; 910 goto err1;
925 } 911 }
926 tmp = le32_to_cpu(adev->wb.wb[index]); 912 tmp = le32_to_cpu(adev->wb.wb[index]);
927 if (tmp == 0xDEADBEEF) { 913 if (tmp == 0xDEADBEEF)
928 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
929 r = 0; 914 r = 0;
930 } else { 915 else
931 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
932 r = -EINVAL; 916 r = -EINVAL;
933 }
934err1: 917err1:
935 amdgpu_ib_free(adev, &ib, NULL); 918 amdgpu_ib_free(adev, &ib, NULL);
936 dma_fence_put(f); 919 dma_fence_put(f);
@@ -1031,7 +1014,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
1031 */ 1014 */
1032static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 1015static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1033{ 1016{
1034 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 1017 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1035 u32 pad_count; 1018 u32 pad_count;
1036 int i; 1019 int i;
1037 1020
@@ -1440,8 +1423,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
1440 struct amdgpu_irq_src *source, 1423 struct amdgpu_irq_src *source,
1441 struct amdgpu_iv_entry *entry) 1424 struct amdgpu_iv_entry *entry)
1442{ 1425{
1426 u8 instance_id, queue_id;
1427
1443 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 1428 DRM_ERROR("Illegal instruction in SDMA command stream\n");
1444 schedule_work(&adev->reset_work); 1429 instance_id = (entry->ring_id & 0x3) >> 0;
1430 queue_id = (entry->ring_id & 0xc) >> 2;
1431
1432 if (instance_id <= 1 && queue_id == 0)
1433 drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
1445 return 0; 1434 return 0;
1446} 1435}
1447 1436
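In both SDMA v2.4 and v3.0 above, the ring and IB tests drop their per-failure DRM_ERROR prints (error reporting is left to the callers, e.g. amdgpu_ring_test_helper()) and funnel every failure through a single cleanup label. The fragment below is a generic, self-contained sketch of that single-exit pattern; the malloc-backed writeback slot and the submit_packet() stub merely stand in for the real amdgpu helpers.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for writing the test packet and letting the engine run */
static int submit_packet(uint32_t *wb)
{
        *wb = 0xDEADBEEF;
        return 0;
}

static int ring_test(void)
{
        uint32_t *wb = malloc(sizeof(*wb));     /* stand-in for the wb slot */
        int r;

        if (!wb)
                return -ENOMEM;

        *wb = 0xCAFEDEAD;
        r = submit_packet(wb);
        if (r)
                goto error_free_wb;

        if (*wb != 0xDEADBEEF)
                r = -ETIMEDOUT;

error_free_wb:
        free(wb);               /* one exit path, as in the patched tests */
        return r;
}

int main(void)
{
        printf("ring_test() = %d\n", ring_test());
        return 0;
}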
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 7a8c9172d30a..f4490cdd9804 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -54,6 +54,11 @@ MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
54#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L 54#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
55#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L 55#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
56 56
57#define WREG32_SDMA(instance, offset, value) \
58 WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
59#define RREG32_SDMA(instance, offset) \
60 RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
61
57static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); 62static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
58static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); 63static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
59static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); 64static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
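The two helpers above fold the per-instance sdma_v4_0_get_reg_offset() lookup into every access, which is what lets the rest of the file shrink calls like WREG32(sdma_v4_0_get_reg_offset(adev, i, reg), val) down to WREG32_SDMA(i, reg, val). Like the kernel macros, they rely on a local adev being in scope at the call site. The toy model below mirrors the idea against a fake two-instance register file; the offset math and register array are made up for illustration and the real macros additionally pass adev through to the offset helper.

#include <inttypes.h>
#include <stdio.h>

#define NUM_INSTANCES   2
#define REGS_PER_INST   16

static uint32_t regs[NUM_INSTANCES][REGS_PER_INST];    /* fake register file */

static unsigned int get_reg_offset(unsigned int instance, unsigned int offset)
{
        return instance * REGS_PER_INST + offset;       /* stand-in for soc15 offset math */
}

static void wreg32(unsigned int reg, uint32_t v)
{
        regs[reg / REGS_PER_INST][reg % REGS_PER_INST] = v;
}

static uint32_t rreg32(unsigned int reg)
{
        return regs[reg / REGS_PER_INST][reg % REGS_PER_INST];
}

#define WREG32_SDMA(instance, offset, value) \
        wreg32(get_reg_offset((instance), (offset)), (value))
#define RREG32_SDMA(instance, offset) \
        rreg32(get_reg_offset((instance), (offset)))

int main(void)
{
        WREG32_SDMA(1, 3, 0xCAFEDEAD);
        printf("0x%08" PRIX32 "\n", RREG32_SDMA(1, 3));
        return 0;
}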
@@ -367,16 +372,11 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
367 wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); 372 wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
368 DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); 373 DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
369 } else { 374 } else {
370 u32 lowbit, highbit; 375 wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI);
371
372 lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
373 highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
374
375 DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
376 ring->me, highbit, lowbit);
377 wptr = highbit;
378 wptr = wptr << 32; 376 wptr = wptr << 32;
379 wptr |= lowbit; 377 wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR);
378 DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
379 ring->me, wptr);
380 } 380 }
381 381
382 return wptr >> 2; 382 return wptr >> 2;
@@ -417,14 +417,67 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
417 lower_32_bits(ring->wptr << 2), 417 lower_32_bits(ring->wptr << 2),
418 ring->me, 418 ring->me,
419 upper_32_bits(ring->wptr << 2)); 419 upper_32_bits(ring->wptr << 2));
420 WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); 420 WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
421 WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); 421 lower_32_bits(ring->wptr << 2));
422 WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
423 upper_32_bits(ring->wptr << 2));
424 }
425}
426
427/**
428 * sdma_v4_0_page_ring_get_wptr - get the current write pointer
429 *
430 * @ring: amdgpu ring pointer
431 *
432 * Get the current wptr from the hardware (VEGA10+).
433 */
434static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
435{
436 struct amdgpu_device *adev = ring->adev;
437 u64 wptr;
438
439 if (ring->use_doorbell) {
440 /* XXX check if swapping is necessary on BE */
441 wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
442 } else {
443 wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
444 wptr = wptr << 32;
445 wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
446 }
447
448 return wptr >> 2;
449}
450
451/**
 452 * sdma_v4_0_page_ring_set_wptr - commit the write pointer
453 *
454 * @ring: amdgpu ring pointer
455 *
456 * Write the wptr back to the hardware (VEGA10+).
457 */
458static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
459{
460 struct amdgpu_device *adev = ring->adev;
461
462 if (ring->use_doorbell) {
463 u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
464
465 /* XXX check if swapping is necessary on BE */
466 WRITE_ONCE(*wb, (ring->wptr << 2));
467 WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
468 } else {
469 uint64_t wptr = ring->wptr << 2;
470
471 WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
472 lower_32_bits(wptr));
473 WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
474 upper_32_bits(wptr));
422 } 475 }
423} 476}
424 477
425static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 478static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
426{ 479{
427 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 480 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
428 int i; 481 int i;
429 482
430 for (i = 0; i < count; i++) 483 for (i = 0; i < count; i++)
@@ -444,9 +497,12 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
444 * Schedule an IB in the DMA ring (VEGA10). 497 * Schedule an IB in the DMA ring (VEGA10).
445 */ 498 */
446static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, 499static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
447 struct amdgpu_ib *ib, 500 struct amdgpu_job *job,
448 unsigned vmid, bool ctx_switch) 501 struct amdgpu_ib *ib,
502 bool ctx_switch)
449{ 503{
504 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
505
450 /* IB packet must end on a 8 DW boundary */ 506 /* IB packet must end on a 8 DW boundary */
451 sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); 507 sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
452 508
@@ -568,16 +624,16 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
568 amdgpu_ttm_set_buffer_funcs_status(adev, false); 624 amdgpu_ttm_set_buffer_funcs_status(adev, false);
569 625
570 for (i = 0; i < adev->sdma.num_instances; i++) { 626 for (i = 0; i < adev->sdma.num_instances; i++) {
571 rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); 627 rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
572 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); 628 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
573 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 629 WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
574 ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); 630 ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
575 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); 631 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
576 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); 632 WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
577 } 633 }
578 634
579 sdma0->ready = false; 635 sdma0->sched.ready = false;
580 sdma1->ready = false; 636 sdma1->sched.ready = false;
581} 637}
582 638
583/** 639/**
@@ -593,6 +649,39 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
593} 649}
594 650
595/** 651/**
652 * sdma_v4_0_page_stop - stop the page async dma engines
653 *
654 * @adev: amdgpu_device pointer
655 *
656 * Stop the page async dma ring buffers (VEGA10).
657 */
658static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
659{
660 struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page;
661 struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page;
662 u32 rb_cntl, ib_cntl;
663 int i;
664
665 if ((adev->mman.buffer_funcs_ring == sdma0) ||
666 (adev->mman.buffer_funcs_ring == sdma1))
667 amdgpu_ttm_set_buffer_funcs_status(adev, false);
668
669 for (i = 0; i < adev->sdma.num_instances; i++) {
670 rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
671 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
672 RB_ENABLE, 0);
673 WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
674 ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
675 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
676 IB_ENABLE, 0);
677 WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
678 }
679
680 sdma0->sched.ready = false;
681 sdma1->sched.ready = false;
682}
683
684/**
596 * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch 685 * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
597 * 686 *
598 * @adev: amdgpu_device pointer 687 * @adev: amdgpu_device pointer
@@ -630,18 +719,15 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
630 } 719 }
631 720
632 for (i = 0; i < adev->sdma.num_instances; i++) { 721 for (i = 0; i < adev->sdma.num_instances; i++) {
633 f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); 722 f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
634 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, 723 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
635 AUTO_CTXSW_ENABLE, enable ? 1 : 0); 724 AUTO_CTXSW_ENABLE, enable ? 1 : 0);
636 if (enable && amdgpu_sdma_phase_quantum) { 725 if (enable && amdgpu_sdma_phase_quantum) {
637 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), 726 WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
638 phase_quantum); 727 WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
639 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), 728 WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
640 phase_quantum);
641 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
642 phase_quantum);
643 } 729 }
644 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); 730 WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
645 } 731 }
646 732
647} 733}
@@ -662,156 +748,217 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
662 if (enable == false) { 748 if (enable == false) {
663 sdma_v4_0_gfx_stop(adev); 749 sdma_v4_0_gfx_stop(adev);
664 sdma_v4_0_rlc_stop(adev); 750 sdma_v4_0_rlc_stop(adev);
751 if (adev->sdma.has_page_queue)
752 sdma_v4_0_page_stop(adev);
665 } 753 }
666 754
667 for (i = 0; i < adev->sdma.num_instances; i++) { 755 for (i = 0; i < adev->sdma.num_instances; i++) {
668 f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); 756 f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
669 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); 757 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
670 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); 758 WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
671 } 759 }
672} 760}
673 761
674/** 762/**
763 * sdma_v4_0_rb_cntl - get parameters for rb_cntl
764 */
765static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
766{
767 /* Set ring buffer size in dwords */
768 uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
769
770 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
771#ifdef __BIG_ENDIAN
772 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
773 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
774 RPTR_WRITEBACK_SWAP_ENABLE, 1);
775#endif
776 return rb_cntl;
777}
778
779/**
675 * sdma_v4_0_gfx_resume - setup and start the async dma engines 780 * sdma_v4_0_gfx_resume - setup and start the async dma engines
676 * 781 *
677 * @adev: amdgpu_device pointer 782 * @adev: amdgpu_device pointer
783 * @i: instance to resume
678 * 784 *
679 * Set up the gfx DMA ring buffers and enable them (VEGA10). 785 * Set up the gfx DMA ring buffers and enable them (VEGA10).
680 * Returns 0 for success, error for failure. 786 * Returns 0 for success, error for failure.
681 */ 787 */
682static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) 788static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
683{ 789{
684 struct amdgpu_ring *ring; 790 struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
685 u32 rb_cntl, ib_cntl, wptr_poll_cntl; 791 u32 rb_cntl, ib_cntl, wptr_poll_cntl;
686 u32 rb_bufsz;
687 u32 wb_offset; 792 u32 wb_offset;
688 u32 doorbell; 793 u32 doorbell;
689 u32 doorbell_offset; 794 u32 doorbell_offset;
690 u32 temp;
691 u64 wptr_gpu_addr; 795 u64 wptr_gpu_addr;
692 int i, r;
693
694 for (i = 0; i < adev->sdma.num_instances; i++) {
695 ring = &adev->sdma.instance[i].ring;
696 wb_offset = (ring->rptr_offs * 4);
697 796
698 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); 797 wb_offset = (ring->rptr_offs * 4);
699 798
700 /* Set ring buffer size in dwords */ 799 rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
701 rb_bufsz = order_base_2(ring->ring_size / 4); 800 rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
702 rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); 801 WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
703 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
704#ifdef __BIG_ENDIAN
705 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
706 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
707 RPTR_WRITEBACK_SWAP_ENABLE, 1);
708#endif
709 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
710 802
711 /* Initialize the ring buffer's read and write pointers */ 803 /* Initialize the ring buffer's read and write pointers */
712 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); 804 WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
713 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); 805 WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
714 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); 806 WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
715 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); 807 WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
716 808
717 /* set the wb address whether it's enabled or not */ 809 /* set the wb address whether it's enabled or not */
718 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), 810 WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
719 upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); 811 upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
720 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), 812 WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
721 lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); 813 lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
722 814
723 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); 815 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
816 RPTR_WRITEBACK_ENABLE, 1);
724 817
725 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); 818 WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
726 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); 819 WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
727 820
728 ring->wptr = 0; 821 ring->wptr = 0;
729 822
730 /* before programing wptr to a less value, need set minor_ptr_update first */ 823 /* before programing wptr to a less value, need set minor_ptr_update first */
731 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); 824 WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
732 825
733 if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ 826 doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
734 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); 827 doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
735 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
736 }
737 828
738 doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); 829 doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
739 doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); 830 ring->use_doorbell);
740 831 doorbell_offset = REG_SET_FIELD(doorbell_offset,
741 if (ring->use_doorbell) { 832 SDMA0_GFX_DOORBELL_OFFSET,
742 doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
743 doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
744 OFFSET, ring->doorbell_index); 833 OFFSET, ring->doorbell_index);
745 } else { 834 WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
746 doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); 835 WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
747 } 836 adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
748 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); 837 ring->doorbell_index);
749 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); 838
750 adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, 839 sdma_v4_0_ring_set_wptr(ring);
751 ring->doorbell_index); 840
841 /* set minor_ptr_update to 0 after wptr programed */
842 WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
843
844 /* setup the wptr shadow polling */
845 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
846 WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
847 lower_32_bits(wptr_gpu_addr));
848 WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
849 upper_32_bits(wptr_gpu_addr));
850 wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
851 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
852 SDMA0_GFX_RB_WPTR_POLL_CNTL,
853 F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
854 WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
855
856 /* enable DMA RB */
857 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
858 WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
859
860 ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
861 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
862#ifdef __BIG_ENDIAN
863 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
864#endif
865 /* enable DMA IBs */
866 WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
752 867
753 if (amdgpu_sriov_vf(adev)) 868 ring->sched.ready = true;
754 sdma_v4_0_ring_set_wptr(ring); 869}
755 870
756 /* set minor_ptr_update to 0 after wptr programed */ 871/**
757 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); 872 * sdma_v4_0_page_resume - setup and start the async dma engines
873 *
874 * @adev: amdgpu_device pointer
875 * @i: instance to resume
876 *
877 * Set up the page DMA ring buffers and enable them (VEGA10).
878 * Returns 0 for success, error for failure.
879 */
880static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
881{
882 struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
883 u32 rb_cntl, ib_cntl, wptr_poll_cntl;
884 u32 wb_offset;
885 u32 doorbell;
886 u32 doorbell_offset;
887 u64 wptr_gpu_addr;
758 888
759 /* set utc l1 enable flag always to 1 */ 889 wb_offset = (ring->rptr_offs * 4);
760 temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
761 temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
762 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
763 890
764 if (!amdgpu_sriov_vf(adev)) { 891 rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
765 /* unhalt engine */ 892 rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
766 temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); 893 WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
767 temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
768 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
769 }
770 894
771 /* setup the wptr shadow polling */ 895 /* Initialize the ring buffer's read and write pointers */
772 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 896 WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
773 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), 897 WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
774 lower_32_bits(wptr_gpu_addr)); 898 WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
775 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), 899 WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
776 upper_32_bits(wptr_gpu_addr));
777 wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
778 if (amdgpu_sriov_vf(adev))
779 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
780 else
781 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
782 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
783 900
784 /* enable DMA RB */ 901 /* set the wb address whether it's enabled or not */
785 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); 902 WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
786 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); 903 upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
904 WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
905 lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
787 906
788 ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); 907 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
789 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); 908 RPTR_WRITEBACK_ENABLE, 1);
790#ifdef __BIG_ENDIAN
791 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
792#endif
793 /* enable DMA IBs */
794 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
795 909
796 ring->ready = true; 910 WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
911 WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
797 912
798 if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ 913 ring->wptr = 0;
799 sdma_v4_0_ctx_switch_enable(adev, true);
800 sdma_v4_0_enable(adev, true);
801 }
802 914
803 r = amdgpu_ring_test_ring(ring); 915 /* before programing wptr to a less value, need set minor_ptr_update first */
804 if (r) { 916 WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
805 ring->ready = false;
806 return r;
807 }
808 917
809 if (adev->mman.buffer_funcs_ring == ring) 918 doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
810 amdgpu_ttm_set_buffer_funcs_status(adev, true); 919 doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
811 920
812 } 921 doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
922 ring->use_doorbell);
923 doorbell_offset = REG_SET_FIELD(doorbell_offset,
924 SDMA0_PAGE_DOORBELL_OFFSET,
925 OFFSET, ring->doorbell_index);
926 WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
927 WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
928 /* TODO: enable doorbell support */
929 /*adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
930 ring->doorbell_index);*/
931
932 sdma_v4_0_ring_set_wptr(ring);
933
934 /* set minor_ptr_update to 0 after wptr programed */
935 WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
936
937 /* setup the wptr shadow polling */
938 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
939 WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
940 lower_32_bits(wptr_gpu_addr));
941 WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
942 upper_32_bits(wptr_gpu_addr));
943 wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
944 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
945 SDMA0_PAGE_RB_WPTR_POLL_CNTL,
946 F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
947 WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
948
949 /* enable DMA RB */
950 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
951 WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
952
953 ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
954 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
955#ifdef __BIG_ENDIAN
956 ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
957#endif
958 /* enable DMA IBs */
959 WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
813 960
814 return 0; 961 ring->sched.ready = true;
815} 962}
816 963
817static void 964static void
@@ -922,12 +1069,14 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
922 (adev->sdma.instance[i].fw->data + 1069 (adev->sdma.instance[i].fw->data +
923 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1070 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
924 1071
925 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); 1072 WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
926 1073
927 for (j = 0; j < fw_size; j++) 1074 for (j = 0; j < fw_size; j++)
928 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); 1075 WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
1076 le32_to_cpup(fw_data++));
929 1077
930 WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); 1078 WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
1079 adev->sdma.instance[i].fw_version);
931 } 1080 }
932 1081
933 return 0; 1082 return 0;
@@ -943,33 +1092,78 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
943 */ 1092 */
944static int sdma_v4_0_start(struct amdgpu_device *adev) 1093static int sdma_v4_0_start(struct amdgpu_device *adev)
945{ 1094{
946 int r = 0; 1095 struct amdgpu_ring *ring;
1096 int i, r;
947 1097
948 if (amdgpu_sriov_vf(adev)) { 1098 if (amdgpu_sriov_vf(adev)) {
949 sdma_v4_0_ctx_switch_enable(adev, false); 1099 sdma_v4_0_ctx_switch_enable(adev, false);
950 sdma_v4_0_enable(adev, false); 1100 sdma_v4_0_enable(adev, false);
1101 } else {
951 1102
952 /* set RB registers */ 1103 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
953 r = sdma_v4_0_gfx_resume(adev); 1104 r = sdma_v4_0_load_microcode(adev);
954 return r; 1105 if (r)
1106 return r;
1107 }
1108
1109 /* unhalt the MEs */
1110 sdma_v4_0_enable(adev, true);
1111 /* enable sdma ring preemption */
1112 sdma_v4_0_ctx_switch_enable(adev, true);
1113 }
1114
1115 /* start the gfx rings and rlc compute queues */
1116 for (i = 0; i < adev->sdma.num_instances; i++) {
1117 uint32_t temp;
1118
1119 WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
1120 sdma_v4_0_gfx_resume(adev, i);
1121 if (adev->sdma.has_page_queue)
1122 sdma_v4_0_page_resume(adev, i);
1123
1124 /* set utc l1 enable flag always to 1 */
1125 temp = RREG32_SDMA(i, mmSDMA0_CNTL);
1126 temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
1127 WREG32_SDMA(i, mmSDMA0_CNTL, temp);
1128
1129 if (!amdgpu_sriov_vf(adev)) {
1130 /* unhalt engine */
1131 temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
1132 temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
1133 WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
1134 }
955 } 1135 }
956 1136
957 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 1137 if (amdgpu_sriov_vf(adev)) {
958 r = sdma_v4_0_load_microcode(adev); 1138 sdma_v4_0_ctx_switch_enable(adev, true);
1139 sdma_v4_0_enable(adev, true);
1140 } else {
1141 r = sdma_v4_0_rlc_resume(adev);
959 if (r) 1142 if (r)
960 return r; 1143 return r;
961 } 1144 }
962 1145
963 /* unhalt the MEs */ 1146 for (i = 0; i < adev->sdma.num_instances; i++) {
964 sdma_v4_0_enable(adev, true); 1147 ring = &adev->sdma.instance[i].ring;
965 /* enable sdma ring preemption */
966 sdma_v4_0_ctx_switch_enable(adev, true);
967 1148
968 /* start the gfx rings and rlc compute queues */ 1149 r = amdgpu_ring_test_helper(ring);
969 r = sdma_v4_0_gfx_resume(adev); 1150 if (r)
970 if (r) 1151 return r;
971 return r; 1152
972 r = sdma_v4_0_rlc_resume(adev); 1153 if (adev->sdma.has_page_queue) {
1154 struct amdgpu_ring *page = &adev->sdma.instance[i].page;
1155
1156 r = amdgpu_ring_test_helper(page);
1157 if (r)
1158 return r;
1159
1160 if (adev->mman.buffer_funcs_ring == page)
1161 amdgpu_ttm_set_buffer_funcs_status(adev, true);
1162 }
1163
1164 if (adev->mman.buffer_funcs_ring == ring)
1165 amdgpu_ttm_set_buffer_funcs_status(adev, true);
1166 }
973 1167
974 return r; 1168 return r;
975} 1169}
@@ -993,21 +1187,16 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
993 u64 gpu_addr; 1187 u64 gpu_addr;
994 1188
995 r = amdgpu_device_wb_get(adev, &index); 1189 r = amdgpu_device_wb_get(adev, &index);
996 if (r) { 1190 if (r)
997 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
998 return r; 1191 return r;
999 }
1000 1192
1001 gpu_addr = adev->wb.gpu_addr + (index * 4); 1193 gpu_addr = adev->wb.gpu_addr + (index * 4);
1002 tmp = 0xCAFEDEAD; 1194 tmp = 0xCAFEDEAD;
1003 adev->wb.wb[index] = cpu_to_le32(tmp); 1195 adev->wb.wb[index] = cpu_to_le32(tmp);
1004 1196
1005 r = amdgpu_ring_alloc(ring, 5); 1197 r = amdgpu_ring_alloc(ring, 5);
1006 if (r) { 1198 if (r)
1007 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 1199 goto error_free_wb;
1008 amdgpu_device_wb_free(adev, index);
1009 return r;
1010 }
1011 1200
1012 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 1201 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1013 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); 1202 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
@@ -1024,15 +1213,11 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
1024 DRM_UDELAY(1); 1213 DRM_UDELAY(1);
1025 } 1214 }
1026 1215
1027 if (i < adev->usec_timeout) { 1216 if (i >= adev->usec_timeout)
1028 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); 1217 r = -ETIMEDOUT;
1029 } else {
1030 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
1031 ring->idx, tmp);
1032 r = -EINVAL;
1033 }
1034 amdgpu_device_wb_free(adev, index);
1035 1218
1219error_free_wb:
1220 amdgpu_device_wb_free(adev, index);
1036 return r; 1221 return r;
1037} 1222}
1038 1223
@@ -1055,20 +1240,16 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1055 u64 gpu_addr; 1240 u64 gpu_addr;
1056 1241
1057 r = amdgpu_device_wb_get(adev, &index); 1242 r = amdgpu_device_wb_get(adev, &index);
1058 if (r) { 1243 if (r)
1059 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
1060 return r; 1244 return r;
1061 }
1062 1245
1063 gpu_addr = adev->wb.gpu_addr + (index * 4); 1246 gpu_addr = adev->wb.gpu_addr + (index * 4);
1064 tmp = 0xCAFEDEAD; 1247 tmp = 0xCAFEDEAD;
1065 adev->wb.wb[index] = cpu_to_le32(tmp); 1248 adev->wb.wb[index] = cpu_to_le32(tmp);
1066 memset(&ib, 0, sizeof(ib)); 1249 memset(&ib, 0, sizeof(ib));
1067 r = amdgpu_ib_get(adev, NULL, 256, &ib); 1250 r = amdgpu_ib_get(adev, NULL, 256, &ib);
1068 if (r) { 1251 if (r)
1069 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
1070 goto err0; 1252 goto err0;
1071 }
1072 1253
1073 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | 1254 ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
1074 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 1255 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
@@ -1087,21 +1268,17 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1087 1268
1088 r = dma_fence_wait_timeout(f, false, timeout); 1269 r = dma_fence_wait_timeout(f, false, timeout);
1089 if (r == 0) { 1270 if (r == 0) {
1090 DRM_ERROR("amdgpu: IB test timed out\n");
1091 r = -ETIMEDOUT; 1271 r = -ETIMEDOUT;
1092 goto err1; 1272 goto err1;
1093 } else if (r < 0) { 1273 } else if (r < 0) {
1094 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
1095 goto err1; 1274 goto err1;
1096 } 1275 }
1097 tmp = le32_to_cpu(adev->wb.wb[index]); 1276 tmp = le32_to_cpu(adev->wb.wb[index]);
1098 if (tmp == 0xDEADBEEF) { 1277 if (tmp == 0xDEADBEEF)
1099 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
1100 r = 0; 1278 r = 0;
1101 } else { 1279 else
1102 DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
1103 r = -EINVAL; 1280 r = -EINVAL;
1104 } 1281
1105err1: 1282err1:
1106 amdgpu_ib_free(adev, &ib, NULL); 1283 amdgpu_ib_free(adev, &ib, NULL);
1107 dma_fence_put(f); 1284 dma_fence_put(f);
@@ -1206,7 +1383,7 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib,
1206 */ 1383 */
1207static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 1384static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1208{ 1385{
1209 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); 1386 struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1210 u32 pad_count; 1387 u32 pad_count;
1211 int i; 1388 int i;
1212 1389
@@ -1276,10 +1453,18 @@ static int sdma_v4_0_early_init(void *handle)
1276{ 1453{
1277 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1454 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1278 1455
1279 if (adev->asic_type == CHIP_RAVEN) 1456 if (adev->asic_type == CHIP_RAVEN) {
1280 adev->sdma.num_instances = 1; 1457 adev->sdma.num_instances = 1;
1281 else 1458 adev->sdma.has_page_queue = false;
1459 } else {
1282 adev->sdma.num_instances = 2; 1460 adev->sdma.num_instances = 2;
1461 /* TODO: Page queue breaks driver reload under SRIOV */
1462 if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev)))
1463 adev->sdma.has_page_queue = false;
1464 else if (adev->asic_type != CHIP_VEGA20 &&
1465 adev->asic_type != CHIP_VEGA12)
1466 adev->sdma.has_page_queue = true;
1467 }
1283 1468
1284 sdma_v4_0_set_ring_funcs(adev); 1469 sdma_v4_0_set_ring_funcs(adev);
1285 sdma_v4_0_set_buffer_funcs(adev); 1470 sdma_v4_0_set_buffer_funcs(adev);
@@ -1340,6 +1525,21 @@ static int sdma_v4_0_sw_init(void *handle)
1340 AMDGPU_SDMA_IRQ_TRAP1); 1525 AMDGPU_SDMA_IRQ_TRAP1);
1341 if (r) 1526 if (r)
1342 return r; 1527 return r;
1528
1529 if (adev->sdma.has_page_queue) {
1530 ring = &adev->sdma.instance[i].page;
1531 ring->ring_obj = NULL;
1532 ring->use_doorbell = false;
1533
1534 sprintf(ring->name, "page%d", i);
1535 r = amdgpu_ring_init(adev, ring, 1024,
1536 &adev->sdma.trap_irq,
1537 (i == 0) ?
1538 AMDGPU_SDMA_IRQ_TRAP0 :
1539 AMDGPU_SDMA_IRQ_TRAP1);
1540 if (r)
1541 return r;
1542 }
1343 } 1543 }
1344 1544
1345 return r; 1545 return r;
@@ -1350,8 +1550,11 @@ static int sdma_v4_0_sw_fini(void *handle)
1350 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1550 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1351 int i; 1551 int i;
1352 1552
1353 for (i = 0; i < adev->sdma.num_instances; i++) 1553 for (i = 0; i < adev->sdma.num_instances; i++) {
1354 amdgpu_ring_fini(&adev->sdma.instance[i].ring); 1554 amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1555 if (adev->sdma.has_page_queue)
1556 amdgpu_ring_fini(&adev->sdma.instance[i].page);
1557 }
1355 1558
1356 for (i = 0; i < adev->sdma.num_instances; i++) { 1559 for (i = 0; i < adev->sdma.num_instances; i++) {
1357 release_firmware(adev->sdma.instance[i].fw); 1560 release_firmware(adev->sdma.instance[i].fw);
@@ -1414,7 +1617,7 @@ static bool sdma_v4_0_is_idle(void *handle)
1414 u32 i; 1617 u32 i;
1415 1618
1416 for (i = 0; i < adev->sdma.num_instances; i++) { 1619 for (i = 0; i < adev->sdma.num_instances; i++) {
1417 u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); 1620 u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
1418 1621
1419 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) 1622 if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
1420 return false; 1623 return false;
@@ -1430,8 +1633,8 @@ static int sdma_v4_0_wait_for_idle(void *handle)
1430 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1633 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1431 1634
1432 for (i = 0; i < adev->usec_timeout; i++) { 1635 for (i = 0; i < adev->usec_timeout; i++) {
1433 sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); 1636 sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG);
1434 sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); 1637 sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG);
1435 1638
1436 if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) 1639 if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
1437 return 0; 1640 return 0;
@@ -1452,16 +1655,13 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
1452 unsigned type, 1655 unsigned type,
1453 enum amdgpu_interrupt_state state) 1656 enum amdgpu_interrupt_state state)
1454{ 1657{
1658 unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1;
1455 u32 sdma_cntl; 1659 u32 sdma_cntl;
1456 1660
1457 u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 1661 sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL);
1458 sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
1459 sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
1460
1461 sdma_cntl = RREG32(reg_offset);
1462 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1662 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
1463 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 1663 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1464 WREG32(reg_offset, sdma_cntl); 1664 WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl);
1465 1665
1466 return 0; 1666 return 0;
1467} 1667}
@@ -1470,39 +1670,32 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
1470 struct amdgpu_irq_src *source, 1670 struct amdgpu_irq_src *source,
1471 struct amdgpu_iv_entry *entry) 1671 struct amdgpu_iv_entry *entry)
1472{ 1672{
1673 uint32_t instance;
1674
1473 DRM_DEBUG("IH: SDMA trap\n"); 1675 DRM_DEBUG("IH: SDMA trap\n");
1474 switch (entry->client_id) { 1676 switch (entry->client_id) {
1475 case SOC15_IH_CLIENTID_SDMA0: 1677 case SOC15_IH_CLIENTID_SDMA0:
1476 switch (entry->ring_id) { 1678 instance = 0;
1477 case 0:
1478 amdgpu_fence_process(&adev->sdma.instance[0].ring);
1479 break;
1480 case 1:
1481 /* XXX compute */
1482 break;
1483 case 2:
1484 /* XXX compute */
1485 break;
1486 case 3:
1487 /* XXX page queue*/
1488 break;
1489 }
1490 break; 1679 break;
1491 case SOC15_IH_CLIENTID_SDMA1: 1680 case SOC15_IH_CLIENTID_SDMA1:
1492 switch (entry->ring_id) { 1681 instance = 1;
1493 case 0: 1682 break;
1494 amdgpu_fence_process(&adev->sdma.instance[1].ring); 1683 default:
1495 break; 1684 return 0;
1496 case 1: 1685 }
1497 /* XXX compute */ 1686
1498 break; 1687 switch (entry->ring_id) {
1499 case 2: 1688 case 0:
1500 /* XXX compute */ 1689 amdgpu_fence_process(&adev->sdma.instance[instance].ring);
1501 break; 1690 break;
1502 case 3: 1691 case 1:
1503 /* XXX page queue*/ 1692 /* XXX compute */
1504 break; 1693 break;
1505 } 1694 case 2:
1695 /* XXX compute */
1696 break;
1697 case 3:
1698 amdgpu_fence_process(&adev->sdma.instance[instance].page);
1506 break; 1699 break;
1507 } 1700 }
1508 return 0; 1701 return 0;
@@ -1512,12 +1705,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
1512 struct amdgpu_irq_src *source, 1705 struct amdgpu_irq_src *source,
1513 struct amdgpu_iv_entry *entry) 1706 struct amdgpu_iv_entry *entry)
1514{ 1707{
1708 int instance;
1709
1515 DRM_ERROR("Illegal instruction in SDMA command stream\n"); 1710 DRM_ERROR("Illegal instruction in SDMA command stream\n");
1516 schedule_work(&adev->reset_work); 1711
1712 switch (entry->client_id) {
1713 case SOC15_IH_CLIENTID_SDMA0:
1714 instance = 0;
1715 break;
1716 case SOC15_IH_CLIENTID_SDMA1:
1717 instance = 1;
1718 break;
1719 default:
1720 return 0;
1721 }
1722
1723 switch (entry->ring_id) {
1724 case 0:
1725 drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
1726 break;
1727 }
1517 return 0; 1728 return 0;
1518} 1729}
1519 1730
1520
1521static void sdma_v4_0_update_medium_grain_clock_gating( 1731static void sdma_v4_0_update_medium_grain_clock_gating(
1522 struct amdgpu_device *adev, 1732 struct amdgpu_device *adev,
1523 bool enable) 1733 bool enable)
@@ -1730,6 +1940,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
1730 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, 1940 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1731}; 1941};
1732 1942
1943static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
1944 .type = AMDGPU_RING_TYPE_SDMA,
1945 .align_mask = 0xf,
1946 .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
1947 .support_64bit_ptrs = true,
1948 .vmhub = AMDGPU_MMHUB,
1949 .get_rptr = sdma_v4_0_ring_get_rptr,
1950 .get_wptr = sdma_v4_0_page_ring_get_wptr,
1951 .set_wptr = sdma_v4_0_page_ring_set_wptr,
1952 .emit_frame_size =
1953 6 + /* sdma_v4_0_ring_emit_hdp_flush */
1954 3 + /* hdp invalidate */
1955 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
1956 /* sdma_v4_0_ring_emit_vm_flush */
1957 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1958 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
1959 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
1960 .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
1961 .emit_ib = sdma_v4_0_ring_emit_ib,
1962 .emit_fence = sdma_v4_0_ring_emit_fence,
1963 .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
1964 .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
1965 .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
1966 .test_ring = sdma_v4_0_ring_test_ring,
1967 .test_ib = sdma_v4_0_ring_test_ib,
1968 .insert_nop = sdma_v4_0_ring_insert_nop,
1969 .pad_ib = sdma_v4_0_ring_pad_ib,
1970 .emit_wreg = sdma_v4_0_ring_emit_wreg,
1971 .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
1972 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1973};
1974
1733static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1975static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1734{ 1976{
1735 int i; 1977 int i;
@@ -1737,6 +1979,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1737 for (i = 0; i < adev->sdma.num_instances; i++) { 1979 for (i = 0; i < adev->sdma.num_instances; i++) {
1738 adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; 1980 adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
1739 adev->sdma.instance[i].ring.me = i; 1981 adev->sdma.instance[i].ring.me = i;
1982 if (adev->sdma.has_page_queue) {
1983 adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs;
1984 adev->sdma.instance[i].page.me = i;
1985 }
1740 } 1986 }
1741} 1987}
1742 1988
@@ -1818,7 +2064,10 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
1818static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) 2064static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
1819{ 2065{
1820 adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; 2066 adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
1821 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; 2067 if (adev->sdma.has_page_queue)
2068 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
2069 else
2070 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1822} 2071}
1823 2072
1824static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { 2073static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
@@ -1836,7 +2085,10 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
1836 2085
1837 adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; 2086 adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
1838 for (i = 0; i < adev->sdma.num_instances; i++) { 2087 for (i = 0; i < adev->sdma.num_instances; i++) {
1839 sched = &adev->sdma.instance[i].ring.sched; 2088 if (adev->sdma.has_page_queue)
2089 sched = &adev->sdma.instance[i].page.sched;
2090 else
2091 sched = &adev->sdma.instance[i].ring.sched;
1840 adev->vm_manager.vm_pte_rqs[i] = 2092 adev->vm_manager.vm_pte_rqs[i] =
1841 &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; 2093 &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
1842 } 2094 }
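Both sdma_v4_0_ring_get_wptr() and the new sdma_v4_0_page_ring_get_wptr() above reassemble the 64-bit write pointer from the HI/LO register pair before converting the hardware byte offset into the dword count the ring code uses. A minimal sketch of that reassembly, with hard-coded values standing in for the RREG32_SDMA reads:

#include <inttypes.h>
#include <stdio.h>

static uint64_t assemble_wptr(uint32_t hi, uint32_t lo)
{
        uint64_t wptr = hi;     /* mmSDMA0_*_RB_WPTR_HI */

        wptr = wptr << 32;
        wptr |= lo;             /* mmSDMA0_*_RB_WPTR */
        return wptr >> 2;       /* hardware counts bytes, the ring counts dwords */
}

int main(void)
{
        /* HI = 0x1, LO = 0x80 -> dword pointer 0x40000020 */
        printf("0x%" PRIx64 "\n", assemble_wptr(0x1, 0x80));
        return 0;
}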
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index adbaea6da0d7..b6e473134e19 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -61,9 +61,11 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
61} 61}
62 62
63static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, 63static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
64 struct amdgpu_job *job,
64 struct amdgpu_ib *ib, 65 struct amdgpu_ib *ib,
65 unsigned vmid, bool ctx_switch) 66 bool ctx_switch)
66{ 67{
68 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
67 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. 69 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
68 * Pad as necessary with NOPs. 70 * Pad as necessary with NOPs.
69 */ 71 */
@@ -122,7 +124,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
122 124
123 if (adev->mman.buffer_funcs_ring == ring) 125 if (adev->mman.buffer_funcs_ring == ring)
124 amdgpu_ttm_set_buffer_funcs_status(adev, false); 126 amdgpu_ttm_set_buffer_funcs_status(adev, false);
125 ring->ready = false; 127 ring->sched.ready = false;
126 } 128 }
127} 129}
128 130
@@ -175,13 +177,11 @@ static int si_dma_start(struct amdgpu_device *adev)
175 WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); 177 WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
176 WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); 178 WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
177 179
178 ring->ready = true; 180 ring->sched.ready = true;
179 181
180 r = amdgpu_ring_test_ring(ring); 182 r = amdgpu_ring_test_helper(ring);
181 if (r) { 183 if (r)
182 ring->ready = false;
183 return r; 184 return r;
184 }
185 185
186 if (adev->mman.buffer_funcs_ring == ring) 186 if (adev->mman.buffer_funcs_ring == ring)
187 amdgpu_ttm_set_buffer_funcs_status(adev, true); 187 amdgpu_ttm_set_buffer_funcs_status(adev, true);
@@ -209,21 +209,16 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
209 u64 gpu_addr; 209 u64 gpu_addr;
210 210
211 r = amdgpu_device_wb_get(adev, &index); 211 r = amdgpu_device_wb_get(adev, &index);
212 if (r) { 212 if (r)
213 dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
214 return r; 213 return r;
215 }
216 214
217 gpu_addr = adev->wb.gpu_addr + (index * 4); 215 gpu_addr = adev->wb.gpu_addr + (index * 4);
218 tmp = 0xCAFEDEAD; 216 tmp = 0xCAFEDEAD;
219 adev->wb.wb[index] = cpu_to_le32(tmp); 217 adev->wb.wb[index] = cpu_to_le32(tmp);
220 218
221 r = amdgpu_ring_alloc(ring, 4); 219 r = amdgpu_ring_alloc(ring, 4);
222 if (r) { 220 if (r)
223 DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); 221 goto error_free_wb;
224 amdgpu_device_wb_free(adev, index);
225 return r;
226 }
227 222
228 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); 223 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
229 amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); 224 amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
@@ -238,15 +233,11 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
238 DRM_UDELAY(1); 233 DRM_UDELAY(1);
239 } 234 }
240 235
241 if (i < adev->usec_timeout) { 236 if (i >= adev->usec_timeout)
242 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); 237 r = -ETIMEDOUT;
243 } else {
244 DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
245 ring->idx, tmp);
246 r = -EINVAL;
247 }
248 amdgpu_device_wb_free(adev, index);
249 238
239error_free_wb:
240 amdgpu_device_wb_free(adev, index);
250 return r; 241 return r;
251} 242}
252 243
@@ -269,20 +260,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
269 long r; 260 long r;
270 261
271 r = amdgpu_device_wb_get(adev, &index); 262 r = amdgpu_device_wb_get(adev, &index);
272 if (r) { 263 if (r)
273 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
274 return r; 264 return r;
275 }
276 265
277 gpu_addr = adev->wb.gpu_addr + (index * 4); 266 gpu_addr = adev->wb.gpu_addr + (index * 4);
278 tmp = 0xCAFEDEAD; 267 tmp = 0xCAFEDEAD;
279 adev->wb.wb[index] = cpu_to_le32(tmp); 268 adev->wb.wb[index] = cpu_to_le32(tmp);
280 memset(&ib, 0, sizeof(ib)); 269 memset(&ib, 0, sizeof(ib));
281 r = amdgpu_ib_get(adev, NULL, 256, &ib); 270 r = amdgpu_ib_get(adev, NULL, 256, &ib);
282 if (r) { 271 if (r)
283 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
284 goto err0; 272 goto err0;
285 }
286 273
287 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); 274 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
288 ib.ptr[1] = lower_32_bits(gpu_addr); 275 ib.ptr[1] = lower_32_bits(gpu_addr);
@@ -295,21 +282,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
 		goto err1;
 	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
 	tmp = le32_to_cpu(adev->wb.wb[index]);
-	if (tmp == 0xDEADBEEF) {
-		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+	if (tmp == 0xDEADBEEF)
 		r = 0;
-	} else {
-		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+	else
 		r = -EINVAL;
-	}
 
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
@@ -658,15 +640,6 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev,
-					   struct amdgpu_irq_src *source,
-					   struct amdgpu_iv_entry *entry)
-{
-	DRM_ERROR("Illegal instruction in SDMA command stream\n");
-	schedule_work(&adev->reset_work);
-	return 0;
-}
-
 static int si_dma_set_clockgating_state(void *handle,
 					enum amd_clockgating_state state)
 {
@@ -781,15 +754,10 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
 	.process = si_dma_process_trap_irq,
 };
 
-static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
-	.process = si_dma_process_illegal_inst_irq,
-};
-
 static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
 {
 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
 	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
-	adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
 }
 
 /**
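
The si_dma hunks above follow the two themes of this series: ring/IB test functions drop their DRM_ERROR/DRM_DEBUG chatter (failures are now reported by the caller), and the open-coded `ring->ready` bookkeeping around amdgpu_ring_test_ring() is replaced by amdgpu_ring_test_helper(), with the flag itself moving to ring->sched.ready. The helper is introduced elsewhere in the series (amdgpu_ring.c/.h, not shown in this excerpt); a minimal sketch of what it presumably does, inferred only from how these call sites use it:

/* Sketch only: inferred from the call sites in this diff, not copied from
 * the amdgpu_ring.c implementation added elsewhere in the series. */
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);

	/* the scheduler's ready flag now doubles as the old ring->ready */
	ring->sched.ready = !r;

	return r;
}

Because the helper updates ring->sched.ready itself, the hw_init paths in the files below no longer need to set and clear the flag by hand around the test.
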
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
new file mode 100644
index 000000000000..ac2c27b7630c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _TA_XGMI_IF_H
+#define _TA_XGMI_IF_H
+
+/* Responses have bit 31 set */
+#define RSP_ID_MASK (1U << 31)
+#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+
+enum ta_command_xgmi {
+	TA_COMMAND_XGMI__INITIALIZE = 0x00,
+	TA_COMMAND_XGMI__GET_NODE_ID = 0x01,
+	TA_COMMAND_XGMI__GET_HIVE_ID = 0x02,
+	TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03,
+	TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04
+};
+
+/* XGMI related enumerations */
+/**********************************************************/;
+enum ta_xgmi_connected_nodes {
+	TA_XGMI__MAX_CONNECTED_NODES = 64
+};
+
+enum ta_xgmi_status {
+	TA_XGMI_STATUS__SUCCESS = 0x00,
+	TA_XGMI_STATUS__GENERIC_FAILURE = 0x01,
+	TA_XGMI_STATUS__NULL_POINTER = 0x02,
+	TA_XGMI_STATUS__INVALID_PARAMETER = 0x03,
+	TA_XGMI_STATUS__NOT_INITIALIZED = 0x04,
+	TA_XGMI_STATUS__INVALID_NODE_NUM = 0x05,
+	TA_XGMI_STATUS__INVALID_NODE_ID = 0x06,
+	TA_XGMI_STATUS__INVALID_TOPOLOGY = 0x07,
+	TA_XGMI_STATUS__FAILED_ID_GEN = 0x08,
+	TA_XGMI_STATUS__FAILED_TOPOLOGY_INIT = 0x09,
+	TA_XGMI_STATUS__SET_SHARING_ERROR = 0x0A
+};
+
+enum ta_xgmi_assigned_sdma_engine {
+	TA_XGMI_ASSIGNED_SDMA_ENGINE__NOT_ASSIGNED = -1,
+	TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA0 = 0,
+	TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA1 = 1,
+	TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA2 = 2,
+	TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA3 = 3,
+	TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA4 = 4,
+	TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA5 = 5
+};
+
+/* input/output structures for XGMI commands */
+/**********************************************************/
+struct ta_xgmi_node_info {
+	uint64_t node_id;
+	uint8_t num_hops;
+	uint8_t is_sharing_enabled;
+	enum ta_xgmi_assigned_sdma_engine sdma_engine;
+};
+
+struct ta_xgmi_cmd_initialize_output {
+	uint32_t status;
+};
+
+struct ta_xgmi_cmd_get_node_id_output {
+	uint64_t node_id;
+};
+
+struct ta_xgmi_cmd_get_hive_id_output {
+	uint64_t hive_id;
+};
+
+struct ta_xgmi_cmd_get_topology_info_input {
+	uint32_t num_nodes;
+	struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
+
+struct ta_xgmi_cmd_get_topology_info_output {
+	uint32_t num_nodes;
+	struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
+
+struct ta_xgmi_cmd_set_topology_info_input {
+	uint32_t num_nodes;
+	struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES];
+};
+
+/**********************************************************/
+/* Common input structure for XGMI callbacks */
+union ta_xgmi_cmd_input {
+	struct ta_xgmi_cmd_get_topology_info_input get_topology_info;
+	struct ta_xgmi_cmd_set_topology_info_input set_topology_info;
+};
+
+/* Common output structure for XGMI callbacks */
+union ta_xgmi_cmd_output {
+	struct ta_xgmi_cmd_initialize_output initialize;
+	struct ta_xgmi_cmd_get_node_id_output get_node_id;
+	struct ta_xgmi_cmd_get_hive_id_output get_hive_id;
+	struct ta_xgmi_cmd_get_topology_info_output get_topology_info;
+};
+/**********************************************************/
+
+struct ta_xgmi_shared_memory {
+	uint32_t cmd_id;
+	uint32_t resp_id;
+	enum ta_xgmi_status xgmi_status;
+	uint32_t reserved;
+	union ta_xgmi_cmd_input xgmi_in_message;
+	union ta_xgmi_cmd_output xgmi_out_message;
+};
+
+#endif //_TA_XGMI_IF_H
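
The header above only defines the mailbox that the driver shares with the XGMI trusted application; the code that actually fills it in lives in amdgpu_psp.c/amdgpu_xgmi.c elsewhere in this series. As a purely hypothetical illustration of the protocol the header implies, a TA_COMMAND_XGMI__GET_HIVE_ID round trip might look like this (xgmi_ta_submit() is a made-up stand-in for the real PSP invocation path and is not part of this interface):

#include <linux/errno.h>
#include <linux/string.h>
#include "ta_xgmi_if.h"

/* hypothetical stand-in for the real PSP TA submission, see note above */
int xgmi_ta_submit(struct ta_xgmi_shared_memory *cmd);

static int example_xgmi_get_hive_id(struct ta_xgmi_shared_memory *cmd,
				    uint64_t *hive_id)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;

	if (xgmi_ta_submit(cmd))	/* assumed helper, see note above */
		return -EINVAL;

	/* per the comment at the top of the header, the TA acknowledges by
	 * echoing the command id with bit 31 set, plus a status code */
	if (cmd->resp_id != RSP_ID(TA_COMMAND_XGMI__GET_HIVE_ID) ||
	    cmd->xgmi_status != TA_XGMI_STATUS__SUCCESS)
		return -EINVAL;

	*hive_id = cmd->xgmi_out_message.get_hive_id.hive_id;
	return 0;
}
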
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 1fc17bf39fed..90bbcee00f28 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -162,12 +162,9 @@ static int uvd_v4_2_hw_init(void *handle)
 	uvd_v4_2_enable_mgcg(adev, true);
 	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
 
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
 		goto done;
-	}
 
 	r = amdgpu_ring_alloc(ring, 10);
 	if (r) {
@@ -218,7 +215,7 @@ static int uvd_v4_2_hw_fini(void *handle)
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v4_2_stop(adev);
 
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	return 0;
 }
@@ -484,11 +481,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
 
 	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
-	if (r) {
-		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
-			  ring->idx, r);
+	if (r)
 		return r;
-	}
+
 	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
 	amdgpu_ring_commit(ring);
@@ -499,14 +494,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
 		DRM_UDELAY(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
-			  ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
 	return r;
 }
 
@@ -519,8 +509,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
  * Write ring commands to execute the indirect buffer
  */
 static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
+				  struct amdgpu_job *job,
 				  struct amdgpu_ib *ib,
-				  unsigned vmid, bool ctx_switch)
+				  bool ctx_switch)
 {
 	amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
 	amdgpu_ring_write(ring, ib->gpu_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index fde6ad5ac9ab..1c5e12703103 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -158,12 +158,9 @@ static int uvd_v5_0_hw_init(void *handle)
 	uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
 	uvd_v5_0_enable_mgcg(adev, true);
 
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
 		goto done;
-	}
 
 	r = amdgpu_ring_alloc(ring, 10);
 	if (r) {
@@ -215,7 +212,7 @@ static int uvd_v5_0_hw_fini(void *handle)
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v5_0_stop(adev);
 
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	return 0;
 }
@@ -500,11 +497,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 
 	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
-	if (r) {
-		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
-			  ring->idx, r);
+	if (r)
 		return r;
-	}
 	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
 	amdgpu_ring_commit(ring);
@@ -515,14 +509,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 		DRM_UDELAY(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
-			  ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
 	return r;
 }
 
@@ -535,8 +524,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
  * Write ring commands to execute the indirect buffer
  */
 static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
+				  struct amdgpu_job *job,
 				  struct amdgpu_ib *ib,
-				  unsigned vmid, bool ctx_switch)
+				  bool ctx_switch)
 {
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 7a5b40275e8e..f184842ef2a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -175,11 +175,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 	int r;
 
 	r = amdgpu_ring_alloc(ring, 16);
-	if (r) {
-		DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n",
-			  ring->idx, r);
+	if (r)
 		return r;
-	}
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
 	amdgpu_ring_commit(ring);
 
@@ -189,14 +186,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 		DRM_UDELAY(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
-			  ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed\n",
-			  ring->idx);
+	if (i >= adev->usec_timeout)
 		r = -ETIMEDOUT;
-	}
 
 	return r;
 }
@@ -336,31 +327,24 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	long r;
 
 	r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+	if (r)
 		goto error;
-	}
 
 	r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+	if (r)
 		goto error;
-	}
 
 	r = dma_fence_wait_timeout(fence, false, timeout);
-	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out.\n");
+	if (r == 0)
 		r = -ETIMEDOUT;
-	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
-	} else {
-		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+	else if (r > 0)
 		r = 0;
-	}
+
 error:
 	dma_fence_put(fence);
 	return r;
 }
+
 static int uvd_v6_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -476,12 +460,9 @@ static int uvd_v6_0_hw_init(void *handle)
 	uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
 	uvd_v6_0_enable_mgcg(adev, true);
 
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
 		goto done;
-	}
 
 	r = amdgpu_ring_alloc(ring, 10);
 	if (r) {
@@ -513,12 +494,9 @@ static int uvd_v6_0_hw_init(void *handle)
 	if (uvd_v6_0_enc_support(adev)) {
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 			ring = &adev->uvd.inst->ring_enc[i];
-			ring->ready = true;
-			r = amdgpu_ring_test_ring(ring);
-			if (r) {
-				ring->ready = false;
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
 				goto done;
-			}
 		}
 	}
 
@@ -548,7 +526,7 @@ static int uvd_v6_0_hw_fini(void *handle)
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v6_0_stop(adev);
 
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	return 0;
 }
@@ -969,11 +947,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 
 	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
-	if (r) {
-		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
-			  ring->idx, r);
+	if (r)
 		return r;
-	}
+
 	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
 	amdgpu_ring_commit(ring);
@@ -984,14 +960,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 		DRM_UDELAY(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
-			  ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
 	return r;
 }
 
@@ -1004,9 +975,12 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
  * Write ring commands to execute the indirect buffer
  */
 static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
+				  struct amdgpu_job *job,
 				  struct amdgpu_ib *ib,
-				  unsigned vmid, bool ctx_switch)
+				  bool ctx_switch)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0));
 	amdgpu_ring_write(ring, vmid);
 
@@ -1027,8 +1001,12 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
  * Write enc ring commands to execute the indirect buffer
  */
 static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
-					struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
+					struct amdgpu_job *job,
+					struct amdgpu_ib *ib,
+					bool ctx_switch)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
 	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
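
From uvd_v6_0 onward the emit_ib callbacks take the owning amdgpu_job instead of an explicit VMID and recover it with AMDGPU_JOB_GET_VMID(). The macro lives in amdgpu_job.h, which is not part of this excerpt; judging by the call sites it is presumably a NULL-safe accessor along these lines:

/* Assumed shape of the macro, based on how the call sites use it:
 * job-less submissions (e.g. kernel-internal IB tests pass job == NULL)
 * fall back to VMID 0. */
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)

Direct submissions that have no job can then pass NULL and still emit a valid packet, which appears to be the point of widening the signature.
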
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 58b39afcfb86..8a4595968d98 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -183,11 +183,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 		return 0;
 
 	r = amdgpu_ring_alloc(ring, 16);
-	if (r) {
-		DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n",
-			  ring->me, ring->idx, r);
+	if (r)
 		return r;
-	}
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
 	amdgpu_ring_commit(ring);
 
@@ -197,14 +194,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 		DRM_UDELAY(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
-			  ring->me, ring->idx, i);
-	} else {
-		DRM_ERROR("amdgpu: (%d)ring %d test failed\n",
-			  ring->me, ring->idx);
+	if (i >= adev->usec_timeout)
 		r = -ETIMEDOUT;
-	}
 
 	return r;
 }
@@ -343,27 +334,19 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	long r;
 
 	r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
-	if (r) {
-		DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r);
+	if (r)
 		goto error;
-	}
 
 	r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
-	if (r) {
-		DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
+	if (r)
 		goto error;
-	}
 
 	r = dma_fence_wait_timeout(fence, false, timeout);
-	if (r == 0) {
-		DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me);
+	if (r == 0)
 		r = -ETIMEDOUT;
-	} else if (r < 0) {
-		DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r);
-	} else {
-		DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx);
+	else if (r > 0)
 		r = 0;
-	}
+
 error:
 	dma_fence_put(fence);
 	return r;
@@ -540,12 +523,9 @@ static int uvd_v7_0_hw_init(void *handle)
 		ring = &adev->uvd.inst[j].ring;
 
 		if (!amdgpu_sriov_vf(adev)) {
-			ring->ready = true;
-			r = amdgpu_ring_test_ring(ring);
-			if (r) {
-				ring->ready = false;
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
 				goto done;
-			}
 
 			r = amdgpu_ring_alloc(ring, 10);
 			if (r) {
@@ -582,12 +562,9 @@ static int uvd_v7_0_hw_init(void *handle)
 
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 			ring = &adev->uvd.inst[j].ring_enc[i];
-			ring->ready = true;
-			r = amdgpu_ring_test_ring(ring);
-			if (r) {
-				ring->ready = false;
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
 				goto done;
-			}
 		}
 	}
 done:
@@ -619,7 +596,7 @@ static int uvd_v7_0_hw_fini(void *handle)
 	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
 		if (adev->uvd.harvest_config & (1 << i))
 			continue;
-		adev->uvd.inst[i].ring.ready = false;
+		adev->uvd.inst[i].ring.sched.ready = false;
 	}
 
 	return 0;
@@ -1235,11 +1212,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 
 	WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
-	if (r) {
-		DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
-			  ring->me, ring->idx, r);
+	if (r)
 		return r;
-	}
+
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
@@ -1251,14 +1226,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 		DRM_UDELAY(1);
 	}
 
-	if (i < adev->usec_timeout) {
-		DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
-			  ring->me, ring->idx, i);
-	} else {
-		DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->me, ring->idx, tmp);
-		r = -EINVAL;
-	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+
 	return r;
 }
 
@@ -1300,10 +1270,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
  * Write ring commands to execute the indirect buffer
  */
 static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+				  struct amdgpu_job *job,
 				  struct amdgpu_ib *ib,
-				  unsigned vmid, bool ctx_switch)
+				  bool ctx_switch)
 {
 	struct amdgpu_device *adev = ring->adev;
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
@@ -1329,8 +1301,12 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
  * Write enc ring commands to execute the indirect buffer
  */
 static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
-					struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
+					struct amdgpu_job *job,
+					struct amdgpu_ib *ib,
+					bool ctx_switch)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM);
 	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index ea28828360d3..bed78a778e3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -463,15 +463,11 @@ static int vce_v2_0_hw_init(void *handle)
 
 	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
 	vce_v2_0_enable_mgcg(adev, true, false);
-	for (i = 0; i < adev->vce.num_rings; i++)
-		adev->vce.ring[i].ready = false;
 
 	for (i = 0; i < adev->vce.num_rings; i++) {
-		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
+		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
 		if (r)
 			return r;
-		else
-			adev->vce.ring[i].ready = true;
 	}
 
 	DRM_INFO("VCE initialized successfully.\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6dbd39730070..3e84840859a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -474,15 +474,10 @@ static int vce_v3_0_hw_init(void *handle)
 
 	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
 
-	for (i = 0; i < adev->vce.num_rings; i++)
-		adev->vce.ring[i].ready = false;
-
 	for (i = 0; i < adev->vce.num_rings; i++) {
-		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
+		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
 		if (r)
 			return r;
-		else
-			adev->vce.ring[i].ready = true;
 	}
 
 	DRM_INFO("VCE initialized successfully.\n");
@@ -838,8 +833,12 @@ out:
 }
 
 static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
-				  struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
+				  struct amdgpu_job *job,
+				  struct amdgpu_ib *ib,
+				  bool ctx_switch)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
 	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 1c9471890bf7..0054ba1b9a68 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -519,15 +519,10 @@ static int vce_v4_0_hw_init(void *handle)
 	if (r)
 		return r;
 
-	for (i = 0; i < adev->vce.num_rings; i++)
-		adev->vce.ring[i].ready = false;
-
 	for (i = 0; i < adev->vce.num_rings; i++) {
-		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
+		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
 		if (r)
 			return r;
-		else
-			adev->vce.ring[i].ready = true;
 	}
 
 	DRM_INFO("VCE initialized successfully.\n");
@@ -549,7 +544,7 @@ static int vce_v4_0_hw_fini(void *handle)
 	}
 
 	for (i = 0; i < adev->vce.num_rings; i++)
-		adev->vce.ring[i].ready = false;
+		adev->vce.ring[i].sched.ready = false;
 
 	return 0;
 }
@@ -951,9 +946,11 @@ static int vce_v4_0_set_powergating_state(void *handle,
 }
 #endif
 
-static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
-		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
+static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+					struct amdgpu_ib *ib, bool ctx_switch)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
 	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index eae90922fdbe..c1a03505f956 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -176,30 +176,22 @@ static int vcn_v1_0_hw_init(void *handle)
 	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
 	int i, r;
 
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
 		goto done;
-	}
 
 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 		ring = &adev->vcn.ring_enc[i];
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->ready = false;
+		ring->sched.ready = true;
+		r = amdgpu_ring_test_helper(ring);
+		if (r)
 			goto done;
-		}
 	}
 
 	ring = &adev->vcn.ring_jpeg;
-	ring->ready = true;
-	r = amdgpu_ring_test_ring(ring);
-	if (r) {
-		ring->ready = false;
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
 		goto done;
-	}
 
 done:
 	if (!r)
@@ -224,7 +216,7 @@ static int vcn_v1_0_hw_fini(void *handle)
 	if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
 		vcn_v1_0_stop(adev);
 
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	return 0;
 }
@@ -1366,10 +1358,12 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
  * Write ring commands to execute the indirect buffer
  */
 static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
-					struct amdgpu_ib *ib,
-					unsigned vmid, bool ctx_switch)
+					struct amdgpu_job *job,
+					struct amdgpu_ib *ib,
+					bool ctx_switch)
 {
 	struct amdgpu_device *adev = ring->adev;
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
 	amdgpu_ring_write(ring,
 		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
@@ -1524,8 +1518,12 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
  * Write enc ring commands to execute the indirect buffer
  */
 static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
-					struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
+					struct amdgpu_job *job,
+					struct amdgpu_ib *ib,
+					bool ctx_switch)
 {
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
 	amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
 	amdgpu_ring_write(ring, vmid);
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
@@ -1725,10 +1723,12 @@ static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6
  * Write ring commands to execute the indirect buffer.
  */
 static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
-					struct amdgpu_ib *ib,
-					unsigned vmid, bool ctx_switch)
+					struct amdgpu_job *job,
+					struct amdgpu_ib *ib,
+					bool ctx_switch)
 {
 	struct amdgpu_device *adev = ring->adev;
+	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
 	amdgpu_ring_write(ring,
 		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index a99f71797aa3..a0fda6f9252a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -129,7 +129,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	else
 		wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
-	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
+	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFFFF);
 
 	/* set rptr, wptr to 0 */
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
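
The vega10_ih change above widens the mask on the write-back address high word from 0xFF to 0xFFFF, so IH_RB_WPTR_ADDR_HI carries 16 upper address bits instead of 8; presumably this keeps the wptr write-back working for buffers placed above the 40-bit boundary. A small stand-alone illustration of the difference (plain user-space C; upper_32_bits() is re-declared here only for the demo):

#include <stdint.h>
#include <stdio.h>

/* same intent as the kernel's upper_32_bits(), redefined for this demo */
#define upper_32_bits(n) ((uint32_t)((uint64_t)(n) >> 32))

int main(void)
{
	uint64_t wptr_off = 0x1FF00000000ULL;	/* an address needing 41 bits */

	/* the old 8-bit mask silently drops the 41st bit ... */
	printf("& 0xFF   -> 0x%x\n", upper_32_bits(wptr_off) & 0xFF);	/* 0xff  */
	/* ... the new 16-bit mask keeps it */
	printf("& 0xFFFF -> 0x%x\n", upper_32_bits(wptr_off) & 0xFFFF);	/* 0x1ff */
	return 0;
}
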
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 2d4473557b0d..d13fc4fcb517 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -49,6 +49,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
 		adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
 		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+		adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
 	}
 	return 0;
 }