Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
78 files changed, 2605 insertions, 2152 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 138cb787d27e..f76bcb9c45e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -53,7 +53,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ | |||
53 | amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ | 53 | amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ |
54 | amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ | 54 | amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ |
55 | amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ | 55 | amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ |
56 | amdgpu_gmc.o amdgpu_xgmi.o | 56 | amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o |
57 | 57 | ||
58 | # add asic specific block | 58 | # add asic specific block |
59 | amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ | 59 | amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ |
@@ -105,6 +105,7 @@ amdgpu-y += \ | |||
105 | # add GFX block | 105 | # add GFX block |
106 | amdgpu-y += \ | 106 | amdgpu-y += \ |
107 | amdgpu_gfx.o \ | 107 | amdgpu_gfx.o \ |
108 | amdgpu_rlc.o \ | ||
108 | gfx_v8_0.o \ | 109 | gfx_v8_0.o \ |
109 | gfx_v9_0.o | 110 | gfx_v9_0.o |
110 | 111 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d0102cfc8efb..42f882c633ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -75,6 +75,7 @@ | |||
75 | #include "amdgpu_sdma.h" | 75 | #include "amdgpu_sdma.h" |
76 | #include "amdgpu_dm.h" | 76 | #include "amdgpu_dm.h" |
77 | #include "amdgpu_virt.h" | 77 | #include "amdgpu_virt.h" |
78 | #include "amdgpu_csa.h" | ||
78 | #include "amdgpu_gart.h" | 79 | #include "amdgpu_gart.h" |
79 | #include "amdgpu_debugfs.h" | 80 | #include "amdgpu_debugfs.h" |
80 | #include "amdgpu_job.h" | 81 | #include "amdgpu_job.h" |
@@ -151,6 +152,7 @@ extern int amdgpu_compute_multipipe; | |||
151 | extern int amdgpu_gpu_recovery; | 152 | extern int amdgpu_gpu_recovery; |
152 | extern int amdgpu_emu_mode; | 153 | extern int amdgpu_emu_mode; |
153 | extern uint amdgpu_smu_memory_pool_size; | 154 | extern uint amdgpu_smu_memory_pool_size; |
155 | extern uint amdgpu_dc_feature_mask; | ||
154 | extern struct amdgpu_mgpu_info mgpu_info; | 156 | extern struct amdgpu_mgpu_info mgpu_info; |
155 | 157 | ||
156 | #ifdef CONFIG_DRM_AMDGPU_SI | 158 | #ifdef CONFIG_DRM_AMDGPU_SI |
@@ -432,7 +434,7 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT | |||
432 | * default non-graphics QWORD index is 0xe0 - 0xFF inclusive | 434 | * default non-graphics QWORD index is 0xe0 - 0xFF inclusive |
433 | */ | 435 | */ |
434 | 436 | ||
435 | /* sDMA engines reserved from 0xe0 -oxef */ | 437 | /* sDMA engines reserved from 0xe0 -0xef */ |
436 | AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0, | 438 | AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0, |
437 | AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1, | 439 | AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1, |
438 | AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8, | 440 | AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8, |
@@ -830,7 +832,6 @@ struct amdgpu_device { | |||
830 | bool need_dma32; | 832 | bool need_dma32; |
831 | bool need_swiotlb; | 833 | bool need_swiotlb; |
832 | bool accel_working; | 834 | bool accel_working; |
833 | struct work_struct reset_work; | ||
834 | struct notifier_block acpi_nb; | 835 | struct notifier_block acpi_nb; |
835 | struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; | 836 | struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; |
836 | struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; | 837 | struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c31a8849e9f8..60f9a87e9c74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -144,7 +144,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | |||
144 | KGD_MAX_QUEUES); | 144 | KGD_MAX_QUEUES); |
145 | 145 | ||
146 | /* remove the KIQ bit as well */ | 146 | /* remove the KIQ bit as well */ |
147 | if (adev->gfx.kiq.ring.ready) | 147 | if (adev->gfx.kiq.ring.sched.ready) |
148 | clear_bit(amdgpu_gfx_queue_to_bit(adev, | 148 | clear_bit(amdgpu_gfx_queue_to_bit(adev, |
149 | adev->gfx.kiq.ring.me - 1, | 149 | adev->gfx.kiq.ring.me - 1, |
150 | adev->gfx.kiq.ring.pipe, | 150 | adev->gfx.kiq.ring.pipe, |
@@ -268,9 +268,9 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) | |||
268 | amdgpu_device_gpu_recover(adev, NULL); | 268 | amdgpu_device_gpu_recover(adev, NULL); |
269 | } | 269 | } |
270 | 270 | ||
271 | int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | 271 | int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, |
272 | void **mem_obj, uint64_t *gpu_addr, | 272 | void **mem_obj, uint64_t *gpu_addr, |
273 | void **cpu_ptr, bool mqd_gfx9) | 273 | void **cpu_ptr, bool mqd_gfx9) |
274 | { | 274 | { |
275 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | 275 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
276 | struct amdgpu_bo *bo = NULL; | 276 | struct amdgpu_bo *bo = NULL; |
@@ -340,7 +340,7 @@ allocate_mem_reserve_bo_failed: | |||
340 | return r; | 340 | return r; |
341 | } | 341 | } |
342 | 342 | ||
343 | void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) | 343 | void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) |
344 | { | 344 | { |
345 | struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; | 345 | struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; |
346 | 346 | ||
@@ -351,8 +351,8 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) | |||
351 | amdgpu_bo_unref(&(bo)); | 351 | amdgpu_bo_unref(&(bo)); |
352 | } | 352 | } |
353 | 353 | ||
354 | void get_local_mem_info(struct kgd_dev *kgd, | 354 | void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, |
355 | struct kfd_local_mem_info *mem_info) | 355 | struct kfd_local_mem_info *mem_info) |
356 | { | 356 | { |
357 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | 357 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
358 | uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : | 358 | uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : |
@@ -383,7 +383,7 @@ void get_local_mem_info(struct kgd_dev *kgd, | |||
383 | mem_info->mem_clk_max = 100; | 383 | mem_info->mem_clk_max = 100; |
384 | } | 384 | } |
385 | 385 | ||
386 | uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) | 386 | uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd) |
387 | { | 387 | { |
388 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | 388 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
389 | 389 | ||
@@ -392,7 +392,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) | |||
392 | return 0; | 392 | return 0; |
393 | } | 393 | } |
394 | 394 | ||
395 | uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) | 395 | uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) |
396 | { | 396 | { |
397 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | 397 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
398 | 398 | ||
@@ -405,7 +405,7 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) | |||
405 | return 100; | 405 | return 100; |
406 | } | 406 | } |
407 | 407 | ||
408 | void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) | 408 | void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) |
409 | { | 409 | { |
410 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | 410 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
411 | struct amdgpu_cu_info acu_info = adev->gfx.cu_info; | 411 | struct amdgpu_cu_info acu_info = adev->gfx.cu_info; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8e0d4f7196b4..bcf587b4ba98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -134,16 +134,16 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); | |||
134 | void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); | 134 | void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); |
135 | 135 | ||
136 | /* Shared API */ | 136 | /* Shared API */ |
137 | int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, | 137 | int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, |
138 | void **mem_obj, uint64_t *gpu_addr, | 138 | void **mem_obj, uint64_t *gpu_addr, |
139 | void **cpu_ptr, bool mqd_gfx9); | 139 | void **cpu_ptr, bool mqd_gfx9); |
140 | void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); | 140 | void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); |
141 | void get_local_mem_info(struct kgd_dev *kgd, | 141 | void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, |
142 | struct kfd_local_mem_info *mem_info); | 142 | struct kfd_local_mem_info *mem_info); |
143 | uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); | 143 | uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); |
144 | 144 | ||
145 | uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); | 145 | uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd); |
146 | void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); | 146 | void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); |
147 | uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); | 147 | uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); |
148 | uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); | 148 | uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); |
149 | 149 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 244d9834a381..72a357dae070 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -173,13 +173,6 @@ static int get_tile_config(struct kgd_dev *kgd, | |||
173 | } | 173 | } |
174 | 174 | ||
175 | static const struct kfd2kgd_calls kfd2kgd = { | 175 | static const struct kfd2kgd_calls kfd2kgd = { |
176 | .init_gtt_mem_allocation = alloc_gtt_mem, | ||
177 | .free_gtt_mem = free_gtt_mem, | ||
178 | .get_local_mem_info = get_local_mem_info, | ||
179 | .get_gpu_clock_counter = get_gpu_clock_counter, | ||
180 | .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, | ||
181 | .alloc_pasid = amdgpu_pasid_alloc, | ||
182 | .free_pasid = amdgpu_pasid_free, | ||
183 | .program_sh_mem_settings = kgd_program_sh_mem_settings, | 176 | .program_sh_mem_settings = kgd_program_sh_mem_settings, |
184 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, | 177 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, |
185 | .init_interrupts = kgd_init_interrupts, | 178 | .init_interrupts = kgd_init_interrupts, |
@@ -200,28 +193,10 @@ static const struct kfd2kgd_calls kfd2kgd = { | |||
200 | .get_fw_version = get_fw_version, | 193 | .get_fw_version = get_fw_version, |
201 | .set_scratch_backing_va = set_scratch_backing_va, | 194 | .set_scratch_backing_va = set_scratch_backing_va, |
202 | .get_tile_config = get_tile_config, | 195 | .get_tile_config = get_tile_config, |
203 | .get_cu_info = get_cu_info, | ||
204 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage, | ||
205 | .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, | ||
206 | .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, | ||
207 | .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, | ||
208 | .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm, | ||
209 | .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, | ||
210 | .set_vm_context_page_table_base = set_vm_context_page_table_base, | 196 | .set_vm_context_page_table_base = set_vm_context_page_table_base, |
211 | .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, | ||
212 | .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, | ||
213 | .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, | ||
214 | .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, | ||
215 | .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, | ||
216 | .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, | ||
217 | .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, | ||
218 | .invalidate_tlbs = invalidate_tlbs, | 197 | .invalidate_tlbs = invalidate_tlbs, |
219 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, | 198 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, |
220 | .submit_ib = amdgpu_amdkfd_submit_ib, | ||
221 | .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, | ||
222 | .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, | 199 | .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, |
223 | .gpu_recover = amdgpu_amdkfd_gpu_reset, | ||
224 | .set_compute_idle = amdgpu_amdkfd_set_compute_idle | ||
225 | }; | 200 | }; |
226 | 201 | ||
227 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) | 202 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 9f149914ad6c..0e2a56b6a9b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -128,13 +128,6 @@ static int get_tile_config(struct kgd_dev *kgd, | |||
128 | } | 128 | } |
129 | 129 | ||
130 | static const struct kfd2kgd_calls kfd2kgd = { | 130 | static const struct kfd2kgd_calls kfd2kgd = { |
131 | .init_gtt_mem_allocation = alloc_gtt_mem, | ||
132 | .free_gtt_mem = free_gtt_mem, | ||
133 | .get_local_mem_info = get_local_mem_info, | ||
134 | .get_gpu_clock_counter = get_gpu_clock_counter, | ||
135 | .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, | ||
136 | .alloc_pasid = amdgpu_pasid_alloc, | ||
137 | .free_pasid = amdgpu_pasid_free, | ||
138 | .program_sh_mem_settings = kgd_program_sh_mem_settings, | 131 | .program_sh_mem_settings = kgd_program_sh_mem_settings, |
139 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, | 132 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, |
140 | .init_interrupts = kgd_init_interrupts, | 133 | .init_interrupts = kgd_init_interrupts, |
@@ -157,27 +150,9 @@ static const struct kfd2kgd_calls kfd2kgd = { | |||
157 | .get_fw_version = get_fw_version, | 150 | .get_fw_version = get_fw_version, |
158 | .set_scratch_backing_va = set_scratch_backing_va, | 151 | .set_scratch_backing_va = set_scratch_backing_va, |
159 | .get_tile_config = get_tile_config, | 152 | .get_tile_config = get_tile_config, |
160 | .get_cu_info = get_cu_info, | ||
161 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage, | ||
162 | .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, | ||
163 | .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, | ||
164 | .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, | ||
165 | .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm, | ||
166 | .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, | ||
167 | .set_vm_context_page_table_base = set_vm_context_page_table_base, | 153 | .set_vm_context_page_table_base = set_vm_context_page_table_base, |
168 | .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, | ||
169 | .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, | ||
170 | .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, | ||
171 | .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, | ||
172 | .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, | ||
173 | .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, | ||
174 | .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, | ||
175 | .invalidate_tlbs = invalidate_tlbs, | 154 | .invalidate_tlbs = invalidate_tlbs, |
176 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, | 155 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, |
177 | .submit_ib = amdgpu_amdkfd_submit_ib, | ||
178 | .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, | ||
179 | .gpu_recover = amdgpu_amdkfd_gpu_reset, | ||
180 | .set_compute_idle = amdgpu_amdkfd_set_compute_idle | ||
181 | }; | 156 | }; |
182 | 157 | ||
183 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) | 158 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 42cb4c4e0929..03b604c96d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -46,38 +46,9 @@ | |||
46 | #include "v9_structs.h" | 46 | #include "v9_structs.h" |
47 | #include "soc15.h" | 47 | #include "soc15.h" |
48 | #include "soc15d.h" | 48 | #include "soc15d.h" |
49 | #include "mmhub_v1_0.h" | ||
50 | #include "gfxhub_v1_0.h" | ||
49 | 51 | ||
50 | /* HACK: MMHUB and GC both have VM-related register with the same | ||
51 | * names but different offsets. Define the MMHUB register we need here | ||
52 | * with a prefix. A proper solution would be to move the functions | ||
53 | * programming these registers into gfx_v9_0.c and mmhub_v1_0.c | ||
54 | * respectively. | ||
55 | */ | ||
56 | #define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 | ||
57 | #define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 | ||
58 | |||
59 | #define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 | ||
60 | #define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 | ||
61 | |||
62 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b | ||
63 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 | ||
64 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c | ||
65 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 | ||
66 | |||
67 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b | ||
68 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 | ||
69 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c | ||
70 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 | ||
71 | |||
72 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b | ||
73 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 | ||
74 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c | ||
75 | #define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 | ||
76 | |||
77 | #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 | ||
78 | #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 | ||
79 | #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 | ||
80 | #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 | ||
81 | 52 | ||
82 | #define V9_PIPE_PER_MEC (4) | 53 | #define V9_PIPE_PER_MEC (4) |
83 | #define V9_QUEUES_PER_PIPE_MEC (8) | 54 | #define V9_QUEUES_PER_PIPE_MEC (8) |
@@ -167,13 +138,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, | |||
167 | } | 138 | } |
168 | 139 | ||
169 | static const struct kfd2kgd_calls kfd2kgd = { | 140 | static const struct kfd2kgd_calls kfd2kgd = { |
170 | .init_gtt_mem_allocation = alloc_gtt_mem, | ||
171 | .free_gtt_mem = free_gtt_mem, | ||
172 | .get_local_mem_info = get_local_mem_info, | ||
173 | .get_gpu_clock_counter = get_gpu_clock_counter, | ||
174 | .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, | ||
175 | .alloc_pasid = amdgpu_pasid_alloc, | ||
176 | .free_pasid = amdgpu_pasid_free, | ||
177 | .program_sh_mem_settings = kgd_program_sh_mem_settings, | 141 | .program_sh_mem_settings = kgd_program_sh_mem_settings, |
178 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, | 142 | .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, |
179 | .init_interrupts = kgd_init_interrupts, | 143 | .init_interrupts = kgd_init_interrupts, |
@@ -196,26 +160,9 @@ static const struct kfd2kgd_calls kfd2kgd = { | |||
196 | .get_fw_version = get_fw_version, | 160 | .get_fw_version = get_fw_version, |
197 | .set_scratch_backing_va = set_scratch_backing_va, | 161 | .set_scratch_backing_va = set_scratch_backing_va, |
198 | .get_tile_config = amdgpu_amdkfd_get_tile_config, | 162 | .get_tile_config = amdgpu_amdkfd_get_tile_config, |
199 | .get_cu_info = get_cu_info, | ||
200 | .get_vram_usage = amdgpu_amdkfd_get_vram_usage, | ||
201 | .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, | ||
202 | .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, | ||
203 | .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, | ||
204 | .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm, | ||
205 | .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, | ||
206 | .set_vm_context_page_table_base = set_vm_context_page_table_base, | 163 | .set_vm_context_page_table_base = set_vm_context_page_table_base, |
207 | .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, | ||
208 | .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, | ||
209 | .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, | ||
210 | .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, | ||
211 | .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, | ||
212 | .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, | ||
213 | .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, | ||
214 | .invalidate_tlbs = invalidate_tlbs, | 164 | .invalidate_tlbs = invalidate_tlbs, |
215 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, | 165 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, |
216 | .submit_ib = amdgpu_amdkfd_submit_ib, | ||
217 | .gpu_recover = amdgpu_amdkfd_gpu_reset, | ||
218 | .set_compute_idle = amdgpu_amdkfd_set_compute_idle, | ||
219 | .get_hive_id = amdgpu_amdkfd_get_hive_id, | 166 | .get_hive_id = amdgpu_amdkfd_get_hive_id, |
220 | }; | 167 | }; |
221 | 168 | ||
@@ -785,15 +732,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, | |||
785 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) | 732 | static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) |
786 | { | 733 | { |
787 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; | 734 | struct amdgpu_device *adev = (struct amdgpu_device *) kgd; |
788 | uint32_t req = (1 << vmid) | | ||
789 | (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ | ||
790 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | | ||
791 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | | ||
792 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | | ||
793 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | | ||
794 | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; | ||
795 | |||
796 | mutex_lock(&adev->srbm_mutex); | ||
797 | 735 | ||
798 | /* Use legacy mode tlb invalidation. | 736 | /* Use legacy mode tlb invalidation. |
799 | * | 737 | * |
@@ -810,34 +748,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) | |||
810 | * TODO 2: support range-based invalidation, requires kfg2kgd | 748 | * TODO 2: support range-based invalidation, requires kfg2kgd |
811 | * interface change | 749 | * interface change |
812 | */ | 750 | */ |
813 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), | 751 | amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); |
814 | 0xffffffff); | ||
815 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), | ||
816 | 0x0000001f); | ||
817 | |||
818 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
819 | mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), | ||
820 | 0xffffffff); | ||
821 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
822 | mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), | ||
823 | 0x0000001f); | ||
824 | |||
825 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); | ||
826 | |||
827 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), | ||
828 | req); | ||
829 | |||
830 | while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & | ||
831 | (1 << vmid))) | ||
832 | cpu_relax(); | ||
833 | |||
834 | while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, | ||
835 | mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & | ||
836 | (1 << vmid))) | ||
837 | cpu_relax(); | ||
838 | |||
839 | mutex_unlock(&adev->srbm_mutex); | ||
840 | |||
841 | } | 752 | } |
842 | 753 | ||
843 | static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) | 754 | static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) |
@@ -876,7 +787,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) | |||
876 | if (adev->in_gpu_reset) | 787 | if (adev->in_gpu_reset) |
877 | return -EIO; | 788 | return -EIO; |
878 | 789 | ||
879 | if (ring->ready) | 790 | if (ring->sched.ready) |
880 | return invalidate_tlbs_with_kiq(adev, pasid); | 791 | return invalidate_tlbs_with_kiq(adev, pasid); |
881 | 792 | ||
882 | for (vmid = 0; vmid < 16; vmid++) { | 793 | for (vmid = 0; vmid < 16; vmid++) { |
@@ -1016,7 +927,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | |||
1016 | uint64_t page_table_base) | 927 | uint64_t page_table_base) |
1017 | { | 928 | { |
1018 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | 929 | struct amdgpu_device *adev = get_amdgpu_device(kgd); |
1019 | uint64_t base = page_table_base | AMDGPU_PTE_VALID; | ||
1020 | 930 | ||
1021 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { | 931 | if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { |
1022 | pr_err("trying to set page table base for wrong VMID %u\n", | 932 | pr_err("trying to set page table base for wrong VMID %u\n", |
@@ -1028,25 +938,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | |||
1028 | * now, all processes share the same address space size, like | 938 | * now, all processes share the same address space size, like |
1029 | * on GFX8 and older. | 939 | * on GFX8 and older. |
1030 | */ | 940 | */ |
1031 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); | 941 | mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); |
1032 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); | ||
1033 | |||
1034 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), | ||
1035 | lower_32_bits(adev->vm_manager.max_pfn - 1)); | ||
1036 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), | ||
1037 | upper_32_bits(adev->vm_manager.max_pfn - 1)); | ||
1038 | |||
1039 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); | ||
1040 | WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); | ||
1041 | |||
1042 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); | ||
1043 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); | ||
1044 | |||
1045 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), | ||
1046 | lower_32_bits(adev->vm_manager.max_pfn - 1)); | ||
1047 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), | ||
1048 | upper_32_bits(adev->vm_manager.max_pfn - 1)); | ||
1049 | 942 | ||
1050 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); | 943 | gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); |
1051 | WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); | ||
1052 | } | 944 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 8816c697b205..ceadeeadfa56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -330,7 +330,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | |||
330 | case CHIP_TOPAZ: | 330 | case CHIP_TOPAZ: |
331 | if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) || | 331 | if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) || |
332 | ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) || | 332 | ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) || |
333 | ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87))) { | 333 | ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)) || |
334 | ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) || | ||
335 | ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) { | ||
334 | info->is_kicker = true; | 336 | info->is_kicker = true; |
335 | strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); | 337 | strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); |
336 | } else | 338 | } else |
@@ -351,7 +353,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | |||
351 | if (type == CGS_UCODE_ID_SMU) { | 353 | if (type == CGS_UCODE_ID_SMU) { |
352 | if (((adev->pdev->device == 0x67ef) && | 354 | if (((adev->pdev->device == 0x67ef) && |
353 | ((adev->pdev->revision == 0xe0) || | 355 | ((adev->pdev->revision == 0xe0) || |
354 | (adev->pdev->revision == 0xe2) || | ||
355 | (adev->pdev->revision == 0xe5))) || | 356 | (adev->pdev->revision == 0xe5))) || |
356 | ((adev->pdev->device == 0x67ff) && | 357 | ((adev->pdev->device == 0x67ff) && |
357 | ((adev->pdev->revision == 0xcf) || | 358 | ((adev->pdev->revision == 0xcf) || |
@@ -359,8 +360,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | |||
359 | (adev->pdev->revision == 0xff)))) { | 360 | (adev->pdev->revision == 0xff)))) { |
360 | info->is_kicker = true; | 361 | info->is_kicker = true; |
361 | strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); | 362 | strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); |
362 | } else | 363 | } else if ((adev->pdev->device == 0x67ef) && |
364 | (adev->pdev->revision == 0xe2)) { | ||
365 | info->is_kicker = true; | ||
366 | strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); | ||
367 | } else { | ||
363 | strcpy(fw_name, "amdgpu/polaris11_smc.bin"); | 368 | strcpy(fw_name, "amdgpu/polaris11_smc.bin"); |
369 | } | ||
364 | } else if (type == CGS_UCODE_ID_SMU_SK) { | 370 | } else if (type == CGS_UCODE_ID_SMU_SK) { |
365 | strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); | 371 | strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); |
366 | } | 372 | } |
@@ -378,14 +384,31 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, | |||
378 | (adev->pdev->revision == 0xef))) { | 384 | (adev->pdev->revision == 0xef))) { |
379 | info->is_kicker = true; | 385 | info->is_kicker = true; |
380 | strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); | 386 | strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); |
381 | } else | 387 | } else if ((adev->pdev->device == 0x67df) && |
388 | ((adev->pdev->revision == 0xe1) || | ||
389 | (adev->pdev->revision == 0xf7))) { | ||
390 | info->is_kicker = true; | ||
391 | strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); | ||
392 | } else { | ||
382 | strcpy(fw_name, "amdgpu/polaris10_smc.bin"); | 393 | strcpy(fw_name, "amdgpu/polaris10_smc.bin"); |
394 | } | ||
383 | } else if (type == CGS_UCODE_ID_SMU_SK) { | 395 | } else if (type == CGS_UCODE_ID_SMU_SK) { |
384 | strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); | 396 | strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); |
385 | } | 397 | } |
386 | break; | 398 | break; |
387 | case CHIP_POLARIS12: | 399 | case CHIP_POLARIS12: |
388 | strcpy(fw_name, "amdgpu/polaris12_smc.bin"); | 400 | if (((adev->pdev->device == 0x6987) && |
401 | ((adev->pdev->revision == 0xc0) || | ||
402 | (adev->pdev->revision == 0xc3))) || | ||
403 | ((adev->pdev->device == 0x6981) && | ||
404 | ((adev->pdev->revision == 0x00) || | ||
405 | (adev->pdev->revision == 0x01) || | ||
406 | (adev->pdev->revision == 0x10)))) { | ||
407 | info->is_kicker = true; | ||
408 | strcpy(fw_name, "amdgpu/polaris12_k_smc.bin"); | ||
409 | } else { | ||
410 | strcpy(fw_name, "amdgpu/polaris12_smc.bin"); | ||
411 | } | ||
389 | break; | 412 | break; |
390 | case CHIP_VEGAM: | 413 | case CHIP_VEGAM: |
391 | strcpy(fw_name, "amdgpu/vegam_smc.bin"); | 414 | strcpy(fw_name, "amdgpu/vegam_smc.bin"); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 35bc8fc3bc70..024dfbd87f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1260,8 +1260,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, | |||
1260 | return 0; | 1260 | return 0; |
1261 | 1261 | ||
1262 | error_abort: | 1262 | error_abort: |
1263 | dma_fence_put(&job->base.s_fence->finished); | 1263 | drm_sched_job_cleanup(&job->base); |
1264 | job->base.s_fence = NULL; | ||
1265 | amdgpu_mn_unlock(p->mn); | 1264 | amdgpu_mn_unlock(p->mn); |
1266 | 1265 | ||
1267 | error_unlock: | 1266 | error_unlock: |
@@ -1285,7 +1284,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
1285 | 1284 | ||
1286 | r = amdgpu_cs_parser_init(&parser, data); | 1285 | r = amdgpu_cs_parser_init(&parser, data); |
1287 | if (r) { | 1286 | if (r) { |
1288 | DRM_ERROR("Failed to initialize parser !\n"); | 1287 | DRM_ERROR("Failed to initialize parser %d!\n", r); |
1289 | goto out; | 1288 | goto out; |
1290 | } | 1289 | } |
1291 | 1290 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
new file mode 100644
index 000000000000..0c590ddf250a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Copyright 2016 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | |||
22 | * * Author: Monk.liu@amd.com | ||
23 | */ | ||
24 | |||
25 | #include "amdgpu.h" | ||
26 | |||
27 | uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) | ||
28 | { | ||
29 | uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; | ||
30 | |||
31 | addr -= AMDGPU_VA_RESERVED_SIZE; | ||
32 | addr = amdgpu_gmc_sign_extend(addr); | ||
33 | |||
34 | return addr; | ||
35 | } | ||
36 | |||
37 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, | ||
38 | u32 domain, uint32_t size) | ||
39 | { | ||
40 | int r; | ||
41 | void *ptr; | ||
42 | |||
43 | r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, | ||
44 | domain, bo, | ||
45 | NULL, &ptr); | ||
46 | if (!bo) | ||
47 | return -ENOMEM; | ||
48 | |||
49 | memset(ptr, 0, size); | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | void amdgpu_free_static_csa(struct amdgpu_bo **bo) | ||
54 | { | ||
55 | amdgpu_bo_free_kernel(bo, NULL, NULL); | ||
56 | } | ||
57 | |||
58 | /* | ||
59 | * amdgpu_map_static_csa should be called during amdgpu_vm_init | ||
60 | * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command | ||
61 | * submission of GFX should use this virtual address within META_DATA init | ||
62 | * package to support SRIOV gfx preemption. | ||
63 | */ | ||
64 | int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, | ||
65 | struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, | ||
66 | uint64_t csa_addr, uint32_t size) | ||
67 | { | ||
68 | struct ww_acquire_ctx ticket; | ||
69 | struct list_head list; | ||
70 | struct amdgpu_bo_list_entry pd; | ||
71 | struct ttm_validate_buffer csa_tv; | ||
72 | int r; | ||
73 | |||
74 | INIT_LIST_HEAD(&list); | ||
75 | INIT_LIST_HEAD(&csa_tv.head); | ||
76 | csa_tv.bo = &bo->tbo; | ||
77 | csa_tv.shared = true; | ||
78 | |||
79 | list_add(&csa_tv.head, &list); | ||
80 | amdgpu_vm_get_pd_bo(vm, &list, &pd); | ||
81 | |||
82 | r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); | ||
83 | if (r) { | ||
84 | DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); | ||
85 | return r; | ||
86 | } | ||
87 | |||
88 | *bo_va = amdgpu_vm_bo_add(adev, vm, bo); | ||
89 | if (!*bo_va) { | ||
90 | ttm_eu_backoff_reservation(&ticket, &list); | ||
91 | DRM_ERROR("failed to create bo_va for static CSA\n"); | ||
92 | return -ENOMEM; | ||
93 | } | ||
94 | |||
95 | r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, | ||
96 | size); | ||
97 | if (r) { | ||
98 | DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); | ||
99 | amdgpu_vm_bo_rmv(adev, *bo_va); | ||
100 | ttm_eu_backoff_reservation(&ticket, &list); | ||
101 | return r; | ||
102 | } | ||
103 | |||
104 | r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, | ||
105 | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | | ||
106 | AMDGPU_PTE_EXECUTABLE); | ||
107 | |||
108 | if (r) { | ||
109 | DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); | ||
110 | amdgpu_vm_bo_rmv(adev, *bo_va); | ||
111 | ttm_eu_backoff_reservation(&ticket, &list); | ||
112 | return r; | ||
113 | } | ||
114 | |||
115 | ttm_eu_backoff_reservation(&ticket, &list); | ||
116 | return 0; | ||
117 | } | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
new file mode 100644
index 000000000000..524b4437a021
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright 2016 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | * Author: Monk.liu@amd.com | ||
23 | */ | ||
24 | |||
25 | #ifndef AMDGPU_CSA_MANAGER_H | ||
26 | #define AMDGPU_CSA_MANAGER_H | ||
27 | |||
28 | #define AMDGPU_CSA_SIZE (128 * 1024) | ||
29 | |||
30 | uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev); | ||
31 | uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); | ||
32 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, | ||
33 | u32 domain, uint32_t size); | ||
34 | int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, | ||
35 | struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, | ||
36 | uint64_t csa_addr, uint32_t size); | ||
37 | void amdgpu_free_static_csa(struct amdgpu_bo **bo); | ||
38 | |||
39 | #endif | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 30bc345d6fdf..590588a82471 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1656,7 +1656,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) | |||
1656 | 1656 | ||
1657 | /* right after GMC hw init, we create CSA */ | 1657 | /* right after GMC hw init, we create CSA */ |
1658 | if (amdgpu_sriov_vf(adev)) { | 1658 | if (amdgpu_sriov_vf(adev)) { |
1659 | r = amdgpu_allocate_static_csa(adev); | 1659 | r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, |
1660 | AMDGPU_GEM_DOMAIN_VRAM, | ||
1661 | AMDGPU_CSA_SIZE); | ||
1660 | if (r) { | 1662 | if (r) { |
1661 | DRM_ERROR("allocate CSA failed %d\n", r); | 1663 | DRM_ERROR("allocate CSA failed %d\n", r); |
1662 | return r; | 1664 | return r; |
@@ -1681,7 +1683,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) | |||
1681 | if (r) | 1683 | if (r) |
1682 | return r; | 1684 | return r; |
1683 | 1685 | ||
1684 | amdgpu_xgmi_add_device(adev); | 1686 | if (adev->gmc.xgmi.num_physical_nodes > 1) |
1687 | amdgpu_xgmi_add_device(adev); | ||
1685 | amdgpu_amdkfd_device_init(adev); | 1688 | amdgpu_amdkfd_device_init(adev); |
1686 | 1689 | ||
1687 | if (amdgpu_sriov_vf(adev)) | 1690 | if (amdgpu_sriov_vf(adev)) |
@@ -1890,7 +1893,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) | |||
1890 | 1893 | ||
1891 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { | 1894 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { |
1892 | amdgpu_ucode_free_bo(adev); | 1895 | amdgpu_ucode_free_bo(adev); |
1893 | amdgpu_free_static_csa(adev); | 1896 | amdgpu_free_static_csa(&adev->virt.csa_obj); |
1894 | amdgpu_device_wb_fini(adev); | 1897 | amdgpu_device_wb_fini(adev); |
1895 | amdgpu_device_vram_scratch_fini(adev); | 1898 | amdgpu_device_vram_scratch_fini(adev); |
1896 | } | 1899 | } |
@@ -3295,13 +3298,35 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) | |||
3295 | return false; | 3298 | return false; |
3296 | } | 3299 | } |
3297 | 3300 | ||
3298 | if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 && | 3301 | if (amdgpu_gpu_recovery == 0) |
3299 | !amdgpu_sriov_vf(adev))) { | 3302 | goto disabled; |
3300 | DRM_INFO("GPU recovery disabled.\n"); | 3303 | |
3301 | return false; | 3304 | if (amdgpu_sriov_vf(adev)) |
3305 | return true; | ||
3306 | |||
3307 | if (amdgpu_gpu_recovery == -1) { | ||
3308 | switch (adev->asic_type) { | ||
3309 | case CHIP_TOPAZ: | ||
3310 | case CHIP_TONGA: | ||
3311 | case CHIP_FIJI: | ||
3312 | case CHIP_POLARIS10: | ||
3313 | case CHIP_POLARIS11: | ||
3314 | case CHIP_POLARIS12: | ||
3315 | case CHIP_VEGAM: | ||
3316 | case CHIP_VEGA20: | ||
3317 | case CHIP_VEGA10: | ||
3318 | case CHIP_VEGA12: | ||
3319 | break; | ||
3320 | default: | ||
3321 | goto disabled; | ||
3322 | } | ||
3302 | } | 3323 | } |
3303 | 3324 | ||
3304 | return true; | 3325 | return true; |
3326 | |||
3327 | disabled: | ||
3328 | DRM_INFO("GPU recovery disabled.\n"); | ||
3329 | return false; | ||
3305 | } | 3330 | } |
3306 | 3331 | ||
3307 | /** | 3332 | /** |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 943dbf3c5da1..8de55f7f1a3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -127,6 +127,9 @@ int amdgpu_compute_multipipe = -1; | |||
127 | int amdgpu_gpu_recovery = -1; /* auto */ | 127 | int amdgpu_gpu_recovery = -1; /* auto */ |
128 | int amdgpu_emu_mode = 0; | 128 | int amdgpu_emu_mode = 0; |
129 | uint amdgpu_smu_memory_pool_size = 0; | 129 | uint amdgpu_smu_memory_pool_size = 0; |
130 | /* FBC (bit 0) disabled by default*/ | ||
131 | uint amdgpu_dc_feature_mask = 0; | ||
132 | |||
130 | struct amdgpu_mgpu_info mgpu_info = { | 133 | struct amdgpu_mgpu_info mgpu_info = { |
131 | .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), | 134 | .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), |
132 | }; | 135 | }; |
@@ -631,6 +634,14 @@ module_param(halt_if_hws_hang, int, 0644); | |||
631 | MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); | 634 | MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); |
632 | #endif | 635 | #endif |
633 | 636 | ||
637 | /** | ||
638 | * DOC: dcfeaturemask (uint) | ||
639 | * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h. | ||
640 | * The default is the current set of stable display features. | ||
641 | */ | ||
642 | MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))"); | ||
643 | module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); | ||
644 | |||
634 | static const struct pci_device_id pciidlist[] = { | 645 | static const struct pci_device_id pciidlist[] = { |
635 | #ifdef CONFIG_DRM_AMDGPU_SI | 646 | #ifdef CONFIG_DRM_AMDGPU_SI |
636 | {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, | 647 | {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 5448cf27654e..ee47c11e92ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -398,9 +398,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, | |||
398 | ring->fence_drv.irq_type = irq_type; | 398 | ring->fence_drv.irq_type = irq_type; |
399 | ring->fence_drv.initialized = true; | 399 | ring->fence_drv.initialized = true; |
400 | 400 | ||
401 | dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, " | 401 | DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr " |
402 | "cpu addr 0x%p\n", ring->idx, | 402 | "0x%016llx, cpu addr 0x%p\n", ring->name, |
403 | ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); | 403 | ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); |
404 | return 0; | 404 | return 0; |
405 | } | 405 | } |
406 | 406 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 11fea28f8ad3..6d11e1721147 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, | |||
248 | } | 248 | } |
249 | mb(); | 249 | mb(); |
250 | amdgpu_asic_flush_hdp(adev, NULL); | 250 | amdgpu_asic_flush_hdp(adev, NULL); |
251 | amdgpu_gmc_flush_gpu_tlb(adev, 0); | 251 | amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); |
252 | return 0; | 252 | return 0; |
253 | } | 253 | } |
254 | 254 | ||
@@ -259,6 +259,8 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, | |||
259 | * @offset: offset into the GPU's gart aperture | 259 | * @offset: offset into the GPU's gart aperture |
260 | * @pages: number of pages to bind | 260 | * @pages: number of pages to bind |
261 | * @dma_addr: DMA addresses of pages | 261 | * @dma_addr: DMA addresses of pages |
262 | * @flags: page table entry flags | ||
263 | * @dst: CPU address of the gart table | ||
262 | * | 264 | * |
263 | * Map the dma_addresses into GART entries (all asics). | 265 | * Map the dma_addresses into GART entries (all asics). |
264 | * Returns 0 for success, -EINVAL for failure. | 266 | * Returns 0 for success, -EINVAL for failure. |
@@ -331,7 +333,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, | |||
331 | 333 | ||
332 | mb(); | 334 | mb(); |
333 | amdgpu_asic_flush_hdp(adev, NULL); | 335 | amdgpu_asic_flush_hdp(adev, NULL); |
334 | amdgpu_gmc_flush_gpu_tlb(adev, 0); | 336 | amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); |
335 | return 0; | 337 | return 0; |
336 | } | 338 | } |
337 | 339 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index 9ff62887e4e3..afa2e2877d87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -41,6 +41,7 @@ struct amdgpu_bo; | |||
41 | 41 | ||
42 | struct amdgpu_gart { | 42 | struct amdgpu_gart { |
43 | struct amdgpu_bo *bo; | 43 | struct amdgpu_bo *bo; |
44 | /* CPU kmapped address of gart table */ | ||
44 | void *ptr; | 45 | void *ptr; |
45 | unsigned num_gpu_pages; | 46 | unsigned num_gpu_pages; |
46 | unsigned num_cpu_pages; | 47 | unsigned num_cpu_pages; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1a656b8657f7..6a70c0b7105f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -25,6 +25,7 @@ | |||
25 | #include <drm/drmP.h> | 25 | #include <drm/drmP.h> |
26 | #include "amdgpu.h" | 26 | #include "amdgpu.h" |
27 | #include "amdgpu_gfx.h" | 27 | #include "amdgpu_gfx.h" |
28 | #include "amdgpu_rlc.h" | ||
28 | 29 | ||
29 | /* delay 0.1 second to enable gfx off feature */ | 30 | /* delay 0.1 second to enable gfx off feature */ |
30 | #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) | 31 | #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index b61b5c11aead..f790e15bcd08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -29,6 +29,7 @@ | |||
29 | */ | 29 | */ |
30 | #include "clearstate_defs.h" | 30 | #include "clearstate_defs.h" |
31 | #include "amdgpu_ring.h" | 31 | #include "amdgpu_ring.h" |
32 | #include "amdgpu_rlc.h" | ||
32 | 33 | ||
33 | /* GFX current status */ | 34 | /* GFX current status */ |
34 | #define AMDGPU_GFX_NORMAL_MODE 0x00000000L | 35 | #define AMDGPU_GFX_NORMAL_MODE 0x00000000L |
@@ -37,59 +38,6 @@ | |||
37 | #define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L | 38 | #define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L |
38 | #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L | 39 | #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L |
39 | 40 | ||
40 | |||
41 | struct amdgpu_rlc_funcs { | ||
42 | void (*enter_safe_mode)(struct amdgpu_device *adev); | ||
43 | void (*exit_safe_mode)(struct amdgpu_device *adev); | ||
44 | }; | ||
45 | |||
46 | struct amdgpu_rlc { | ||
47 | /* for power gating */ | ||
48 | struct amdgpu_bo *save_restore_obj; | ||
49 | uint64_t save_restore_gpu_addr; | ||
50 | volatile uint32_t *sr_ptr; | ||
51 | const u32 *reg_list; | ||
52 | u32 reg_list_size; | ||
53 | /* for clear state */ | ||
54 | struct amdgpu_bo *clear_state_obj; | ||
55 | uint64_t clear_state_gpu_addr; | ||
56 | volatile uint32_t *cs_ptr; | ||
57 | const struct cs_section_def *cs_data; | ||
58 | u32 clear_state_size; | ||
59 | /* for cp tables */ | ||
60 | struct amdgpu_bo *cp_table_obj; | ||
61 | uint64_t cp_table_gpu_addr; | ||
62 | volatile uint32_t *cp_table_ptr; | ||
63 | u32 cp_table_size; | ||
64 | |||
65 | /* safe mode for updating CG/PG state */ | ||
66 | bool in_safe_mode; | ||
67 | const struct amdgpu_rlc_funcs *funcs; | ||
68 | |||
69 | /* for firmware data */ | ||
70 | u32 save_and_restore_offset; | ||
71 | u32 clear_state_descriptor_offset; | ||
72 | u32 avail_scratch_ram_locations; | ||
73 | u32 reg_restore_list_size; | ||
74 | u32 reg_list_format_start; | ||
75 | u32 reg_list_format_separate_start; | ||
76 | u32 starting_offsets_start; | ||
77 | u32 reg_list_format_size_bytes; | ||
78 | u32 reg_list_size_bytes; | ||
79 | u32 reg_list_format_direct_reg_list_length; | ||
80 | u32 save_restore_list_cntl_size_bytes; | ||
81 | u32 save_restore_list_gpm_size_bytes; | ||
82 | u32 save_restore_list_srm_size_bytes; | ||
83 | |||
84 | u32 *register_list_format; | ||
85 | u32 *register_restore; | ||
86 | u8 *save_restore_list_cntl; | ||
87 | u8 *save_restore_list_gpm; | ||
88 | u8 *save_restore_list_srm; | ||
89 | |||
90 | bool is_rlc_v2_1; | ||
91 | }; | ||
92 | |||
93 | #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES | 41 | #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES |
94 | 42 | ||
95 | struct amdgpu_mec { | 43 | struct amdgpu_mec { |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 6fa7ef446e46..8c57924c075f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -64,7 +64,7 @@ struct amdgpu_vmhub { | |||
64 | struct amdgpu_gmc_funcs { | 64 | struct amdgpu_gmc_funcs { |
65 | /* flush the vm tlb via mmio */ | 65 | /* flush the vm tlb via mmio */ |
66 | void (*flush_gpu_tlb)(struct amdgpu_device *adev, | 66 | void (*flush_gpu_tlb)(struct amdgpu_device *adev, |
67 | uint32_t vmid); | 67 | uint32_t vmid, uint32_t flush_type); |
68 | /* flush the vm tlb via ring */ | 68 | /* flush the vm tlb via ring */ |
69 | uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, | 69 | uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, |
70 | uint64_t pd_addr); | 70 | uint64_t pd_addr); |
@@ -89,7 +89,7 @@ struct amdgpu_gmc_funcs { | |||
89 | 89 | ||
90 | struct amdgpu_xgmi { | 90 | struct amdgpu_xgmi { |
91 | /* from psp */ | 91 | /* from psp */ |
92 | u64 device_id; | 92 | u64 node_id; |
93 | u64 hive_id; | 93 | u64 hive_id; |
94 | /* fixed per family */ | 94 | /* fixed per family */ |
95 | u64 node_segment_size; | 95 | u64 node_segment_size; |
@@ -151,7 +151,7 @@ struct amdgpu_gmc { | |||
151 | struct amdgpu_xgmi xgmi; | 151 | struct amdgpu_xgmi xgmi; |
152 | }; | 152 | }; |
153 | 153 | ||
154 | #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) | 154 | #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) |
155 | #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) | 155 | #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) |
156 | #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) | 156 | #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) |
157 | #define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) | 157 | #define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b8963b725dfa..c48207b377bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
146 | fence_ctx = 0; | 146 | fence_ctx = 0; |
147 | } | 147 | } |
148 | 148 | ||
149 | if (!ring->ready) { | 149 | if (!ring->sched.ready) { |
150 | dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); | 150 | dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); |
151 | return -EINVAL; | 151 | return -EINVAL; |
152 | } | 152 | } |
@@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
221 | !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ | 221 | !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ |
222 | continue; | 222 | continue; |
223 | 223 | ||
224 | amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0, | 224 | amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch); |
225 | need_ctx_switch); | ||
226 | need_ctx_switch = false; | 225 | need_ctx_switch = false; |
227 | } | 226 | } |
228 | 227 | ||
@@ -347,19 +346,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) | |||
347 | tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; | 346 | tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; |
348 | } | 347 | } |
349 | 348 | ||
350 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { | 349 | for (i = 0; i < adev->num_rings; ++i) { |
351 | struct amdgpu_ring *ring = adev->rings[i]; | 350 | struct amdgpu_ring *ring = adev->rings[i]; |
352 | long tmo; | 351 | long tmo; |
353 | 352 | ||
354 | if (!ring || !ring->ready) | 353 | /* KIQ rings don't have an IB test because we never submit IBs |
355 | continue; | 354 | * to them and they have no interrupt support. |
356 | |||
357 | /* skip IB tests for KIQ in general for the below reasons: | ||
358 | * 1. We never submit IBs to the KIQ | ||
359 | * 2. KIQ doesn't use the EOP interrupts, | ||
360 | * we use some other CP interrupt. | ||
361 | */ | 355 | */ |
362 | if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) | 356 | if (!ring->sched.ready || !ring->funcs->test_ib) |
363 | continue; | 357 | continue; |
364 | 358 | ||
365 | /* MM engine need more time */ | 359 | /* MM engine need more time */ |
@@ -374,20 +368,23 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) | |||
374 | tmo = tmo_gfx; | 368 | tmo = tmo_gfx; |
375 | 369 | ||
376 | r = amdgpu_ring_test_ib(ring, tmo); | 370 | r = amdgpu_ring_test_ib(ring, tmo); |
377 | if (r) { | 371 | if (!r) { |
378 | ring->ready = false; | 372 | DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n", |
379 | 373 | ring->name); | |
380 | if (ring == &adev->gfx.gfx_ring[0]) { | 374 | continue; |
381 | /* oh, oh, that's really bad */ | 375 | } |
382 | DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r); | 376 | |
383 | adev->accel_working = false; | 377 | ring->sched.ready = false; |
384 | return r; | 378 | DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n", |
385 | 379 | ring->name, r); | |
386 | } else { | 380 | |
387 | /* still not good, but we can live with it */ | 381 | if (ring == &adev->gfx.gfx_ring[0]) { |
388 | DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r); | 382 | /* oh, oh, that's really bad */ |
389 | ret = r; | 383 | adev->accel_working = false; |
390 | } | 384 | return r; |
385 | |||
386 | } else { | ||
387 | ret = r; | ||
391 | } | 388 | } |
392 | } | 389 | } |
393 | return ret; | 390 | return ret; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 52c17f6219a7..6b6524f04ce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | |||
@@ -94,23 +94,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) | |||
94 | } | 94 | } |
95 | 95 | ||
96 | /** | 96 | /** |
97 | * amdgpu_irq_reset_work_func - execute GPU reset | ||
98 | * | ||
99 | * @work: work struct pointer | ||
100 | * | ||
101 | * Execute scheduled GPU reset (Cayman+). | ||
102 | * This function is called when the IRQ handler thinks we need a GPU reset. | ||
103 | */ | ||
104 | static void amdgpu_irq_reset_work_func(struct work_struct *work) | ||
105 | { | ||
106 | struct amdgpu_device *adev = container_of(work, struct amdgpu_device, | ||
107 | reset_work); | ||
108 | |||
109 | if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev)) | ||
110 | amdgpu_device_gpu_recover(adev, NULL); | ||
111 | } | ||
112 | |||
113 | /** | ||
114 | * amdgpu_irq_disable_all - disable *all* interrupts | 97 | * amdgpu_irq_disable_all - disable *all* interrupts |
115 | * | 98 | * |
116 | * @adev: amdgpu device pointer | 99 | * @adev: amdgpu device pointer |
@@ -262,15 +245,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev) | |||
262 | amdgpu_hotplug_work_func); | 245 | amdgpu_hotplug_work_func); |
263 | } | 246 | } |
264 | 247 | ||
265 | INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func); | ||
266 | |||
267 | adev->irq.installed = true; | 248 | adev->irq.installed = true; |
268 | r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); | 249 | r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); |
269 | if (r) { | 250 | if (r) { |
270 | adev->irq.installed = false; | 251 | adev->irq.installed = false; |
271 | if (!amdgpu_device_has_dc_support(adev)) | 252 | if (!amdgpu_device_has_dc_support(adev)) |
272 | flush_work(&adev->hotplug_work); | 253 | flush_work(&adev->hotplug_work); |
273 | cancel_work_sync(&adev->reset_work); | ||
274 | return r; | 254 | return r; |
275 | } | 255 | } |
276 | adev->ddev->max_vblank_count = 0x00ffffff; | 256 | adev->ddev->max_vblank_count = 0x00ffffff; |
@@ -299,7 +279,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) | |||
299 | pci_disable_msi(adev->pdev); | 279 | pci_disable_msi(adev->pdev); |
300 | if (!amdgpu_device_has_dc_support(adev)) | 280 | if (!amdgpu_device_has_dc_support(adev)) |
301 | flush_work(&adev->hotplug_work); | 281 | flush_work(&adev->hotplug_work); |
302 | cancel_work_sync(&adev->reset_work); | ||
303 | } | 282 | } |
304 | 283 | ||
305 | for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { | 284 | for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 755f733bf0d9..e0af44fd6a0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | |||
@@ -112,6 +112,8 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) | |||
112 | struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); | 112 | struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); |
113 | struct amdgpu_job *job = to_amdgpu_job(s_job); | 113 | struct amdgpu_job *job = to_amdgpu_job(s_job); |
114 | 114 | ||
115 | drm_sched_job_cleanup(s_job); | ||
116 | |||
115 | amdgpu_ring_priority_put(ring, s_job->s_priority); | 117 | amdgpu_ring_priority_put(ring, s_job->s_priority); |
116 | dma_fence_put(job->fence); | 118 | dma_fence_put(job->fence); |
117 | amdgpu_sync_free(&job->sync); | 119 | amdgpu_sync_free(&job->sync); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 57cfe78a262b..e1b46a6703de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | |||
@@ -33,6 +33,8 @@ | |||
33 | #define to_amdgpu_job(sched_job) \ | 33 | #define to_amdgpu_job(sched_job) \ |
34 | container_of((sched_job), struct amdgpu_job, base) | 34 | container_of((sched_job), struct amdgpu_job, base) |
35 | 35 | ||
36 | #define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0) | ||
37 | |||
36 | struct amdgpu_fence; | 38 | struct amdgpu_fence; |
37 | 39 | ||
38 | struct amdgpu_job { | 40 | struct amdgpu_job { |
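Note: AMDGPU_JOB_GET_VMID() exists so that ring backends can still derive a VMID when no job is attached (direct submissions pass job == NULL). A minimal sketch of an emit_ib callback using it under the new signature introduced in amdgpu_ring.h below; the function name and the packet layout are illustrative, not taken from any real backend:

/* illustrative only: packet encoding is ASIC specific */
static void my_ring_emit_ib(struct amdgpu_ring *ring,
                            struct amdgpu_job *job,
                            struct amdgpu_ib *ib,
                            bool ctx_switch)
{
        unsigned int vmid = AMDGPU_JOB_GET_VMID(job);   /* 0 when job == NULL */

        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, ib->length_dw | (vmid << 24));
}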
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 81732a84c2ab..9b3164c0f861 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | |||
@@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, | |||
336 | case AMDGPU_HW_IP_GFX: | 336 | case AMDGPU_HW_IP_GFX: |
337 | type = AMD_IP_BLOCK_TYPE_GFX; | 337 | type = AMD_IP_BLOCK_TYPE_GFX; |
338 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) | 338 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
339 | if (adev->gfx.gfx_ring[i].ready) | 339 | if (adev->gfx.gfx_ring[i].sched.ready) |
340 | ++num_rings; | 340 | ++num_rings; |
341 | ib_start_alignment = 32; | 341 | ib_start_alignment = 32; |
342 | ib_size_alignment = 32; | 342 | ib_size_alignment = 32; |
@@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, | |||
344 | case AMDGPU_HW_IP_COMPUTE: | 344 | case AMDGPU_HW_IP_COMPUTE: |
345 | type = AMD_IP_BLOCK_TYPE_GFX; | 345 | type = AMD_IP_BLOCK_TYPE_GFX; |
346 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 346 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
347 | if (adev->gfx.compute_ring[i].ready) | 347 | if (adev->gfx.compute_ring[i].sched.ready) |
348 | ++num_rings; | 348 | ++num_rings; |
349 | ib_start_alignment = 32; | 349 | ib_start_alignment = 32; |
350 | ib_size_alignment = 32; | 350 | ib_size_alignment = 32; |
@@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, | |||
352 | case AMDGPU_HW_IP_DMA: | 352 | case AMDGPU_HW_IP_DMA: |
353 | type = AMD_IP_BLOCK_TYPE_SDMA; | 353 | type = AMD_IP_BLOCK_TYPE_SDMA; |
354 | for (i = 0; i < adev->sdma.num_instances; i++) | 354 | for (i = 0; i < adev->sdma.num_instances; i++) |
355 | if (adev->sdma.instance[i].ring.ready) | 355 | if (adev->sdma.instance[i].ring.sched.ready) |
356 | ++num_rings; | 356 | ++num_rings; |
357 | ib_start_alignment = 256; | 357 | ib_start_alignment = 256; |
358 | ib_size_alignment = 4; | 358 | ib_size_alignment = 4; |
@@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, | |||
363 | if (adev->uvd.harvest_config & (1 << i)) | 363 | if (adev->uvd.harvest_config & (1 << i)) |
364 | continue; | 364 | continue; |
365 | 365 | ||
366 | if (adev->uvd.inst[i].ring.ready) | 366 | if (adev->uvd.inst[i].ring.sched.ready) |
367 | ++num_rings; | 367 | ++num_rings; |
368 | } | 368 | } |
369 | ib_start_alignment = 64; | 369 | ib_start_alignment = 64; |
@@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, | |||
372 | case AMDGPU_HW_IP_VCE: | 372 | case AMDGPU_HW_IP_VCE: |
373 | type = AMD_IP_BLOCK_TYPE_VCE; | 373 | type = AMD_IP_BLOCK_TYPE_VCE; |
374 | for (i = 0; i < adev->vce.num_rings; i++) | 374 | for (i = 0; i < adev->vce.num_rings; i++) |
375 | if (adev->vce.ring[i].ready) | 375 | if (adev->vce.ring[i].sched.ready) |
376 | ++num_rings; | 376 | ++num_rings; |
377 | ib_start_alignment = 4; | 377 | ib_start_alignment = 4; |
378 | ib_size_alignment = 1; | 378 | ib_size_alignment = 1; |
@@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, | |||
384 | continue; | 384 | continue; |
385 | 385 | ||
386 | for (j = 0; j < adev->uvd.num_enc_rings; j++) | 386 | for (j = 0; j < adev->uvd.num_enc_rings; j++) |
387 | if (adev->uvd.inst[i].ring_enc[j].ready) | 387 | if (adev->uvd.inst[i].ring_enc[j].sched.ready) |
388 | ++num_rings; | 388 | ++num_rings; |
389 | } | 389 | } |
390 | ib_start_alignment = 64; | 390 | ib_start_alignment = 64; |
@@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, | |||
392 | break; | 392 | break; |
393 | case AMDGPU_HW_IP_VCN_DEC: | 393 | case AMDGPU_HW_IP_VCN_DEC: |
394 | type = AMD_IP_BLOCK_TYPE_VCN; | 394 | type = AMD_IP_BLOCK_TYPE_VCN; |
395 | if (adev->vcn.ring_dec.ready) | 395 | if (adev->vcn.ring_dec.sched.ready) |
396 | ++num_rings; | 396 | ++num_rings; |
397 | ib_start_alignment = 16; | 397 | ib_start_alignment = 16; |
398 | ib_size_alignment = 16; | 398 | ib_size_alignment = 16; |
@@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, | |||
400 | case AMDGPU_HW_IP_VCN_ENC: | 400 | case AMDGPU_HW_IP_VCN_ENC: |
401 | type = AMD_IP_BLOCK_TYPE_VCN; | 401 | type = AMD_IP_BLOCK_TYPE_VCN; |
402 | for (i = 0; i < adev->vcn.num_enc_rings; i++) | 402 | for (i = 0; i < adev->vcn.num_enc_rings; i++) |
403 | if (adev->vcn.ring_enc[i].ready) | 403 | if (adev->vcn.ring_enc[i].sched.ready) |
404 | ++num_rings; | 404 | ++num_rings; |
405 | ib_start_alignment = 64; | 405 | ib_start_alignment = 64; |
406 | ib_size_alignment = 1; | 406 | ib_size_alignment = 1; |
407 | break; | 407 | break; |
408 | case AMDGPU_HW_IP_VCN_JPEG: | 408 | case AMDGPU_HW_IP_VCN_JPEG: |
409 | type = AMD_IP_BLOCK_TYPE_VCN; | 409 | type = AMD_IP_BLOCK_TYPE_VCN; |
410 | if (adev->vcn.ring_jpeg.ready) | 410 | if (adev->vcn.ring_jpeg.sched.ready) |
411 | ++num_rings; | 411 | ++num_rings; |
412 | ib_start_alignment = 16; | 412 | ib_start_alignment = 16; |
413 | ib_size_alignment = 16; | 413 | ib_size_alignment = 16; |
@@ -978,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) | |||
978 | } | 978 | } |
979 | 979 | ||
980 | if (amdgpu_sriov_vf(adev)) { | 980 | if (amdgpu_sriov_vf(adev)) { |
981 | r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); | 981 | uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; |
982 | |||
983 | r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, | ||
984 | &fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE); | ||
982 | if (r) | 985 | if (r) |
983 | goto error_vm; | 986 | goto error_vm; |
984 | } | 987 | } |
@@ -1048,8 +1051,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, | |||
1048 | pasid = fpriv->vm.pasid; | 1051 | pasid = fpriv->vm.pasid; |
1049 | pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); | 1052 | pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); |
1050 | 1053 | ||
1051 | amdgpu_vm_fini(adev, &fpriv->vm); | ||
1052 | amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); | 1054 | amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); |
1055 | amdgpu_vm_fini(adev, &fpriv->vm); | ||
1053 | 1056 | ||
1054 | if (pasid) | 1057 | if (pasid) |
1055 | amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); | 1058 | amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index b9e9e8b02fb7..11723d8fffbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | |||
@@ -57,7 +57,6 @@ struct amdgpu_hpd; | |||
57 | #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) | 57 | #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) |
58 | #define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base) | 58 | #define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base) |
59 | #define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base) | 59 | #define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base) |
60 | #define to_amdgpu_plane(x) container_of(x, struct amdgpu_plane, base) | ||
61 | 60 | ||
62 | #define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base); | 61 | #define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base); |
63 | 62 | ||
@@ -325,7 +324,7 @@ struct amdgpu_mode_info { | |||
325 | struct card_info *atom_card_info; | 324 | struct card_info *atom_card_info; |
326 | bool mode_config_initialized; | 325 | bool mode_config_initialized; |
327 | struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS]; | 326 | struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS]; |
328 | struct amdgpu_plane *planes[AMDGPU_MAX_PLANES]; | 327 | struct drm_plane *planes[AMDGPU_MAX_PLANES]; |
329 | struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS]; | 328 | struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS]; |
330 | /* DVI-I properties */ | 329 | /* DVI-I properties */ |
331 | struct drm_property *coherent_mode_property; | 330 | struct drm_property *coherent_mode_property; |
@@ -434,11 +433,6 @@ struct amdgpu_crtc { | |||
434 | struct drm_pending_vblank_event *event; | 433 | struct drm_pending_vblank_event *event; |
435 | }; | 434 | }; |
436 | 435 | ||
437 | struct amdgpu_plane { | ||
438 | struct drm_plane base; | ||
439 | enum drm_plane_type plane_type; | ||
440 | }; | ||
441 | |||
442 | struct amdgpu_encoder_atom_dig { | 436 | struct amdgpu_encoder_atom_dig { |
443 | bool linkb; | 437 | bool linkb; |
444 | /* atom dig */ | 438 | /* atom dig */ |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 59cc678de8c1..7235cd0b0fa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | |||
@@ -2129,7 +2129,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) | |||
2129 | 2129 | ||
2130 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { | 2130 | for (i = 0; i < AMDGPU_MAX_RINGS; i++) { |
2131 | struct amdgpu_ring *ring = adev->rings[i]; | 2131 | struct amdgpu_ring *ring = adev->rings[i]; |
2132 | if (ring && ring->ready) | 2132 | if (ring && ring->sched.ready) |
2133 | amdgpu_fence_wait_empty(ring); | 2133 | amdgpu_fence_wait_empty(ring); |
2134 | } | 2134 | } |
2135 | 2135 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 25d2f3e757f1..e05dc66b1090 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | |||
@@ -90,6 +90,8 @@ static int psp_sw_fini(void *handle) | |||
90 | adev->psp.sos_fw = NULL; | 90 | adev->psp.sos_fw = NULL; |
91 | release_firmware(adev->psp.asd_fw); | 91 | release_firmware(adev->psp.asd_fw); |
92 | adev->psp.asd_fw = NULL; | 92 | adev->psp.asd_fw = NULL; |
93 | release_firmware(adev->psp.ta_fw); | ||
94 | adev->psp.ta_fw = NULL; | ||
93 | return 0; | 95 | return 0; |
94 | } | 96 | } |
95 | 97 | ||
@@ -118,21 +120,25 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, | |||
118 | static int | 120 | static int |
119 | psp_cmd_submit_buf(struct psp_context *psp, | 121 | psp_cmd_submit_buf(struct psp_context *psp, |
120 | struct amdgpu_firmware_info *ucode, | 122 | struct amdgpu_firmware_info *ucode, |
121 | struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr, | 123 | struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr) |
122 | int index) | ||
123 | { | 124 | { |
124 | int ret; | 125 | int ret; |
126 | int index; | ||
125 | 127 | ||
126 | memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); | 128 | memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); |
127 | 129 | ||
128 | memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); | 130 | memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); |
129 | 131 | ||
132 | index = atomic_inc_return(&psp->fence_value); | ||
130 | ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, | 133 | ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, |
131 | fence_mc_addr, index); | 134 | fence_mc_addr, index); |
135 | if (ret) { | ||
136 | atomic_dec(&psp->fence_value); | ||
137 | return ret; | ||
138 | } | ||
132 | 139 | ||
133 | while (*((unsigned int *)psp->fence_buf) != index) { | 140 | while (*((unsigned int *)psp->fence_buf) != index) |
134 | msleep(1); | 141 | msleep(1); |
135 | } | ||
136 | 142 | ||
137 | /* the status field must be 0 after FW is loaded */ | 143 | /* the status field must be 0 after FW is loaded */ |
138 | if (ucode && psp->cmd_buf_mem->resp.status) { | 144 | if (ucode && psp->cmd_buf_mem->resp.status) { |
@@ -191,7 +197,7 @@ static int psp_tmr_load(struct psp_context *psp) | |||
191 | PSP_TMR_SIZE, psp->tmr_mc_addr); | 197 | PSP_TMR_SIZE, psp->tmr_mc_addr); |
192 | 198 | ||
193 | ret = psp_cmd_submit_buf(psp, NULL, cmd, | 199 | ret = psp_cmd_submit_buf(psp, NULL, cmd, |
194 | psp->fence_buf_mc_addr, 1); | 200 | psp->fence_buf_mc_addr); |
195 | if (ret) | 201 | if (ret) |
196 | goto failed; | 202 | goto failed; |
197 | 203 | ||
@@ -258,13 +264,194 @@ static int psp_asd_load(struct psp_context *psp) | |||
258 | psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); | 264 | psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); |
259 | 265 | ||
260 | ret = psp_cmd_submit_buf(psp, NULL, cmd, | 266 | ret = psp_cmd_submit_buf(psp, NULL, cmd, |
261 | psp->fence_buf_mc_addr, 2); | 267 | psp->fence_buf_mc_addr); |
268 | |||
269 | kfree(cmd); | ||
270 | |||
271 | return ret; | ||
272 | } | ||
273 | |||
274 | static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, | ||
275 | uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, | ||
276 | uint32_t xgmi_ta_size, uint32_t shared_size) | ||
277 | { | ||
278 | cmd->cmd_id = GFX_CMD_ID_LOAD_TA; | ||
279 | cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc); | ||
280 | cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc); | ||
281 | cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size; | ||
282 | |||
283 | cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared); | ||
284 | cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared); | ||
285 | cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; | ||
286 | } | ||
287 | |||
288 | static int psp_xgmi_init_shared_buf(struct psp_context *psp) | ||
289 | { | ||
290 | int ret; | ||
291 | |||
292 | /* | ||
294 | * Allocate 16 KB of memory aligned to 4 KB from the frame buffer | ||
295 | * (local physical) for XGMI TA <-> driver communication. | ||
295 | */ | ||
296 | ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE, | ||
297 | PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, | ||
298 | &psp->xgmi_context.xgmi_shared_bo, | ||
299 | &psp->xgmi_context.xgmi_shared_mc_addr, | ||
300 | &psp->xgmi_context.xgmi_shared_buf); | ||
301 | |||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | static int psp_xgmi_load(struct psp_context *psp) | ||
306 | { | ||
307 | int ret; | ||
308 | struct psp_gfx_cmd_resp *cmd; | ||
309 | |||
310 | /* | ||
311 | * TODO: bypass the loading in sriov for now | ||
312 | */ | ||
313 | if (amdgpu_sriov_vf(psp->adev)) | ||
314 | return 0; | ||
315 | |||
316 | cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); | ||
317 | if (!cmd) | ||
318 | return -ENOMEM; | ||
319 | |||
320 | memset(psp->fw_pri_buf, 0, PSP_1_MEG); | ||
321 | memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size); | ||
322 | |||
323 | psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, | ||
324 | psp->xgmi_context.xgmi_shared_mc_addr, | ||
325 | psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE); | ||
326 | |||
327 | ret = psp_cmd_submit_buf(psp, NULL, cmd, | ||
328 | psp->fence_buf_mc_addr); | ||
329 | |||
330 | if (!ret) { | ||
331 | psp->xgmi_context.initialized = 1; | ||
332 | psp->xgmi_context.session_id = cmd->resp.session_id; | ||
333 | } | ||
334 | |||
335 | kfree(cmd); | ||
336 | |||
337 | return ret; | ||
338 | } | ||
339 | |||
340 | static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, | ||
341 | uint32_t xgmi_session_id) | ||
342 | { | ||
343 | cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; | ||
344 | cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id; | ||
345 | } | ||
346 | |||
347 | static int psp_xgmi_unload(struct psp_context *psp) | ||
348 | { | ||
349 | int ret; | ||
350 | struct psp_gfx_cmd_resp *cmd; | ||
351 | |||
352 | /* | ||
353 | * TODO: bypass the unloading in sriov for now | ||
354 | */ | ||
355 | if (amdgpu_sriov_vf(psp->adev)) | ||
356 | return 0; | ||
357 | |||
358 | cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); | ||
359 | if (!cmd) | ||
360 | return -ENOMEM; | ||
361 | |||
362 | psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); | ||
363 | |||
364 | ret = psp_cmd_submit_buf(psp, NULL, cmd, | ||
365 | psp->fence_buf_mc_addr); | ||
262 | 366 | ||
263 | kfree(cmd); | 367 | kfree(cmd); |
264 | 368 | ||
265 | return ret; | 369 | return ret; |
266 | } | 370 | } |
267 | 371 | ||
372 | static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, | ||
373 | uint32_t ta_cmd_id, | ||
374 | uint32_t xgmi_session_id) | ||
375 | { | ||
376 | cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; | ||
377 | cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id; | ||
378 | cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; | ||
379 | /* Note: cmd_invoke_cmd.buf is not used for now */ | ||
380 | } | ||
381 | |||
382 | int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id) | ||
383 | { | ||
384 | int ret; | ||
385 | struct psp_gfx_cmd_resp *cmd; | ||
386 | |||
387 | /* | ||
388 | * TODO: bypass the loading in sriov for now | ||
389 | */ | ||
390 | if (amdgpu_sriov_vf(psp->adev)) | ||
391 | return 0; | ||
392 | |||
393 | cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); | ||
394 | if (!cmd) | ||
395 | return -ENOMEM; | ||
396 | |||
397 | psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id, | ||
398 | psp->xgmi_context.session_id); | ||
399 | |||
400 | ret = psp_cmd_submit_buf(psp, NULL, cmd, | ||
401 | psp->fence_buf_mc_addr); | ||
402 | |||
403 | kfree(cmd); | ||
404 | |||
405 | return ret; | ||
406 | } | ||
407 | |||
408 | static int psp_xgmi_terminate(struct psp_context *psp) | ||
409 | { | ||
410 | int ret; | ||
411 | |||
412 | if (!psp->xgmi_context.initialized) | ||
413 | return 0; | ||
414 | |||
415 | ret = psp_xgmi_unload(psp); | ||
416 | if (ret) | ||
417 | return ret; | ||
418 | |||
419 | psp->xgmi_context.initialized = 0; | ||
420 | |||
421 | /* free xgmi shared memory */ | ||
422 | amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo, | ||
423 | &psp->xgmi_context.xgmi_shared_mc_addr, | ||
424 | &psp->xgmi_context.xgmi_shared_buf); | ||
425 | |||
426 | return 0; | ||
427 | } | ||
428 | |||
429 | static int psp_xgmi_initialize(struct psp_context *psp) | ||
430 | { | ||
431 | struct ta_xgmi_shared_memory *xgmi_cmd; | ||
432 | int ret; | ||
433 | |||
434 | if (!psp->xgmi_context.initialized) { | ||
435 | ret = psp_xgmi_init_shared_buf(psp); | ||
436 | if (ret) | ||
437 | return ret; | ||
438 | } | ||
439 | |||
440 | /* Load XGMI TA */ | ||
441 | ret = psp_xgmi_load(psp); | ||
442 | if (ret) | ||
443 | return ret; | ||
444 | |||
445 | /* Initialize XGMI session */ | ||
446 | xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf); | ||
447 | memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); | ||
448 | xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; | ||
449 | |||
450 | ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); | ||
451 | |||
452 | return ret; | ||
453 | } | ||
454 | |||
268 | static int psp_hw_start(struct psp_context *psp) | 455 | static int psp_hw_start(struct psp_context *psp) |
269 | { | 456 | { |
270 | struct amdgpu_device *adev = psp->adev; | 457 | struct amdgpu_device *adev = psp->adev; |
@@ -292,6 +479,15 @@ static int psp_hw_start(struct psp_context *psp) | |||
292 | if (ret) | 479 | if (ret) |
293 | return ret; | 480 | return ret; |
294 | 481 | ||
482 | if (adev->gmc.xgmi.num_physical_nodes > 1) { | ||
483 | ret = psp_xgmi_initialize(psp); | ||
484 | /* Warn on an XGMI session initialization failure | ||
485 | * instead of stopping driver initialization. | ||
486 | */ | ||
487 | if (ret) | ||
488 | dev_err(psp->adev->dev, | ||
489 | "XGMI: Failed to initialize XGMI session\n"); | ||
490 | } | ||
295 | return 0; | 491 | return 0; |
296 | } | 492 | } |
297 | 493 | ||
@@ -321,7 +517,7 @@ static int psp_np_fw_load(struct psp_context *psp) | |||
321 | return ret; | 517 | return ret; |
322 | 518 | ||
323 | ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, | 519 | ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, |
324 | psp->fence_buf_mc_addr, i + 3); | 520 | psp->fence_buf_mc_addr); |
325 | if (ret) | 521 | if (ret) |
326 | return ret; | 522 | return ret; |
327 | 523 | ||
@@ -452,6 +648,10 @@ static int psp_hw_fini(void *handle) | |||
452 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) | 648 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) |
453 | return 0; | 649 | return 0; |
454 | 650 | ||
651 | if (adev->gmc.xgmi.num_physical_nodes > 1 && | ||
652 | psp->xgmi_context.initialized == 1) | ||
653 | psp_xgmi_terminate(psp); | ||
654 | |||
455 | psp_ring_destroy(psp, PSP_RING_TYPE__KM); | 655 | psp_ring_destroy(psp, PSP_RING_TYPE__KM); |
456 | 656 | ||
457 | amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); | 657 | amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); |
@@ -479,6 +679,15 @@ static int psp_suspend(void *handle) | |||
479 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) | 679 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) |
480 | return 0; | 680 | return 0; |
481 | 681 | ||
682 | if (adev->gmc.xgmi.num_physical_nodes > 1 && | ||
683 | psp->xgmi_context.initialized == 1) { | ||
684 | ret = psp_xgmi_terminate(psp); | ||
685 | if (ret) { | ||
686 | DRM_ERROR("Failed to terminate xgmi ta\n"); | ||
687 | return ret; | ||
688 | } | ||
689 | } | ||
690 | |||
482 | ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); | 691 | ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); |
483 | if (ret) { | 692 | if (ret) { |
484 | DRM_ERROR("PSP ring stop failed\n"); | 693 | DRM_ERROR("PSP ring stop failed\n"); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 8b8720e9c3f0..9ec5d1a666a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | |||
@@ -27,14 +27,17 @@ | |||
27 | 27 | ||
28 | #include "amdgpu.h" | 28 | #include "amdgpu.h" |
29 | #include "psp_gfx_if.h" | 29 | #include "psp_gfx_if.h" |
30 | #include "ta_xgmi_if.h" | ||
30 | 31 | ||
31 | #define PSP_FENCE_BUFFER_SIZE 0x1000 | 32 | #define PSP_FENCE_BUFFER_SIZE 0x1000 |
32 | #define PSP_CMD_BUFFER_SIZE 0x1000 | 33 | #define PSP_CMD_BUFFER_SIZE 0x1000 |
33 | #define PSP_ASD_SHARED_MEM_SIZE 0x4000 | 34 | #define PSP_ASD_SHARED_MEM_SIZE 0x4000 |
35 | #define PSP_XGMI_SHARED_MEM_SIZE 0x4000 | ||
34 | #define PSP_1_MEG 0x100000 | 36 | #define PSP_1_MEG 0x100000 |
35 | #define PSP_TMR_SIZE 0x400000 | 37 | #define PSP_TMR_SIZE 0x400000 |
36 | 38 | ||
37 | struct psp_context; | 39 | struct psp_context; |
40 | struct psp_xgmi_node_info; | ||
38 | struct psp_xgmi_topology_info; | 41 | struct psp_xgmi_topology_info; |
39 | 42 | ||
40 | enum psp_ring_type | 43 | enum psp_ring_type |
@@ -80,12 +83,20 @@ struct psp_funcs | |||
80 | enum AMDGPU_UCODE_ID ucode_type); | 83 | enum AMDGPU_UCODE_ID ucode_type); |
81 | bool (*smu_reload_quirk)(struct psp_context *psp); | 84 | bool (*smu_reload_quirk)(struct psp_context *psp); |
82 | int (*mode1_reset)(struct psp_context *psp); | 85 | int (*mode1_reset)(struct psp_context *psp); |
83 | uint64_t (*xgmi_get_device_id)(struct psp_context *psp); | 86 | uint64_t (*xgmi_get_node_id)(struct psp_context *psp); |
84 | uint64_t (*xgmi_get_hive_id)(struct psp_context *psp); | 87 | uint64_t (*xgmi_get_hive_id)(struct psp_context *psp); |
85 | int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices, | 88 | int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices, |
86 | struct psp_xgmi_topology_info *topology); | 89 | struct psp_xgmi_topology_info *topology); |
87 | int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices, | 90 | int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices, |
88 | struct psp_xgmi_topology_info *topology); | 91 | struct psp_xgmi_topology_info *topology); |
92 | }; | ||
93 | |||
94 | struct psp_xgmi_context { | ||
95 | uint8_t initialized; | ||
96 | uint32_t session_id; | ||
97 | struct amdgpu_bo *xgmi_shared_bo; | ||
98 | uint64_t xgmi_shared_mc_addr; | ||
99 | void *xgmi_shared_buf; | ||
89 | }; | 100 | }; |
90 | 101 | ||
91 | struct psp_context | 102 | struct psp_context |
@@ -96,7 +107,7 @@ struct psp_context | |||
96 | 107 | ||
97 | const struct psp_funcs *funcs; | 108 | const struct psp_funcs *funcs; |
98 | 109 | ||
99 | /* fence buffer */ | 110 | /* firmware buffer */ |
100 | struct amdgpu_bo *fw_pri_bo; | 111 | struct amdgpu_bo *fw_pri_bo; |
101 | uint64_t fw_pri_mc_addr; | 112 | uint64_t fw_pri_mc_addr; |
102 | void *fw_pri_buf; | 113 | void *fw_pri_buf; |
@@ -134,6 +145,16 @@ struct psp_context | |||
134 | struct amdgpu_bo *cmd_buf_bo; | 145 | struct amdgpu_bo *cmd_buf_bo; |
135 | uint64_t cmd_buf_mc_addr; | 146 | uint64_t cmd_buf_mc_addr; |
136 | struct psp_gfx_cmd_resp *cmd_buf_mem; | 147 | struct psp_gfx_cmd_resp *cmd_buf_mem; |
148 | |||
149 | /* fence value associated with cmd buffer */ | ||
150 | atomic_t fence_value; | ||
151 | |||
152 | /* xgmi ta firmware and buffer */ | ||
153 | const struct firmware *ta_fw; | ||
154 | uint32_t ta_xgmi_ucode_version; | ||
155 | uint32_t ta_xgmi_ucode_size; | ||
156 | uint8_t *ta_xgmi_start_addr; | ||
157 | struct psp_xgmi_context xgmi_context; | ||
137 | }; | 158 | }; |
138 | 159 | ||
139 | struct amdgpu_psp_funcs { | 160 | struct amdgpu_psp_funcs { |
@@ -141,21 +162,17 @@ struct amdgpu_psp_funcs { | |||
141 | enum AMDGPU_UCODE_ID); | 162 | enum AMDGPU_UCODE_ID); |
142 | }; | 163 | }; |
143 | 164 | ||
165 | #define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 | ||
166 | struct psp_xgmi_node_info { | ||
167 | uint64_t node_id; | ||
168 | uint8_t num_hops; | ||
169 | uint8_t is_sharing_enabled; | ||
170 | enum ta_xgmi_assigned_sdma_engine sdma_engine; | ||
171 | }; | ||
172 | |||
144 | struct psp_xgmi_topology_info { | 173 | struct psp_xgmi_topology_info { |
145 | /* Generated by PSP to identify the GPU instance within xgmi connection */ | 174 | uint32_t num_nodes; |
146 | uint64_t device_id; | 175 | struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; |
147 | /* | ||
148 | * If all bits set to 0 , driver indicates it wants to retrieve the xgmi | ||
149 | * connection vector topology, but not access enable the connections | ||
150 | * if some or all bits are set to 1, driver indicates it want to retrieve the | ||
151 | * current xgmi topology and access enable the link to GPU[i] associated | ||
152 | * with the bit position in the vector. | ||
153 | * On return,: bits indicated which xgmi links are present/active depending | ||
154 | * on the value passed in. The relative bit offset for the relative GPU index | ||
155 | * within the hive is always marked active. | ||
156 | */ | ||
157 | uint32_t connection_mask; | ||
158 | uint32_t reserved; /* must be 0 */ | ||
159 | }; | 176 | }; |
160 | 177 | ||
161 | #define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) | 178 | #define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) |
@@ -177,8 +194,8 @@ struct psp_xgmi_topology_info { | |||
177 | ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) | 194 | ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) |
178 | #define psp_mode1_reset(psp) \ | 195 | #define psp_mode1_reset(psp) \ |
179 | ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) | 196 | ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) |
180 | #define psp_xgmi_get_device_id(psp) \ | 197 | #define psp_xgmi_get_node_id(psp) \ |
181 | ((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0) | 198 | ((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp)) : 0) |
182 | #define psp_xgmi_get_hive_id(psp) \ | 199 | #define psp_xgmi_get_hive_id(psp) \ |
183 | ((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0) | 200 | ((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0) |
184 | #define psp_xgmi_get_topology_info(psp, num_device, topology) \ | 201 | #define psp_xgmi_get_topology_info(psp, num_device, topology) \ |
@@ -199,6 +216,8 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, | |||
199 | extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; | 216 | extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; |
200 | 217 | ||
201 | int psp_gpu_reset(struct amdgpu_device *adev); | 218 | int psp_gpu_reset(struct amdgpu_device *adev); |
219 | int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); | ||
220 | |||
202 | extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; | 221 | extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; |
203 | 222 | ||
204 | #endif | 223 | #endif |
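Note: psp_xgmi_topology_info no longer carries a device_id/connection_mask pair; it is now a fixed-size array of per-node entries. A hedged sketch of how a consumer might walk the result of psp_xgmi_get_topology_info(), assuming adev and the hive device count "count" are already in scope; the variable names are illustrative:

/* illustrative: dump the per-node topology returned by the XGMI TA */
struct psp_xgmi_topology_info topology = {};
int i, ret;

topology.num_nodes = count;     /* number of devices in the hive, assumed known */
ret = psp_xgmi_get_topology_info(&adev->psp, count, &topology);
if (!ret) {
        for (i = 0; i < topology.num_nodes; i++)
                dev_info(adev->dev, "node 0x%llx: %u hop(s), sharing %s\n",
                         topology.nodes[i].node_id,
                         topology.nodes[i].num_hops,
                         topology.nodes[i].is_sharing_enabled ? "enabled" : "disabled");
}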
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index b70e85ec147d..5b75bdc8dc28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | |||
@@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, | |||
338 | */ | 338 | */ |
339 | void amdgpu_ring_fini(struct amdgpu_ring *ring) | 339 | void amdgpu_ring_fini(struct amdgpu_ring *ring) |
340 | { | 340 | { |
341 | ring->ready = false; | 341 | ring->sched.ready = false; |
342 | 342 | ||
343 | /* Not to finish a ring which is not initialized */ | 343 | /* Not to finish a ring which is not initialized */ |
344 | if (!(ring->adev) || !(ring->adev->rings[ring->idx])) | 344 | if (!(ring->adev) || !(ring->adev->rings[ring->idx])) |
@@ -500,3 +500,29 @@ static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring) | |||
500 | debugfs_remove(ring->ent); | 500 | debugfs_remove(ring->ent); |
501 | #endif | 501 | #endif |
502 | } | 502 | } |
503 | |||
504 | /** | ||
505 | * amdgpu_ring_test_helper - test the ring and set its scheduler readiness | ||
506 | * | ||
507 | * @ring: ring to test | ||
508 | * | ||
509 | * Tests the ring and sets the scheduler readiness status accordingly. | ||
510 | * | ||
511 | * Returns 0 on success, error on failure. | ||
512 | */ | ||
513 | int amdgpu_ring_test_helper(struct amdgpu_ring *ring) | ||
514 | { | ||
515 | struct amdgpu_device *adev = ring->adev; | ||
516 | int r; | ||
517 | |||
518 | r = amdgpu_ring_test_ring(ring); | ||
519 | if (r) | ||
520 | DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n", | ||
521 | ring->name, r); | ||
522 | else | ||
523 | DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n", | ||
524 | ring->name); | ||
525 | |||
526 | ring->sched.ready = !r; | ||
527 | return r; | ||
528 | } | ||
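Note: amdgpu_ring_test_helper() folds the ring test, the success/error message and the sched.ready update into one call, replacing the old open-coded "ring->ready = false on failure" handling. A minimal sketch of how an IP block's ring bring-up might use it; the surrounding function is hypothetical:

/* illustrative: ring bring-up in a hypothetical IP block */
static int my_ip_start_ring(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];

        /* sets ring->sched.ready = !r and logs the result */
        return amdgpu_ring_test_helper(ring);
}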
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4caa301ce454..0beb01fef83f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | |||
@@ -129,8 +129,9 @@ struct amdgpu_ring_funcs { | |||
129 | unsigned emit_ib_size; | 129 | unsigned emit_ib_size; |
130 | /* command emit functions */ | 130 | /* command emit functions */ |
131 | void (*emit_ib)(struct amdgpu_ring *ring, | 131 | void (*emit_ib)(struct amdgpu_ring *ring, |
132 | struct amdgpu_job *job, | ||
132 | struct amdgpu_ib *ib, | 133 | struct amdgpu_ib *ib, |
133 | unsigned vmid, bool ctx_switch); | 134 | bool ctx_switch); |
134 | void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, | 135 | void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, |
135 | uint64_t seq, unsigned flags); | 136 | uint64_t seq, unsigned flags); |
136 | void (*emit_pipeline_sync)(struct amdgpu_ring *ring); | 137 | void (*emit_pipeline_sync)(struct amdgpu_ring *ring); |
@@ -189,7 +190,6 @@ struct amdgpu_ring { | |||
189 | uint64_t gpu_addr; | 190 | uint64_t gpu_addr; |
190 | uint64_t ptr_mask; | 191 | uint64_t ptr_mask; |
191 | uint32_t buf_mask; | 192 | uint32_t buf_mask; |
192 | bool ready; | ||
193 | u32 idx; | 193 | u32 idx; |
194 | u32 me; | 194 | u32 me; |
195 | u32 pipe; | 195 | u32 pipe; |
@@ -229,7 +229,7 @@ struct amdgpu_ring { | |||
229 | #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) | 229 | #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) |
230 | #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) | 230 | #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) |
231 | #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) | 231 | #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) |
232 | #define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c)) | 232 | #define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c))) |
233 | #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) | 233 | #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) |
234 | #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) | 234 | #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) |
235 | #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) | 235 | #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) |
@@ -313,4 +313,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, | |||
313 | ring->count_dw -= count_dw; | 313 | ring->count_dw -= count_dw; |
314 | } | 314 | } |
315 | 315 | ||
316 | int amdgpu_ring_test_helper(struct amdgpu_ring *ring); | ||
317 | |||
316 | #endif | 318 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c new file mode 100644 index 000000000000..c8793e6cc3c5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c | |||
@@ -0,0 +1,282 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Advanced Micro Devices, Inc. | ||
3 | * Copyright 2008 Red Hat Inc. | ||
4 | * Copyright 2009 Jerome Glisse. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
22 | * OTHER DEALINGS IN THE SOFTWARE. | ||
23 | * | ||
24 | */ | ||
25 | #include <linux/firmware.h> | ||
26 | #include "amdgpu.h" | ||
27 | #include "amdgpu_gfx.h" | ||
28 | #include "amdgpu_rlc.h" | ||
29 | |||
30 | /** | ||
31 | * amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode | ||
32 | * | ||
33 | * @adev: amdgpu_device pointer | ||
34 | * | ||
35 | * Put the RLC into safe mode if it is enabled and not already in safe mode. | ||
36 | */ | ||
37 | void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) | ||
38 | { | ||
39 | if (adev->gfx.rlc.in_safe_mode) | ||
40 | return; | ||
41 | |||
42 | /* if RLC is not enabled, do nothing */ | ||
43 | if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev)) | ||
44 | return; | ||
45 | |||
46 | if (adev->cg_flags & | ||
47 | (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | | ||
48 | AMD_CG_SUPPORT_GFX_3D_CGCG)) { | ||
49 | adev->gfx.rlc.funcs->set_safe_mode(adev); | ||
50 | adev->gfx.rlc.in_safe_mode = true; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | /** | ||
55 | * amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode | ||
56 | * | ||
57 | * @adev: amdgpu_device pointer | ||
58 | * | ||
59 | * Take the RLC out of safe mode if it is enabled and currently in safe mode. | ||
60 | */ | ||
61 | void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev) | ||
62 | { | ||
63 | if (!(adev->gfx.rlc.in_safe_mode)) | ||
64 | return; | ||
65 | |||
66 | /* if RLC is not enabled, do nothing */ | ||
67 | if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev)) | ||
68 | return; | ||
69 | |||
70 | if (adev->cg_flags & | ||
71 | (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | | ||
72 | AMD_CG_SUPPORT_GFX_3D_CGCG)) { | ||
73 | adev->gfx.rlc.funcs->unset_safe_mode(adev); | ||
74 | adev->gfx.rlc.in_safe_mode = false; | ||
75 | } | ||
76 | } | ||
77 | |||
78 | /** | ||
79 | * amdgpu_gfx_rlc_init_sr - Init save restore block | ||
80 | * | ||
81 | * @adev: amdgpu_device pointer | ||
82 | * @dws: size of the save restore block, in dwords | ||
83 | * | ||
84 | * Allocate and set up the RLC save restore block. | ||
85 | * Returns 0 on success or a negative error code if allocation failed. | ||
86 | */ | ||
87 | int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) | ||
88 | { | ||
89 | const u32 *src_ptr; | ||
90 | volatile u32 *dst_ptr; | ||
91 | u32 i; | ||
92 | int r; | ||
93 | |||
94 | /* allocate save restore block */ | ||
95 | r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, | ||
96 | AMDGPU_GEM_DOMAIN_VRAM, | ||
97 | &adev->gfx.rlc.save_restore_obj, | ||
98 | &adev->gfx.rlc.save_restore_gpu_addr, | ||
99 | (void **)&adev->gfx.rlc.sr_ptr); | ||
100 | if (r) { | ||
101 | dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); | ||
102 | amdgpu_gfx_rlc_fini(adev); | ||
103 | return r; | ||
104 | } | ||
105 | |||
106 | /* write the sr buffer */ | ||
107 | src_ptr = adev->gfx.rlc.reg_list; | ||
108 | dst_ptr = adev->gfx.rlc.sr_ptr; | ||
109 | for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) | ||
110 | dst_ptr[i] = cpu_to_le32(src_ptr[i]); | ||
111 | amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); | ||
112 | amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); | ||
113 | |||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | /** | ||
118 | * amdgpu_gfx_rlc_init_csb - Init clear state block | ||
119 | * | ||
120 | * @adev: amdgpu_device pointer | ||
121 | * | ||
122 | * Allocate and set up the RLC clear state block. | ||
123 | * Returns 0 on success or a negative error code if allocation failed. | ||
124 | */ | ||
125 | int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) | ||
126 | { | ||
127 | volatile u32 *dst_ptr; | ||
128 | u32 dws; | ||
129 | int r; | ||
130 | |||
131 | /* allocate clear state block */ | ||
132 | adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev); | ||
133 | r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, | ||
134 | AMDGPU_GEM_DOMAIN_VRAM, | ||
135 | &adev->gfx.rlc.clear_state_obj, | ||
136 | &adev->gfx.rlc.clear_state_gpu_addr, | ||
137 | (void **)&adev->gfx.rlc.cs_ptr); | ||
138 | if (r) { | ||
139 | dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r); | ||
140 | amdgpu_gfx_rlc_fini(adev); | ||
141 | return r; | ||
142 | } | ||
143 | |||
144 | /* set up the cs buffer */ | ||
145 | dst_ptr = adev->gfx.rlc.cs_ptr; | ||
146 | adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr); | ||
147 | amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); | ||
148 | amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); | ||
149 | amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | /** | ||
155 | * amdgpu_gfx_rlc_init_cpt - Init cp table | ||
156 | * | ||
157 | * @adev: amdgpu_device pointer | ||
158 | * | ||
159 | * Allocate and set up the RLC CP table. | ||
160 | * Returns 0 on success or a negative error code if allocation failed. | ||
161 | */ | ||
162 | int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev) | ||
163 | { | ||
164 | int r; | ||
165 | |||
166 | r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, | ||
167 | PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, | ||
168 | &adev->gfx.rlc.cp_table_obj, | ||
169 | &adev->gfx.rlc.cp_table_gpu_addr, | ||
170 | (void **)&adev->gfx.rlc.cp_table_ptr); | ||
171 | if (r) { | ||
172 | dev_err(adev->dev, "(%d) failed to create cp table bo\n", r); | ||
173 | amdgpu_gfx_rlc_fini(adev); | ||
174 | return r; | ||
175 | } | ||
176 | |||
177 | /* set up the cp table */ | ||
178 | amdgpu_gfx_rlc_setup_cp_table(adev); | ||
179 | amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); | ||
180 | amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); | ||
181 | |||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | /** | ||
186 | * amdgpu_gfx_rlc_setup_cp_table - set up the CP table buffer | ||
187 | * | ||
188 | * @adev: amdgpu_device pointer | ||
189 | * | ||
190 | * Write CP firmware data into the CP table. | ||
191 | */ | ||
192 | void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev) | ||
193 | { | ||
194 | const __le32 *fw_data; | ||
195 | volatile u32 *dst_ptr; | ||
196 | int me, i, max_me; | ||
197 | u32 bo_offset = 0; | ||
198 | u32 table_offset, table_size; | ||
199 | |||
200 | max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev); | ||
201 | |||
202 | /* write the cp table buffer */ | ||
203 | dst_ptr = adev->gfx.rlc.cp_table_ptr; | ||
204 | for (me = 0; me < max_me; me++) { | ||
205 | if (me == 0) { | ||
206 | const struct gfx_firmware_header_v1_0 *hdr = | ||
207 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; | ||
208 | fw_data = (const __le32 *) | ||
209 | (adev->gfx.ce_fw->data + | ||
210 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
211 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
212 | table_size = le32_to_cpu(hdr->jt_size); | ||
213 | } else if (me == 1) { | ||
214 | const struct gfx_firmware_header_v1_0 *hdr = | ||
215 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; | ||
216 | fw_data = (const __le32 *) | ||
217 | (adev->gfx.pfp_fw->data + | ||
218 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
219 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
220 | table_size = le32_to_cpu(hdr->jt_size); | ||
221 | } else if (me == 2) { | ||
222 | const struct gfx_firmware_header_v1_0 *hdr = | ||
223 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; | ||
224 | fw_data = (const __le32 *) | ||
225 | (adev->gfx.me_fw->data + | ||
226 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
227 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
228 | table_size = le32_to_cpu(hdr->jt_size); | ||
229 | } else if (me == 3) { | ||
230 | const struct gfx_firmware_header_v1_0 *hdr = | ||
231 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; | ||
232 | fw_data = (const __le32 *) | ||
233 | (adev->gfx.mec_fw->data + | ||
234 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
235 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
236 | table_size = le32_to_cpu(hdr->jt_size); | ||
237 | } else if (me == 4) { | ||
238 | const struct gfx_firmware_header_v1_0 *hdr = | ||
239 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; | ||
240 | fw_data = (const __le32 *) | ||
241 | (adev->gfx.mec2_fw->data + | ||
242 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
243 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
244 | table_size = le32_to_cpu(hdr->jt_size); | ||
245 | } | ||
246 | |||
247 | for (i = 0; i < table_size; i ++) { | ||
248 | dst_ptr[bo_offset + i] = | ||
249 | cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); | ||
250 | } | ||
251 | |||
252 | bo_offset += table_size; | ||
253 | } | ||
254 | } | ||
255 | |||
256 | /** | ||
257 | * amdgpu_gfx_rlc_fini - Free the BOs used by the RLC | ||
258 | * | ||
259 | * @adev: amdgpu_device pointer | ||
260 | * | ||
261 | * Free the three BOs used for the RLC save restore block, clear state block | ||
262 | * and jump table block. | ||
263 | */ | ||
264 | void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev) | ||
265 | { | ||
266 | /* save restore block */ | ||
267 | if (adev->gfx.rlc.save_restore_obj) { | ||
268 | amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, | ||
269 | &adev->gfx.rlc.save_restore_gpu_addr, | ||
270 | (void **)&adev->gfx.rlc.sr_ptr); | ||
271 | } | ||
272 | |||
273 | /* clear state block */ | ||
274 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, | ||
275 | &adev->gfx.rlc.clear_state_gpu_addr, | ||
276 | (void **)&adev->gfx.rlc.cs_ptr); | ||
277 | |||
278 | /* jump table block */ | ||
279 | amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, | ||
280 | &adev->gfx.rlc.cp_table_gpu_addr, | ||
281 | (void **)&adev->gfx.rlc.cp_table_ptr); | ||
282 | } | ||
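Note: the helpers above are meant to be called from an ASIC's RLC init path once adev->gfx.rlc.funcs (get_csb_size/get_csb_buffer/get_cp_table_num, etc.) is wired up. A rough sketch of such an init routine; the function name and the exact conditions under which each block is needed are assumptions, not taken from a specific ASIC:

/* illustrative: a hypothetical ASIC-level RLC init built on the common helpers */
static int my_asic_rlc_init(struct amdgpu_device *adev)
{
        int r;

        if (adev->gfx.rlc.cs_data) {
                r = amdgpu_gfx_rlc_init_csb(adev);      /* clear state block */
                if (r)
                        return r;
        }

        if (adev->gfx.rlc.cp_table_size) {
                r = amdgpu_gfx_rlc_init_cpt(adev);      /* CP jump tables */
                if (r)
                        return r;
        }

        return 0;
}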
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h new file mode 100644 index 000000000000..49a8ab52113b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * Copyright 2014 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #ifndef __AMDGPU_RLC_H__ | ||
25 | #define __AMDGPU_RLC_H__ | ||
26 | |||
27 | #include "clearstate_defs.h" | ||
28 | |||
29 | struct amdgpu_rlc_funcs { | ||
30 | bool (*is_rlc_enabled)(struct amdgpu_device *adev); | ||
31 | void (*set_safe_mode)(struct amdgpu_device *adev); | ||
32 | void (*unset_safe_mode)(struct amdgpu_device *adev); | ||
33 | int (*init)(struct amdgpu_device *adev); | ||
34 | u32 (*get_csb_size)(struct amdgpu_device *adev); | ||
35 | void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); | ||
36 | int (*get_cp_table_num)(struct amdgpu_device *adev); | ||
37 | int (*resume)(struct amdgpu_device *adev); | ||
38 | void (*stop)(struct amdgpu_device *adev); | ||
39 | void (*reset)(struct amdgpu_device *adev); | ||
40 | void (*start)(struct amdgpu_device *adev); | ||
41 | }; | ||
42 | |||
43 | struct amdgpu_rlc { | ||
44 | /* for power gating */ | ||
45 | struct amdgpu_bo *save_restore_obj; | ||
46 | uint64_t save_restore_gpu_addr; | ||
47 | volatile uint32_t *sr_ptr; | ||
48 | const u32 *reg_list; | ||
49 | u32 reg_list_size; | ||
50 | /* for clear state */ | ||
51 | struct amdgpu_bo *clear_state_obj; | ||
52 | uint64_t clear_state_gpu_addr; | ||
53 | volatile uint32_t *cs_ptr; | ||
54 | const struct cs_section_def *cs_data; | ||
55 | u32 clear_state_size; | ||
56 | /* for cp tables */ | ||
57 | struct amdgpu_bo *cp_table_obj; | ||
58 | uint64_t cp_table_gpu_addr; | ||
59 | volatile uint32_t *cp_table_ptr; | ||
60 | u32 cp_table_size; | ||
61 | |||
62 | /* safe mode for updating CG/PG state */ | ||
63 | bool in_safe_mode; | ||
64 | const struct amdgpu_rlc_funcs *funcs; | ||
65 | |||
66 | /* for firmware data */ | ||
67 | u32 save_and_restore_offset; | ||
68 | u32 clear_state_descriptor_offset; | ||
69 | u32 avail_scratch_ram_locations; | ||
70 | u32 reg_restore_list_size; | ||
71 | u32 reg_list_format_start; | ||
72 | u32 reg_list_format_separate_start; | ||
73 | u32 starting_offsets_start; | ||
74 | u32 reg_list_format_size_bytes; | ||
75 | u32 reg_list_size_bytes; | ||
76 | u32 reg_list_format_direct_reg_list_length; | ||
77 | u32 save_restore_list_cntl_size_bytes; | ||
78 | u32 save_restore_list_gpm_size_bytes; | ||
79 | u32 save_restore_list_srm_size_bytes; | ||
80 | |||
81 | u32 *register_list_format; | ||
82 | u32 *register_restore; | ||
83 | u8 *save_restore_list_cntl; | ||
84 | u8 *save_restore_list_gpm; | ||
85 | u8 *save_restore_list_srm; | ||
86 | |||
87 | bool is_rlc_v2_1; | ||
88 | }; | ||
89 | |||
90 | void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); | ||
91 | void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev); | ||
92 | int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws); | ||
93 | int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); | ||
94 | int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); | ||
95 | void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev); | ||
96 | void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev); | ||
97 | |||
98 | #endif | ||
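Note: amdgpu_gfx_rlc_enter_safe_mode()/exit_safe_mode() only act when the backend's is_rlc_enabled() callback reports the RLC as running, so each ASIC is expected to populate adev->gfx.rlc.funcs with is_rlc_enabled/set_safe_mode/unset_safe_mode. A minimal sketch of a clockgating-update path bracketed by the new helpers; the function name and the register programming are illustrative:

/* illustrative: CG update bracketed by the common safe-mode helpers */
static void my_asic_update_gfx_cg(struct amdgpu_device *adev, bool enable)
{
        amdgpu_gfx_rlc_enter_safe_mode(adev);

        /* ... program the CGCG/MGCG registers according to "enable" while
         * the RLC is quiesced ...
         */

        amdgpu_gfx_rlc_exit_safe_mode(adev);
}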
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index bc9244b429ef..115bb0c99b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | |||
@@ -28,17 +28,31 @@ | |||
28 | * GPU SDMA IP block helpers function. | 28 | * GPU SDMA IP block helpers function. |
29 | */ | 29 | */ |
30 | 30 | ||
31 | struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) | 31 | struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring) |
32 | { | 32 | { |
33 | struct amdgpu_device *adev = ring->adev; | 33 | struct amdgpu_device *adev = ring->adev; |
34 | int i; | 34 | int i; |
35 | 35 | ||
36 | for (i = 0; i < adev->sdma.num_instances; i++) | 36 | for (i = 0; i < adev->sdma.num_instances; i++) |
37 | if (&adev->sdma.instance[i].ring == ring) | 37 | if (ring == &adev->sdma.instance[i].ring || |
38 | break; | 38 | ring == &adev->sdma.instance[i].page) |
39 | return &adev->sdma.instance[i]; | ||
39 | 40 | ||
40 | if (i < AMDGPU_MAX_SDMA_INSTANCES) | 41 | return NULL; |
41 | return &adev->sdma.instance[i]; | 42 | } |
42 | else | 43 | |
43 | return NULL; | 44 | int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index) |
45 | { | ||
46 | struct amdgpu_device *adev = ring->adev; | ||
47 | int i; | ||
48 | |||
49 | for (i = 0; i < adev->sdma.num_instances; i++) { | ||
50 | if (ring == &adev->sdma.instance[i].ring || | ||
51 | ring == &adev->sdma.instance[i].page) { | ||
52 | *index = i; | ||
53 | return 0; | ||
54 | } | ||
55 | } | ||
56 | |||
57 | return -EINVAL; | ||
44 | } | 58 | } |
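Note: with the new page queue ring per SDMA instance, a backend can no longer assume a ring maps 1:1 to an instance; amdgpu_sdma_get_index_from_ring() resolves either the main or the page ring to its instance index. A hedged usage sketch; the caller is illustrative:

/* illustrative: resolve the SDMA instance index behind a ring */
static int my_sdma_ring_to_instance(struct amdgpu_ring *ring)
{
        uint32_t index;
        int r;

        r = amdgpu_sdma_get_index_from_ring(ring, &index);
        if (r)
                return r;       /* ring is neither an SDMA main nor a page ring */

        return index;
}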
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 500113ec65ca..16b1a6ae5ba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | |||
@@ -41,6 +41,7 @@ struct amdgpu_sdma_instance { | |||
41 | uint32_t feature_version; | 41 | uint32_t feature_version; |
42 | 42 | ||
43 | struct amdgpu_ring ring; | 43 | struct amdgpu_ring ring; |
44 | struct amdgpu_ring page; | ||
44 | bool burst_nop; | 45 | bool burst_nop; |
45 | }; | 46 | }; |
46 | 47 | ||
@@ -50,6 +51,7 @@ struct amdgpu_sdma { | |||
50 | struct amdgpu_irq_src illegal_inst_irq; | 51 | struct amdgpu_irq_src illegal_inst_irq; |
51 | int num_instances; | 52 | int num_instances; |
52 | uint32_t srbm_soft_reset; | 53 | uint32_t srbm_soft_reset; |
54 | bool has_page_queue; | ||
53 | }; | 55 | }; |
54 | 56 | ||
55 | /* | 57 | /* |
@@ -92,6 +94,7 @@ struct amdgpu_buffer_funcs { | |||
92 | #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) | 94 | #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) |
93 | 95 | ||
94 | struct amdgpu_sdma_instance * | 96 | struct amdgpu_sdma_instance * |
95 | amdgpu_get_sdma_instance(struct amdgpu_ring *ring); | 97 | amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); |
98 | int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); | ||
96 | 99 | ||
97 | #endif | 100 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index e9bf70e2ac51..626abca770a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | |||
@@ -218,6 +218,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, | |||
218 | TP_ARGS(vm, ring, job), | 218 | TP_ARGS(vm, ring, job), |
219 | TP_STRUCT__entry( | 219 | TP_STRUCT__entry( |
220 | __field(u32, pasid) | 220 | __field(u32, pasid) |
221 | __string(ring, ring->name) | ||
221 | __field(u32, ring) | 222 | __field(u32, ring) |
222 | __field(u32, vmid) | 223 | __field(u32, vmid) |
223 | __field(u32, vm_hub) | 224 | __field(u32, vm_hub) |
@@ -227,14 +228,14 @@ TRACE_EVENT(amdgpu_vm_grab_id, | |||
227 | 228 | ||
228 | TP_fast_assign( | 229 | TP_fast_assign( |
229 | __entry->pasid = vm->pasid; | 230 | __entry->pasid = vm->pasid; |
230 | __entry->ring = ring->idx; | 231 | __assign_str(ring, ring->name) |
231 | __entry->vmid = job->vmid; | 232 | __entry->vmid = job->vmid; |
232 | __entry->vm_hub = ring->funcs->vmhub, | 233 | __entry->vm_hub = ring->funcs->vmhub, |
233 | __entry->pd_addr = job->vm_pd_addr; | 234 | __entry->pd_addr = job->vm_pd_addr; |
234 | __entry->needs_flush = job->vm_needs_flush; | 235 | __entry->needs_flush = job->vm_needs_flush; |
235 | ), | 236 | ), |
236 | TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", | 237 | TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", |
237 | __entry->pasid, __entry->ring, __entry->vmid, | 238 | __entry->pasid, __get_str(ring), __entry->vmid, |
238 | __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) | 239 | __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) |
239 | ); | 240 | ); |
240 | 241 | ||
@@ -366,20 +367,20 @@ TRACE_EVENT(amdgpu_vm_flush, | |||
366 | uint64_t pd_addr), | 367 | uint64_t pd_addr), |
367 | TP_ARGS(ring, vmid, pd_addr), | 368 | TP_ARGS(ring, vmid, pd_addr), |
368 | TP_STRUCT__entry( | 369 | TP_STRUCT__entry( |
369 | __field(u32, ring) | 370 | __string(ring, ring->name) |
370 | __field(u32, vmid) | 371 | __field(u32, vmid) |
371 | __field(u32, vm_hub) | 372 | __field(u32, vm_hub) |
372 | __field(u64, pd_addr) | 373 | __field(u64, pd_addr) |
373 | ), | 374 | ), |
374 | 375 | ||
375 | TP_fast_assign( | 376 | TP_fast_assign( |
376 | __entry->ring = ring->idx; | 377 | __assign_str(ring, ring->name) |
377 | __entry->vmid = vmid; | 378 | __entry->vmid = vmid; |
378 | __entry->vm_hub = ring->funcs->vmhub; | 379 | __entry->vm_hub = ring->funcs->vmhub; |
379 | __entry->pd_addr = pd_addr; | 380 | __entry->pd_addr = pd_addr; |
380 | ), | 381 | ), |
381 | TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx", | 382 | TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx", |
382 | __entry->ring, __entry->vmid, | 383 | __get_str(ring), __entry->vmid, |
383 | __entry->vm_hub,__entry->pd_addr) | 384 | __entry->vm_hub,__entry->pd_addr) |
384 | ); | 385 | ); |
385 | 386 | ||
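
Both trace events now log the ring by name rather than by numeric index, using the standard ftrace dynamic-string helpers: __string() reserves space sized to the actual string in TP_STRUCT__entry, __assign_str() copies it into the trace record in TP_fast_assign, and __get_str() reads it back in TP_printk. The generic pattern is:

	__string(ring, ring->name)		/* TP_STRUCT__entry: reserve space for the name */
	__assign_str(ring, ring->name);		/* TP_fast_assign: copy it into the trace record */
	TP_printk("ring=%s", __get_str(ring))	/* format it back out as text */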
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a44fc12ae1f9..c91ec3101d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |||
@@ -61,100 +61,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, | |||
61 | static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); | 61 | static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); |
62 | static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); | 62 | static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); |
63 | 63 | ||
64 | /* | ||
65 | * Global memory. | ||
66 | */ | ||
67 | |||
68 | /** | ||
69 | * amdgpu_ttm_mem_global_init - Initialize and acquire reference to | ||
70 | * memory object | ||
71 | * | ||
72 | * @ref: Object for initialization. | ||
73 | * | ||
74 | * This is called by drm_global_item_ref() when an object is being | ||
75 | * initialized. | ||
76 | */ | ||
77 | static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) | ||
78 | { | ||
79 | return ttm_mem_global_init(ref->object); | ||
80 | } | ||
81 | |||
82 | /** | ||
83 | * amdgpu_ttm_mem_global_release - Drop reference to a memory object | ||
84 | * | ||
85 | * @ref: Object being removed | ||
86 | * | ||
87 | * This is called by drm_global_item_unref() when an object is being | ||
88 | * released. | ||
89 | */ | ||
90 | static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) | ||
91 | { | ||
92 | ttm_mem_global_release(ref->object); | ||
93 | } | ||
94 | |||
95 | /** | ||
96 | * amdgpu_ttm_global_init - Initialize global TTM memory reference structures. | ||
97 | * | ||
98 | * @adev: AMDGPU device for which the global structures need to be registered. | ||
99 | * | ||
100 | * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init() | ||
101 | * during bring up. | ||
102 | */ | ||
103 | static int amdgpu_ttm_global_init(struct amdgpu_device *adev) | ||
104 | { | ||
105 | struct drm_global_reference *global_ref; | ||
106 | int r; | ||
107 | |||
108 | /* ensure reference is false in case init fails */ | ||
109 | adev->mman.mem_global_referenced = false; | ||
110 | |||
111 | global_ref = &adev->mman.mem_global_ref; | ||
112 | global_ref->global_type = DRM_GLOBAL_TTM_MEM; | ||
113 | global_ref->size = sizeof(struct ttm_mem_global); | ||
114 | global_ref->init = &amdgpu_ttm_mem_global_init; | ||
115 | global_ref->release = &amdgpu_ttm_mem_global_release; | ||
116 | r = drm_global_item_ref(global_ref); | ||
117 | if (r) { | ||
118 | DRM_ERROR("Failed setting up TTM memory accounting " | ||
119 | "subsystem.\n"); | ||
120 | goto error_mem; | ||
121 | } | ||
122 | |||
123 | adev->mman.bo_global_ref.mem_glob = | ||
124 | adev->mman.mem_global_ref.object; | ||
125 | global_ref = &adev->mman.bo_global_ref.ref; | ||
126 | global_ref->global_type = DRM_GLOBAL_TTM_BO; | ||
127 | global_ref->size = sizeof(struct ttm_bo_global); | ||
128 | global_ref->init = &ttm_bo_global_init; | ||
129 | global_ref->release = &ttm_bo_global_release; | ||
130 | r = drm_global_item_ref(global_ref); | ||
131 | if (r) { | ||
132 | DRM_ERROR("Failed setting up TTM BO subsystem.\n"); | ||
133 | goto error_bo; | ||
134 | } | ||
135 | |||
136 | mutex_init(&adev->mman.gtt_window_lock); | ||
137 | |||
138 | adev->mman.mem_global_referenced = true; | ||
139 | |||
140 | return 0; | ||
141 | |||
142 | error_bo: | ||
143 | drm_global_item_unref(&adev->mman.mem_global_ref); | ||
144 | error_mem: | ||
145 | return r; | ||
146 | } | ||
147 | |||
148 | static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) | ||
149 | { | ||
150 | if (adev->mman.mem_global_referenced) { | ||
151 | mutex_destroy(&adev->mman.gtt_window_lock); | ||
152 | drm_global_item_unref(&adev->mman.bo_global_ref.ref); | ||
153 | drm_global_item_unref(&adev->mman.mem_global_ref); | ||
154 | adev->mman.mem_global_referenced = false; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) | 64 | static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) |
159 | { | 65 | { |
160 | return 0; | 66 | return 0; |
@@ -1758,14 +1664,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) | |||
1758 | int r; | 1664 | int r; |
1759 | u64 vis_vram_limit; | 1665 | u64 vis_vram_limit; |
1760 | 1666 | ||
1761 | /* initialize global references for vram/gtt */ | 1667 | mutex_init(&adev->mman.gtt_window_lock); |
1762 | r = amdgpu_ttm_global_init(adev); | 1668 | |
1763 | if (r) { | ||
1764 | return r; | ||
1765 | } | ||
1766 | /* No others user of address space so set it to 0 */ | 1669 | /* No others user of address space so set it to 0 */ |
1767 | r = ttm_bo_device_init(&adev->mman.bdev, | 1670 | r = ttm_bo_device_init(&adev->mman.bdev, |
1768 | adev->mman.bo_global_ref.ref.object, | ||
1769 | &amdgpu_bo_driver, | 1671 | &amdgpu_bo_driver, |
1770 | adev->ddev->anon_inode->i_mapping, | 1672 | adev->ddev->anon_inode->i_mapping, |
1771 | DRM_FILE_PAGE_OFFSET, | 1673 | DRM_FILE_PAGE_OFFSET, |
@@ -1922,7 +1824,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) | |||
1922 | ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); | 1824 | ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); |
1923 | ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); | 1825 | ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); |
1924 | ttm_bo_device_release(&adev->mman.bdev); | 1826 | ttm_bo_device_release(&adev->mman.bdev); |
1925 | amdgpu_ttm_global_fini(adev); | ||
1926 | adev->mman.initialized = false; | 1827 | adev->mman.initialized = false; |
1927 | DRM_INFO("amdgpu: ttm finalized\n"); | 1828 | DRM_INFO("amdgpu: ttm finalized\n"); |
1928 | } | 1829 | } |
@@ -2069,7 +1970,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, | |||
2069 | unsigned i; | 1970 | unsigned i; |
2070 | int r; | 1971 | int r; |
2071 | 1972 | ||
2072 | if (direct_submit && !ring->ready) { | 1973 | if (direct_submit && !ring->sched.ready) { |
2073 | DRM_ERROR("Trying to move memory with ring turned off.\n"); | 1974 | DRM_ERROR("Trying to move memory with ring turned off.\n"); |
2074 | return -EINVAL; | 1975 | return -EINVAL; |
2075 | } | 1976 | } |
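
With the drm_global/ttm_mem_global plumbing removed, TTM tracks its global BO and memory-accounting state internally, so amdgpu only keeps the gtt_window_lock initialization and a shorter ttm_bo_device_init() call. A sketch of the resulting init sequence; the argument after DRM_FILE_PAGE_OFFSET is outside this hunk and assumed unchanged:

	mutex_init(&adev->mman.gtt_window_lock);

	r = ttm_bo_device_init(&adev->mman.bdev,
			       &amdgpu_bo_driver,
			       adev->ddev->anon_inode->i_mapping,
			       DRM_FILE_PAGE_OFFSET,
			       adev->need_dma32);	/* assumed trailing argument */
	if (r)
		return r;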
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index fe8f276e9811..b5b2d101f7db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | |||
@@ -39,8 +39,6 @@ | |||
39 | #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 | 39 | #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 |
40 | 40 | ||
41 | struct amdgpu_mman { | 41 | struct amdgpu_mman { |
42 | struct ttm_bo_global_ref bo_global_ref; | ||
43 | struct drm_global_reference mem_global_ref; | ||
44 | struct ttm_bo_device bdev; | 42 | struct ttm_bo_device bdev; |
45 | bool mem_global_referenced; | 43 | bool mem_global_referenced; |
46 | bool initialized; | 44 | bool initialized; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index aa6641b944a0..7ac25a1c7853 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | |||
@@ -58,6 +58,17 @@ struct psp_firmware_header_v1_0 { | |||
58 | }; | 58 | }; |
59 | 59 | ||
60 | /* version_major=1, version_minor=0 */ | 60 | /* version_major=1, version_minor=0 */ |
61 | struct ta_firmware_header_v1_0 { | ||
62 | struct common_firmware_header header; | ||
63 | uint32_t ta_xgmi_ucode_version; | ||
64 | uint32_t ta_xgmi_offset_bytes; | ||
65 | uint32_t ta_xgmi_size_bytes; | ||
66 | uint32_t ta_ras_ucode_version; | ||
67 | uint32_t ta_ras_offset_bytes; | ||
68 | uint32_t ta_ras_size_bytes; | ||
69 | }; | ||
70 | |||
71 | /* version_major=1, version_minor=0 */ | ||
61 | struct gfx_firmware_header_v1_0 { | 72 | struct gfx_firmware_header_v1_0 { |
62 | struct common_firmware_header header; | 73 | struct common_firmware_header header; |
63 | uint32_t ucode_feature_version; | 74 | uint32_t ucode_feature_version; |
@@ -170,6 +181,7 @@ union amdgpu_firmware_header { | |||
170 | struct mc_firmware_header_v1_0 mc; | 181 | struct mc_firmware_header_v1_0 mc; |
171 | struct smc_firmware_header_v1_0 smc; | 182 | struct smc_firmware_header_v1_0 smc; |
172 | struct psp_firmware_header_v1_0 psp; | 183 | struct psp_firmware_header_v1_0 psp; |
184 | struct ta_firmware_header_v1_0 ta; | ||
173 | struct gfx_firmware_header_v1_0 gfx; | 185 | struct gfx_firmware_header_v1_0 gfx; |
174 | struct rlc_firmware_header_v1_0 rlc; | 186 | struct rlc_firmware_header_v1_0 rlc; |
175 | struct rlc_firmware_header_v2_0 rlc_v2_0; | 187 | struct rlc_firmware_header_v2_0 rlc_v2_0; |
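
The new ta_firmware_header_v1_0 describes where the XGMI and RAS trusted applications sit inside the TA firmware image. A hypothetical sketch of how a PSP loader might locate the XGMI TA from these fields; the ta_fw member and the offsets-relative-to-the-ucode-array layout are assumptions, not taken from this patch:

	const struct ta_firmware_header_v1_0 *hdr =
		(const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;	/* assumed member */
	const u8 *xgmi_start = (const u8 *)hdr +
		le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
		le32_to_cpu(hdr->ta_xgmi_offset_bytes);
	u32 xgmi_size = le32_to_cpu(hdr->ta_xgmi_size_bytes);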
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e5a6db6beab7..69896f451e8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | |||
@@ -1243,30 +1243,20 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
1243 | { | 1243 | { |
1244 | struct dma_fence *fence; | 1244 | struct dma_fence *fence; |
1245 | long r; | 1245 | long r; |
1246 | uint32_t ip_instance = ring->me; | ||
1247 | 1246 | ||
1248 | r = amdgpu_uvd_get_create_msg(ring, 1, NULL); | 1247 | r = amdgpu_uvd_get_create_msg(ring, 1, NULL); |
1249 | if (r) { | 1248 | if (r) |
1250 | DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r); | ||
1251 | goto error; | 1249 | goto error; |
1252 | } | ||
1253 | 1250 | ||
1254 | r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); | 1251 | r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); |
1255 | if (r) { | 1252 | if (r) |
1256 | DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r); | ||
1257 | goto error; | 1253 | goto error; |
1258 | } | ||
1259 | 1254 | ||
1260 | r = dma_fence_wait_timeout(fence, false, timeout); | 1255 | r = dma_fence_wait_timeout(fence, false, timeout); |
1261 | if (r == 0) { | 1256 | if (r == 0) |
1262 | DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance); | ||
1263 | r = -ETIMEDOUT; | 1257 | r = -ETIMEDOUT; |
1264 | } else if (r < 0) { | 1258 | else if (r > 0) |
1265 | DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r); | ||
1266 | } else { | ||
1267 | DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx); | ||
1268 | r = 0; | 1259 | r = 0; |
1269 | } | ||
1270 | 1260 | ||
1271 | dma_fence_put(fence); | 1261 | dma_fence_put(fence); |
1272 | 1262 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 5f3f54073818..98a1b2ce2b9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | |||
@@ -1032,8 +1032,10 @@ out: | |||
1032 | * @ib: the IB to execute | 1032 | * @ib: the IB to execute |
1033 | * | 1033 | * |
1034 | */ | 1034 | */ |
1035 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, | 1035 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, |
1036 | unsigned vmid, bool ctx_switch) | 1036 | struct amdgpu_job *job, |
1037 | struct amdgpu_ib *ib, | ||
1038 | bool ctx_switch) | ||
1037 | { | 1039 | { |
1038 | amdgpu_ring_write(ring, VCE_CMD_IB); | 1040 | amdgpu_ring_write(ring, VCE_CMD_IB); |
1039 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | 1041 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
@@ -1079,11 +1081,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) | |||
1079 | return 0; | 1081 | return 0; |
1080 | 1082 | ||
1081 | r = amdgpu_ring_alloc(ring, 16); | 1083 | r = amdgpu_ring_alloc(ring, 16); |
1082 | if (r) { | 1084 | if (r) |
1083 | DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", | ||
1084 | ring->idx, r); | ||
1085 | return r; | 1085 | return r; |
1086 | } | 1086 | |
1087 | amdgpu_ring_write(ring, VCE_CMD_END); | 1087 | amdgpu_ring_write(ring, VCE_CMD_END); |
1088 | amdgpu_ring_commit(ring); | 1088 | amdgpu_ring_commit(ring); |
1089 | 1089 | ||
@@ -1093,14 +1093,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) | |||
1093 | DRM_UDELAY(1); | 1093 | DRM_UDELAY(1); |
1094 | } | 1094 | } |
1095 | 1095 | ||
1096 | if (i < timeout) { | 1096 | if (i >= timeout) |
1097 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | ||
1098 | ring->idx, i); | ||
1099 | } else { | ||
1100 | DRM_ERROR("amdgpu: ring %d test failed\n", | ||
1101 | ring->idx); | ||
1102 | r = -ETIMEDOUT; | 1097 | r = -ETIMEDOUT; |
1103 | } | ||
1104 | 1098 | ||
1105 | return r; | 1099 | return r; |
1106 | } | 1100 | } |
@@ -1121,27 +1115,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
1121 | return 0; | 1115 | return 0; |
1122 | 1116 | ||
1123 | r = amdgpu_vce_get_create_msg(ring, 1, NULL); | 1117 | r = amdgpu_vce_get_create_msg(ring, 1, NULL); |
1124 | if (r) { | 1118 | if (r) |
1125 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); | ||
1126 | goto error; | 1119 | goto error; |
1127 | } | ||
1128 | 1120 | ||
1129 | r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); | 1121 | r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); |
1130 | if (r) { | 1122 | if (r) |
1131 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); | ||
1132 | goto error; | 1123 | goto error; |
1133 | } | ||
1134 | 1124 | ||
1135 | r = dma_fence_wait_timeout(fence, false, timeout); | 1125 | r = dma_fence_wait_timeout(fence, false, timeout); |
1136 | if (r == 0) { | 1126 | if (r == 0) |
1137 | DRM_ERROR("amdgpu: IB test timed out.\n"); | ||
1138 | r = -ETIMEDOUT; | 1127 | r = -ETIMEDOUT; |
1139 | } else if (r < 0) { | 1128 | else if (r > 0) |
1140 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
1141 | } else { | ||
1142 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
1143 | r = 0; | 1129 | r = 0; |
1144 | } | 1130 | |
1145 | error: | 1131 | error: |
1146 | dma_fence_put(fence); | 1132 | dma_fence_put(fence); |
1147 | return r; | 1133 | return r; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index a1f209eed4c4..50293652af14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | |||
@@ -65,8 +65,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, | |||
65 | void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); | 65 | void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); |
66 | int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); | 66 | int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); |
67 | int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); | 67 | int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); |
68 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, | 68 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, |
69 | unsigned vmid, bool ctx_switch); | 69 | struct amdgpu_ib *ib, bool ctx_switch); |
70 | void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, | 70 | void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, |
71 | unsigned flags); | 71 | unsigned flags); |
72 | int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); | 72 | int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); |
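
The emit_ib callbacks (here for VCE, and for SDMA/GFX in the hunks below) now take the owning amdgpu_job instead of a raw vmid, and implementations pull the vmid out with AMDGPU_JOB_GET_VMID(job). That macro is not part of this diff; presumably it is something like:

	/* presumed definition, so emit_ib can also run without a job (e.g. IB tests) */
	#define AMDGPU_JOB_GET_VMID(job)	((job) ? (job)->vmid : 0)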
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 27da13df2f11..e2e42e3fbcf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | |||
@@ -425,11 +425,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) | |||
425 | 425 | ||
426 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); | 426 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); |
427 | r = amdgpu_ring_alloc(ring, 3); | 427 | r = amdgpu_ring_alloc(ring, 3); |
428 | if (r) { | 428 | if (r) |
429 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | ||
430 | ring->idx, r); | ||
431 | return r; | 429 | return r; |
432 | } | 430 | |
433 | amdgpu_ring_write(ring, | 431 | amdgpu_ring_write(ring, |
434 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0)); | 432 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0)); |
435 | amdgpu_ring_write(ring, 0xDEADBEEF); | 433 | amdgpu_ring_write(ring, 0xDEADBEEF); |
@@ -441,14 +439,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) | |||
441 | DRM_UDELAY(1); | 439 | DRM_UDELAY(1); |
442 | } | 440 | } |
443 | 441 | ||
444 | if (i < adev->usec_timeout) { | 442 | if (i >= adev->usec_timeout) |
445 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 443 | r = -ETIMEDOUT; |
446 | ring->idx, i); | 444 | |
447 | } else { | ||
448 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
449 | ring->idx, tmp); | ||
450 | r = -EINVAL; | ||
451 | } | ||
452 | return r; | 445 | return r; |
453 | } | 446 | } |
454 | 447 | ||
@@ -570,30 +563,20 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
570 | long r; | 563 | long r; |
571 | 564 | ||
572 | r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); | 565 | r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); |
573 | if (r) { | 566 | if (r) |
574 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); | ||
575 | goto error; | 567 | goto error; |
576 | } | ||
577 | 568 | ||
578 | r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); | 569 | r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); |
579 | if (r) { | 570 | if (r) |
580 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); | ||
581 | goto error; | 571 | goto error; |
582 | } | ||
583 | 572 | ||
584 | r = dma_fence_wait_timeout(fence, false, timeout); | 573 | r = dma_fence_wait_timeout(fence, false, timeout); |
585 | if (r == 0) { | 574 | if (r == 0) |
586 | DRM_ERROR("amdgpu: IB test timed out.\n"); | ||
587 | r = -ETIMEDOUT; | 575 | r = -ETIMEDOUT; |
588 | } else if (r < 0) { | 576 | else if (r > 0) |
589 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
590 | } else { | ||
591 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
592 | r = 0; | 577 | r = 0; |
593 | } | ||
594 | 578 | ||
595 | dma_fence_put(fence); | 579 | dma_fence_put(fence); |
596 | |||
597 | error: | 580 | error: |
598 | return r; | 581 | return r; |
599 | } | 582 | } |
@@ -606,11 +589,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) | |||
606 | int r; | 589 | int r; |
607 | 590 | ||
608 | r = amdgpu_ring_alloc(ring, 16); | 591 | r = amdgpu_ring_alloc(ring, 16); |
609 | if (r) { | 592 | if (r) |
610 | DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n", | ||
611 | ring->idx, r); | ||
612 | return r; | 593 | return r; |
613 | } | 594 | |
614 | amdgpu_ring_write(ring, VCN_ENC_CMD_END); | 595 | amdgpu_ring_write(ring, VCN_ENC_CMD_END); |
615 | amdgpu_ring_commit(ring); | 596 | amdgpu_ring_commit(ring); |
616 | 597 | ||
@@ -620,14 +601,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) | |||
620 | DRM_UDELAY(1); | 601 | DRM_UDELAY(1); |
621 | } | 602 | } |
622 | 603 | ||
623 | if (i < adev->usec_timeout) { | 604 | if (i >= adev->usec_timeout) |
624 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | ||
625 | ring->idx, i); | ||
626 | } else { | ||
627 | DRM_ERROR("amdgpu: ring %d test failed\n", | ||
628 | ring->idx); | ||
629 | r = -ETIMEDOUT; | 605 | r = -ETIMEDOUT; |
630 | } | ||
631 | 606 | ||
632 | return r; | 607 | return r; |
633 | } | 608 | } |
@@ -742,27 +717,19 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
742 | long r; | 717 | long r; |
743 | 718 | ||
744 | r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); | 719 | r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); |
745 | if (r) { | 720 | if (r) |
746 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); | ||
747 | goto error; | 721 | goto error; |
748 | } | ||
749 | 722 | ||
750 | r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); | 723 | r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); |
751 | if (r) { | 724 | if (r) |
752 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); | ||
753 | goto error; | 725 | goto error; |
754 | } | ||
755 | 726 | ||
756 | r = dma_fence_wait_timeout(fence, false, timeout); | 727 | r = dma_fence_wait_timeout(fence, false, timeout); |
757 | if (r == 0) { | 728 | if (r == 0) |
758 | DRM_ERROR("amdgpu: IB test timed out.\n"); | ||
759 | r = -ETIMEDOUT; | 729 | r = -ETIMEDOUT; |
760 | } else if (r < 0) { | 730 | else if (r > 0) |
761 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
762 | } else { | ||
763 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
764 | r = 0; | 731 | r = 0; |
765 | } | 732 | |
766 | error: | 733 | error: |
767 | dma_fence_put(fence); | 734 | dma_fence_put(fence); |
768 | return r; | 735 | return r; |
@@ -778,11 +745,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) | |||
778 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); | 745 | WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); |
779 | r = amdgpu_ring_alloc(ring, 3); | 746 | r = amdgpu_ring_alloc(ring, 3); |
780 | 747 | ||
781 | if (r) { | 748 | if (r) |
782 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | ||
783 | ring->idx, r); | ||
784 | return r; | 749 | return r; |
785 | } | ||
786 | 750 | ||
787 | amdgpu_ring_write(ring, | 751 | amdgpu_ring_write(ring, |
788 | PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0)); | 752 | PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0)); |
@@ -796,14 +760,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) | |||
796 | DRM_UDELAY(1); | 760 | DRM_UDELAY(1); |
797 | } | 761 | } |
798 | 762 | ||
799 | if (i < adev->usec_timeout) { | 763 | if (i >= adev->usec_timeout) |
800 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 764 | r = -ETIMEDOUT; |
801 | ring->idx, i); | ||
802 | } else { | ||
803 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
804 | ring->idx, tmp); | ||
805 | r = -EINVAL; | ||
806 | } | ||
807 | 765 | ||
808 | return r; | 766 | return r; |
809 | } | 767 | } |
@@ -856,21 +814,18 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
856 | long r = 0; | 814 | long r = 0; |
857 | 815 | ||
858 | r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); | 816 | r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); |
859 | if (r) { | 817 | if (r) |
860 | DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r); | ||
861 | goto error; | 818 | goto error; |
862 | } | ||
863 | 819 | ||
864 | r = dma_fence_wait_timeout(fence, false, timeout); | 820 | r = dma_fence_wait_timeout(fence, false, timeout); |
865 | if (r == 0) { | 821 | if (r == 0) { |
866 | DRM_ERROR("amdgpu: IB test timed out.\n"); | ||
867 | r = -ETIMEDOUT; | 822 | r = -ETIMEDOUT; |
868 | goto error; | 823 | goto error; |
869 | } else if (r < 0) { | 824 | } else if (r < 0) { |
870 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
871 | goto error; | 825 | goto error; |
872 | } else | 826 | } else { |
873 | r = 0; | 827 | r = 0; |
828 | } | ||
874 | 829 | ||
875 | for (i = 0; i < adev->usec_timeout; i++) { | 830 | for (i = 0; i < adev->usec_timeout; i++) { |
876 | tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); | 831 | tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); |
@@ -879,15 +834,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
879 | DRM_UDELAY(1); | 834 | DRM_UDELAY(1); |
880 | } | 835 | } |
881 | 836 | ||
882 | if (i < adev->usec_timeout) | 837 | if (i >= adev->usec_timeout) |
883 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | 838 | r = -ETIMEDOUT; |
884 | else { | ||
885 | DRM_ERROR("ib test failed (0x%08X)\n", tmp); | ||
886 | r = -EINVAL; | ||
887 | } | ||
888 | 839 | ||
889 | dma_fence_put(fence); | 840 | dma_fence_put(fence); |
890 | |||
891 | error: | 841 | error: |
892 | return r; | 842 | return r; |
893 | } | 843 | } |
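
The rewritten UVD/VCE/VCN IB tests all follow dma_fence_wait_timeout()'s return convention: 0 means the wait timed out, a negative value is an error, and a positive value is the remaining timeout, i.e. success. With the per-ring error prints moved out of the individual tests, the canonical shape is:

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)		/* timed out */
		r = -ETIMEDOUT;
	else if (r > 0)		/* fence signalled with time to spare */
		r = 0;
	/* r < 0 is already an error code and is returned as-is */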
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f2f358aa0597..cfee74732edb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | |||
@@ -23,16 +23,6 @@ | |||
23 | 23 | ||
24 | #include "amdgpu.h" | 24 | #include "amdgpu.h" |
25 | 25 | ||
26 | uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) | ||
27 | { | ||
28 | uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; | ||
29 | |||
30 | addr -= AMDGPU_VA_RESERVED_SIZE; | ||
31 | addr = amdgpu_gmc_sign_extend(addr); | ||
32 | |||
33 | return addr; | ||
34 | } | ||
35 | |||
36 | bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) | 26 | bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) |
37 | { | 27 | { |
38 | /* By now all MMIO pages except mailbox are blocked */ | 28 | /* By now all MMIO pages except mailbox are blocked */ |
@@ -41,88 +31,6 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) | |||
41 | return RREG32_NO_KIQ(0xc040) == 0xffffffff; | 31 | return RREG32_NO_KIQ(0xc040) == 0xffffffff; |
42 | } | 32 | } |
43 | 33 | ||
44 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev) | ||
45 | { | ||
46 | int r; | ||
47 | void *ptr; | ||
48 | |||
49 | r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE, | ||
50 | AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj, | ||
51 | &adev->virt.csa_vmid0_addr, &ptr); | ||
52 | if (r) | ||
53 | return r; | ||
54 | |||
55 | memset(ptr, 0, AMDGPU_CSA_SIZE); | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | void amdgpu_free_static_csa(struct amdgpu_device *adev) { | ||
60 | amdgpu_bo_free_kernel(&adev->virt.csa_obj, | ||
61 | &adev->virt.csa_vmid0_addr, | ||
62 | NULL); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * amdgpu_map_static_csa should be called during amdgpu_vm_init | ||
67 | * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command | ||
68 | * submission of GFX should use this virtual address within META_DATA init | ||
69 | * package to support SRIOV gfx preemption. | ||
70 | */ | ||
71 | int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, | ||
72 | struct amdgpu_bo_va **bo_va) | ||
73 | { | ||
74 | uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; | ||
75 | struct ww_acquire_ctx ticket; | ||
76 | struct list_head list; | ||
77 | struct amdgpu_bo_list_entry pd; | ||
78 | struct ttm_validate_buffer csa_tv; | ||
79 | int r; | ||
80 | |||
81 | INIT_LIST_HEAD(&list); | ||
82 | INIT_LIST_HEAD(&csa_tv.head); | ||
83 | csa_tv.bo = &adev->virt.csa_obj->tbo; | ||
84 | csa_tv.shared = true; | ||
85 | |||
86 | list_add(&csa_tv.head, &list); | ||
87 | amdgpu_vm_get_pd_bo(vm, &list, &pd); | ||
88 | |||
89 | r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); | ||
90 | if (r) { | ||
91 | DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); | ||
92 | return r; | ||
93 | } | ||
94 | |||
95 | *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); | ||
96 | if (!*bo_va) { | ||
97 | ttm_eu_backoff_reservation(&ticket, &list); | ||
98 | DRM_ERROR("failed to create bo_va for static CSA\n"); | ||
99 | return -ENOMEM; | ||
100 | } | ||
101 | |||
102 | r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, | ||
103 | AMDGPU_CSA_SIZE); | ||
104 | if (r) { | ||
105 | DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); | ||
106 | amdgpu_vm_bo_rmv(adev, *bo_va); | ||
107 | ttm_eu_backoff_reservation(&ticket, &list); | ||
108 | return r; | ||
109 | } | ||
110 | |||
111 | r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE, | ||
112 | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | | ||
113 | AMDGPU_PTE_EXECUTABLE); | ||
114 | |||
115 | if (r) { | ||
116 | DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); | ||
117 | amdgpu_vm_bo_rmv(adev, *bo_va); | ||
118 | ttm_eu_backoff_reservation(&ticket, &list); | ||
119 | return r; | ||
120 | } | ||
121 | |||
122 | ttm_eu_backoff_reservation(&ticket, &list); | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | void amdgpu_virt_init_setting(struct amdgpu_device *adev) | 34 | void amdgpu_virt_init_setting(struct amdgpu_device *adev) |
127 | { | 35 | { |
128 | /* enable virtual display */ | 36 | /* enable virtual display */ |
@@ -162,9 +70,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) | |||
162 | if (r < 1 && (adev->in_gpu_reset || in_interrupt())) | 70 | if (r < 1 && (adev->in_gpu_reset || in_interrupt())) |
163 | goto failed_kiq_read; | 71 | goto failed_kiq_read; |
164 | 72 | ||
165 | if (in_interrupt()) | 73 | might_sleep(); |
166 | might_sleep(); | ||
167 | |||
168 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { | 74 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { |
169 | msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); | 75 | msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); |
170 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); | 76 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); |
@@ -210,9 +116,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) | |||
210 | if (r < 1 && (adev->in_gpu_reset || in_interrupt())) | 116 | if (r < 1 && (adev->in_gpu_reset || in_interrupt())) |
211 | goto failed_kiq_write; | 117 | goto failed_kiq_write; |
212 | 118 | ||
213 | if (in_interrupt()) | 119 | might_sleep(); |
214 | might_sleep(); | ||
215 | |||
216 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { | 120 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { |
217 | 121 | ||
218 | msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); | 122 | msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); |
@@ -228,6 +132,46 @@ failed_kiq_write: | |||
228 | pr_err("failed to write reg:%x\n", reg); | 132 | pr_err("failed to write reg:%x\n", reg); |
229 | } | 133 | } |
230 | 134 | ||
135 | void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, | ||
136 | uint32_t reg0, uint32_t reg1, | ||
137 | uint32_t ref, uint32_t mask) | ||
138 | { | ||
139 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
140 | struct amdgpu_ring *ring = &kiq->ring; | ||
141 | signed long r, cnt = 0; | ||
142 | unsigned long flags; | ||
143 | uint32_t seq; | ||
144 | |||
145 | spin_lock_irqsave(&kiq->ring_lock, flags); | ||
146 | amdgpu_ring_alloc(ring, 32); | ||
147 | amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, | ||
148 | ref, mask); | ||
149 | amdgpu_fence_emit_polling(ring, &seq); | ||
150 | amdgpu_ring_commit(ring); | ||
151 | spin_unlock_irqrestore(&kiq->ring_lock, flags); | ||
152 | |||
153 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); | ||
154 | |||
155 | /* don't wait anymore for IRQ context */ | ||
156 | if (r < 1 && in_interrupt()) | ||
157 | goto failed_kiq; | ||
158 | |||
159 | might_sleep(); | ||
160 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { | ||
161 | |||
162 | msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); | ||
163 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); | ||
164 | } | ||
165 | |||
166 | if (cnt > MAX_KIQ_REG_TRY) | ||
167 | goto failed_kiq; | ||
168 | |||
169 | return; | ||
170 | |||
171 | failed_kiq: | ||
172 | pr_err("failed to write reg %x wait reg %x\n", reg0, reg1); | ||
173 | } | ||
174 | |||
231 | /** | 175 | /** |
232 | * amdgpu_virt_request_full_gpu() - request full gpu access | 176 | * amdgpu_virt_request_full_gpu() - request full gpu access |
233 | * @amdgpu: amdgpu device. | 177 | * @amdgpu: amdgpu device. |
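
amdgpu_virt_kiq_reg_write_reg_wait() lets an SR-IOV guest emit a combined "write reg0, then poll reg1 against ref/mask" sequence as a single KIQ submission. An illustrative caller, loosely modelled on a VM TLB flush; the vmhub field names and the amdgpu_sriov_runtime() check come from the surrounding driver code of this era and should be treated as assumptions here:

	static void example_flush_tlb_sriov(struct amdgpu_device *adev,
					    struct amdgpu_vmhub *hub,
					    unsigned int eng, uint32_t inv_req,
					    unsigned int vmid)
	{
		if (!amdgpu_sriov_runtime(adev))
			return;

		/* one KIQ round trip: write the request register, then wait
		 * until (ack_reg & (1 << vmid)) == (1 << vmid) */
		amdgpu_virt_kiq_reg_write_reg_wait(adev,
						   hub->vm_inv_eng0_req + eng,
						   hub->vm_inv_eng0_ack + eng,
						   inv_req, 1 << vmid);
	}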
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 880ac113a3a9..0728fbc9a692 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | |||
@@ -238,7 +238,6 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; | |||
238 | struct amdgpu_virt { | 238 | struct amdgpu_virt { |
239 | uint32_t caps; | 239 | uint32_t caps; |
240 | struct amdgpu_bo *csa_obj; | 240 | struct amdgpu_bo *csa_obj; |
241 | uint64_t csa_vmid0_addr; | ||
242 | bool chained_ib_support; | 241 | bool chained_ib_support; |
243 | uint32_t reg_val_offs; | 242 | uint32_t reg_val_offs; |
244 | struct amdgpu_irq_src ack_irq; | 243 | struct amdgpu_irq_src ack_irq; |
@@ -251,8 +250,6 @@ struct amdgpu_virt { | |||
251 | uint32_t gim_feature; | 250 | uint32_t gim_feature; |
252 | }; | 251 | }; |
253 | 252 | ||
254 | #define AMDGPU_CSA_SIZE (8 * 1024) | ||
255 | |||
256 | #define amdgpu_sriov_enabled(adev) \ | 253 | #define amdgpu_sriov_enabled(adev) \ |
257 | ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) | 254 | ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) |
258 | 255 | ||
@@ -277,17 +274,13 @@ static inline bool is_virtual_machine(void) | |||
277 | #endif | 274 | #endif |
278 | } | 275 | } |
279 | 276 | ||
280 | struct amdgpu_vm; | ||
281 | |||
282 | uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); | ||
283 | bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); | 277 | bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); |
284 | int amdgpu_allocate_static_csa(struct amdgpu_device *adev); | ||
285 | int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, | ||
286 | struct amdgpu_bo_va **bo_va); | ||
287 | void amdgpu_free_static_csa(struct amdgpu_device *adev); | ||
288 | void amdgpu_virt_init_setting(struct amdgpu_device *adev); | 278 | void amdgpu_virt_init_setting(struct amdgpu_device *adev); |
289 | uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); | 279 | uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); |
290 | void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); | 280 | void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); |
281 | void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, | ||
282 | uint32_t reg0, uint32_t rreg1, | ||
283 | uint32_t ref, uint32_t mask); | ||
291 | int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); | 284 | int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); |
292 | int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); | 285 | int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); |
293 | int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); | 286 | int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d6c47972062a..58a2363040dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |||
@@ -1632,13 +1632,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
1632 | continue; | 1632 | continue; |
1633 | } | 1633 | } |
1634 | 1634 | ||
1635 | /* First check if the entry is already handled */ | ||
1636 | if (cursor.pfn < frag_start) { | ||
1637 | cursor.entry->huge = true; | ||
1638 | amdgpu_vm_pt_next(adev, &cursor); | ||
1639 | continue; | ||
1640 | } | ||
1641 | |||
1642 | /* If it isn't already handled it can't be a huge page */ | 1635 | /* If it isn't already handled it can't be a huge page */ |
1643 | if (cursor.entry->huge) { | 1636 | if (cursor.entry->huge) { |
1644 | /* Add the entry to the relocated list to update it. */ | 1637 | /* Add the entry to the relocated list to update it. */ |
@@ -1701,8 +1694,17 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, | |||
1701 | } | 1694 | } |
1702 | } while (frag_start < entry_end); | 1695 | } while (frag_start < entry_end); |
1703 | 1696 | ||
1704 | if (frag >= shift) | 1697 | if (amdgpu_vm_pt_descendant(adev, &cursor)) { |
1698 | /* Mark all child entries as huge */ | ||
1699 | while (cursor.pfn < frag_start) { | ||
1700 | cursor.entry->huge = true; | ||
1701 | amdgpu_vm_pt_next(adev, &cursor); | ||
1702 | } | ||
1703 | |||
1704 | } else if (frag >= shift) { | ||
1705 | /* or just move on to the next on the same level. */ | ||
1705 | amdgpu_vm_pt_next(adev, &cursor); | 1706 | amdgpu_vm_pt_next(adev, &cursor); |
1707 | } | ||
1706 | } | 1708 | } |
1707 | 1709 | ||
1708 | return 0; | 1710 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 897afbb348c1..909216a9b447 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | |||
@@ -63,7 +63,7 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) | |||
63 | 63 | ||
64 | int amdgpu_xgmi_add_device(struct amdgpu_device *adev) | 64 | int amdgpu_xgmi_add_device(struct amdgpu_device *adev) |
65 | { | 65 | { |
66 | struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE]; | 66 | struct psp_xgmi_topology_info *tmp_topology; |
67 | struct amdgpu_hive_info *hive; | 67 | struct amdgpu_hive_info *hive; |
68 | struct amdgpu_xgmi *entry; | 68 | struct amdgpu_xgmi *entry; |
69 | struct amdgpu_device *tmp_adev; | 69 | struct amdgpu_device *tmp_adev; |
@@ -73,10 +73,12 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) | |||
73 | if ((adev->asic_type < CHIP_VEGA20) || | 73 | if ((adev->asic_type < CHIP_VEGA20) || |
74 | (adev->flags & AMD_IS_APU) ) | 74 | (adev->flags & AMD_IS_APU) ) |
75 | return 0; | 75 | return 0; |
76 | adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp); | 76 | adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp); |
77 | adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp); | 77 | adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp); |
78 | 78 | ||
79 | memset(&tmp_topology[0], 0, sizeof(tmp_topology)); | 79 | tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL); |
80 | if (!tmp_topology) | ||
81 | return -ENOMEM; | ||
80 | mutex_lock(&xgmi_mutex); | 82 | mutex_lock(&xgmi_mutex); |
81 | hive = amdgpu_get_xgmi_hive(adev); | 83 | hive = amdgpu_get_xgmi_hive(adev); |
82 | if (!hive) | 84 | if (!hive) |
@@ -84,23 +86,28 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) | |||
84 | 86 | ||
85 | list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); | 87 | list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); |
86 | list_for_each_entry(entry, &hive->device_list, head) | 88 | list_for_each_entry(entry, &hive->device_list, head) |
87 | tmp_topology[count++].device_id = entry->device_id; | 89 | tmp_topology->nodes[count++].node_id = entry->node_id; |
88 | 90 | ||
89 | ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology); | 91 | /* Each psp need to get the latest topology */ |
90 | if (ret) { | 92 | list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { |
91 | dev_err(adev->dev, | 93 | ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology); |
92 | "XGMI: Get topology failure on device %llx, hive %llx, ret %d", | 94 | if (ret) { |
93 | adev->gmc.xgmi.device_id, | 95 | dev_err(tmp_adev->dev, |
94 | adev->gmc.xgmi.hive_id, ret); | 96 | "XGMI: Get topology failure on device %llx, hive %llx, ret %d", |
95 | goto exit; | 97 | tmp_adev->gmc.xgmi.node_id, |
98 | tmp_adev->gmc.xgmi.hive_id, ret); | ||
99 | /* To do : continue with some node failed or disable the whole hive */ | ||
100 | break; | ||
101 | } | ||
96 | } | 102 | } |
103 | |||
97 | /* Each psp need to set the latest topology */ | 104 | /* Each psp need to set the latest topology */ |
98 | list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { | 105 | list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { |
99 | ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology); | 106 | ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology); |
100 | if (ret) { | 107 | if (ret) { |
101 | dev_err(tmp_adev->dev, | 108 | dev_err(tmp_adev->dev, |
102 | "XGMI: Set topology failure on device %llx, hive %llx, ret %d", | 109 | "XGMI: Set topology failure on device %llx, hive %llx, ret %d", |
103 | tmp_adev->gmc.xgmi.device_id, | 110 | tmp_adev->gmc.xgmi.node_id, |
104 | tmp_adev->gmc.xgmi.hive_id, ret); | 111 | tmp_adev->gmc.xgmi.hive_id, ret); |
105 | /* To do : continue with some node failed or disable the whole hive */ | 112 | /* To do : continue with some node failed or disable the whole hive */ |
106 | break; | 113 | break; |
@@ -113,7 +120,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) | |||
113 | 120 | ||
114 | exit: | 121 | exit: |
115 | mutex_unlock(&xgmi_mutex); | 122 | mutex_unlock(&xgmi_mutex); |
123 | kfree(tmp_topology); | ||
116 | return ret; | 124 | return ret; |
117 | } | 125 | } |
118 | |||
119 | |||
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 79220a91abe3..86e14c754dd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c | |||
@@ -743,19 +743,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable) | |||
743 | 743 | ||
744 | if (pi->caps_sq_ramping || pi->caps_db_ramping || | 744 | if (pi->caps_sq_ramping || pi->caps_db_ramping || |
745 | pi->caps_td_ramping || pi->caps_tcp_ramping) { | 745 | pi->caps_td_ramping || pi->caps_tcp_ramping) { |
746 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | 746 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
747 | 747 | ||
748 | if (enable) { | 748 | if (enable) { |
749 | ret = ci_program_pt_config_registers(adev, didt_config_ci); | 749 | ret = ci_program_pt_config_registers(adev, didt_config_ci); |
750 | if (ret) { | 750 | if (ret) { |
751 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 751 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
752 | return ret; | 752 | return ret; |
753 | } | 753 | } |
754 | } | 754 | } |
755 | 755 | ||
756 | ci_do_enable_didt(adev, enable); | 756 | ci_do_enable_didt(adev, enable); |
757 | 757 | ||
758 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 758 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
759 | } | 759 | } |
760 | 760 | ||
761 | return 0; | 761 | return 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index b918c8886b75..45795191de1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c | |||
@@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) | |||
198 | 198 | ||
199 | static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | 199 | static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) |
200 | { | 200 | { |
201 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); | 201 | struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
202 | int i; | 202 | int i; |
203 | 203 | ||
204 | for (i = 0; i < count; i++) | 204 | for (i = 0; i < count; i++) |
@@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | |||
218 | * Schedule an IB in the DMA ring (CIK). | 218 | * Schedule an IB in the DMA ring (CIK). |
219 | */ | 219 | */ |
220 | static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, | 220 | static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, |
221 | struct amdgpu_job *job, | ||
221 | struct amdgpu_ib *ib, | 222 | struct amdgpu_ib *ib, |
222 | unsigned vmid, bool ctx_switch) | 223 | bool ctx_switch) |
223 | { | 224 | { |
225 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
224 | u32 extra_bits = vmid & 0xf; | 226 | u32 extra_bits = vmid & 0xf; |
225 | 227 | ||
226 | /* IB packet must end on a 8 DW boundary */ | 228 | /* IB packet must end on a 8 DW boundary */ |
@@ -316,8 +318,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) | |||
316 | WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); | 318 | WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); |
317 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0); | 319 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0); |
318 | } | 320 | } |
319 | sdma0->ready = false; | 321 | sdma0->sched.ready = false; |
320 | sdma1->ready = false; | 322 | sdma1->sched.ready = false; |
321 | } | 323 | } |
322 | 324 | ||
323 | /** | 325 | /** |
@@ -494,18 +496,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) | |||
494 | /* enable DMA IBs */ | 496 | /* enable DMA IBs */ |
495 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); | 497 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); |
496 | 498 | ||
497 | ring->ready = true; | 499 | ring->sched.ready = true; |
498 | } | 500 | } |
499 | 501 | ||
500 | cik_sdma_enable(adev, true); | 502 | cik_sdma_enable(adev, true); |
501 | 503 | ||
502 | for (i = 0; i < adev->sdma.num_instances; i++) { | 504 | for (i = 0; i < adev->sdma.num_instances; i++) { |
503 | ring = &adev->sdma.instance[i].ring; | 505 | ring = &adev->sdma.instance[i].ring; |
504 | r = amdgpu_ring_test_ring(ring); | 506 | r = amdgpu_ring_test_helper(ring); |
505 | if (r) { | 507 | if (r) |
506 | ring->ready = false; | ||
507 | return r; | 508 | return r; |
508 | } | ||
509 | 509 | ||
510 | if (adev->mman.buffer_funcs_ring == ring) | 510 | if (adev->mman.buffer_funcs_ring == ring) |
511 | amdgpu_ttm_set_buffer_funcs_status(adev, true); | 511 | amdgpu_ttm_set_buffer_funcs_status(adev, true); |
@@ -618,21 +618,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) | |||
618 | u64 gpu_addr; | 618 | u64 gpu_addr; |
619 | 619 | ||
620 | r = amdgpu_device_wb_get(adev, &index); | 620 | r = amdgpu_device_wb_get(adev, &index); |
621 | if (r) { | 621 | if (r) |
622 | dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); | ||
623 | return r; | 622 | return r; |
624 | } | ||
625 | 623 | ||
626 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 624 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
627 | tmp = 0xCAFEDEAD; | 625 | tmp = 0xCAFEDEAD; |
628 | adev->wb.wb[index] = cpu_to_le32(tmp); | 626 | adev->wb.wb[index] = cpu_to_le32(tmp); |
629 | 627 | ||
630 | r = amdgpu_ring_alloc(ring, 5); | 628 | r = amdgpu_ring_alloc(ring, 5); |
631 | if (r) { | 629 | if (r) |
632 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); | 630 | goto error_free_wb; |
633 | amdgpu_device_wb_free(adev, index); | 631 | |
634 | return r; | ||
635 | } | ||
636 | amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); | 632 | amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); |
637 | amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); | 633 | amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); |
638 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); | 634 | amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); |
@@ -647,15 +643,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) | |||
647 | DRM_UDELAY(1); | 643 | DRM_UDELAY(1); |
648 | } | 644 | } |
649 | 645 | ||
650 | if (i < adev->usec_timeout) { | 646 | if (i >= adev->usec_timeout) |
651 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); | 647 | r = -ETIMEDOUT; |
652 | } else { | ||
653 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
654 | ring->idx, tmp); | ||
655 | r = -EINVAL; | ||
656 | } | ||
657 | amdgpu_device_wb_free(adev, index); | ||
658 | 648 | ||
649 | error_free_wb: | ||
650 | amdgpu_device_wb_free(adev, index); | ||
659 | return r; | 651 | return r; |
660 | } | 652 | } |
661 | 653 | ||
@@ -678,20 +670,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
678 | long r; | 670 | long r; |
679 | 671 | ||
680 | r = amdgpu_device_wb_get(adev, &index); | 672 | r = amdgpu_device_wb_get(adev, &index); |
681 | if (r) { | 673 | if (r) |
682 | dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); | ||
683 | return r; | 674 | return r; |
684 | } | ||
685 | 675 | ||
686 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 676 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
687 | tmp = 0xCAFEDEAD; | 677 | tmp = 0xCAFEDEAD; |
688 | adev->wb.wb[index] = cpu_to_le32(tmp); | 678 | adev->wb.wb[index] = cpu_to_le32(tmp); |
689 | memset(&ib, 0, sizeof(ib)); | 679 | memset(&ib, 0, sizeof(ib)); |
690 | r = amdgpu_ib_get(adev, NULL, 256, &ib); | 680 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
691 | if (r) { | 681 | if (r) |
692 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
693 | goto err0; | 682 | goto err0; |
694 | } | ||
695 | 683 | ||
696 | ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, | 684 | ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, |
697 | SDMA_WRITE_SUB_OPCODE_LINEAR, 0); | 685 | SDMA_WRITE_SUB_OPCODE_LINEAR, 0); |
@@ -706,21 +694,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
706 | 694 | ||
707 | r = dma_fence_wait_timeout(f, false, timeout); | 695 | r = dma_fence_wait_timeout(f, false, timeout); |
708 | if (r == 0) { | 696 | if (r == 0) { |
709 | DRM_ERROR("amdgpu: IB test timed out\n"); | ||
710 | r = -ETIMEDOUT; | 697 | r = -ETIMEDOUT; |
711 | goto err1; | 698 | goto err1; |
712 | } else if (r < 0) { | 699 | } else if (r < 0) { |
713 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
714 | goto err1; | 700 | goto err1; |
715 | } | 701 | } |
716 | tmp = le32_to_cpu(adev->wb.wb[index]); | 702 | tmp = le32_to_cpu(adev->wb.wb[index]); |
717 | if (tmp == 0xDEADBEEF) { | 703 | if (tmp == 0xDEADBEEF) |
718 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
719 | r = 0; | 704 | r = 0; |
720 | } else { | 705 | else |
721 | DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); | ||
722 | r = -EINVAL; | 706 | r = -EINVAL; |
723 | } | ||
724 | 707 | ||
725 | err1: | 708 | err1: |
726 | amdgpu_ib_free(adev, &ib, NULL); | 709 | amdgpu_ib_free(adev, &ib, NULL); |
@@ -822,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, | |||
822 | */ | 805 | */ |
823 | static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) | 806 | static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) |
824 | { | 807 | { |
825 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); | 808 | struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
826 | u32 pad_count; | 809 | u32 pad_count; |
827 | int i; | 810 | int i; |
828 | 811 | ||
@@ -1214,8 +1197,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, | |||
1214 | struct amdgpu_irq_src *source, | 1197 | struct amdgpu_irq_src *source, |
1215 | struct amdgpu_iv_entry *entry) | 1198 | struct amdgpu_iv_entry *entry) |
1216 | { | 1199 | { |
1200 | u8 instance_id; | ||
1201 | |||
1217 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); | 1202 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); |
1218 | schedule_work(&adev->reset_work); | 1203 | instance_id = (entry->ring_id & 0x3) >> 0; |
1204 | drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); | ||
1219 | return 0; | 1205 | return 0; |
1220 | } | 1206 | } |
1221 | 1207 | ||
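
Two related cleanups land in cik_sdma.c: the open-coded ring tests are replaced by amdgpu_ring_test_helper(), which also takes over setting ring->sched.ready, and the illegal-instruction interrupt now reports the fault to the scheduler of the affected instance via drm_sched_fault() instead of scheduling a driver-wide reset work item. The helper itself is defined outside this diff; its presumed shape:

	/* presumed shape of the common helper (amdgpu_ring.c, not in this diff) */
	int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
	{
		struct amdgpu_device *adev = ring->adev;
		int r;

		r = amdgpu_ring_test_ring(ring);
		if (r)
			DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
				      ring->name, r);

		ring->sched.ready = !r;	/* one place records the ready state */
		return r;
	}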
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index d76eb27945dc..1dc3013ea1d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | |||
@@ -1775,18 +1775,15 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) | |||
1775 | int r; | 1775 | int r; |
1776 | 1776 | ||
1777 | r = amdgpu_gfx_scratch_get(adev, &scratch); | 1777 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
1778 | if (r) { | 1778 | if (r) |
1779 | DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); | ||
1780 | return r; | 1779 | return r; |
1781 | } | 1780 | |
1782 | WREG32(scratch, 0xCAFEDEAD); | 1781 | WREG32(scratch, 0xCAFEDEAD); |
1783 | 1782 | ||
1784 | r = amdgpu_ring_alloc(ring, 3); | 1783 | r = amdgpu_ring_alloc(ring, 3); |
1785 | if (r) { | 1784 | if (r) |
1786 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); | 1785 | goto error_free_scratch; |
1787 | amdgpu_gfx_scratch_free(adev, scratch); | 1786 | |
1788 | return r; | ||
1789 | } | ||
1790 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); | 1787 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); |
1791 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START)); | 1788 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START)); |
1792 | amdgpu_ring_write(ring, 0xDEADBEEF); | 1789 | amdgpu_ring_write(ring, 0xDEADBEEF); |
@@ -1798,13 +1795,11 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) | |||
1798 | break; | 1795 | break; |
1799 | DRM_UDELAY(1); | 1796 | DRM_UDELAY(1); |
1800 | } | 1797 | } |
1801 | if (i < adev->usec_timeout) { | 1798 | |
1802 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); | 1799 | if (i >= adev->usec_timeout) |
1803 | } else { | 1800 | r = -ETIMEDOUT; |
1804 | DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", | 1801 | |
1805 | ring->idx, scratch, tmp); | 1802 | error_free_scratch: |
1806 | r = -EINVAL; | ||
1807 | } | ||
1808 | amdgpu_gfx_scratch_free(adev, scratch); | 1803 | amdgpu_gfx_scratch_free(adev, scratch); |
1809 | return r; | 1804 | return r; |
1810 | } | 1805 | } |
@@ -1845,9 +1840,11 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, | |||
1845 | } | 1840 | } |
1846 | 1841 | ||
1847 | static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, | 1842 | static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, |
1843 | struct amdgpu_job *job, | ||
1848 | struct amdgpu_ib *ib, | 1844 | struct amdgpu_ib *ib, |
1849 | unsigned vmid, bool ctx_switch) | 1845 | bool ctx_switch) |
1850 | { | 1846 | { |
1847 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
1851 | u32 header, control = 0; | 1848 | u32 header, control = 0; |
1852 | 1849 | ||
1853 | /* insert SWITCH_BUFFER packet before first IB in the ring frame */ | 1850 | /* insert SWITCH_BUFFER packet before first IB in the ring frame */ |
@@ -1892,17 +1889,15 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
1892 | long r; | 1889 | long r; |
1893 | 1890 | ||
1894 | r = amdgpu_gfx_scratch_get(adev, &scratch); | 1891 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
1895 | if (r) { | 1892 | if (r) |
1896 | DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); | ||
1897 | return r; | 1893 | return r; |
1898 | } | 1894 | |
1899 | WREG32(scratch, 0xCAFEDEAD); | 1895 | WREG32(scratch, 0xCAFEDEAD); |
1900 | memset(&ib, 0, sizeof(ib)); | 1896 | memset(&ib, 0, sizeof(ib)); |
1901 | r = amdgpu_ib_get(adev, NULL, 256, &ib); | 1897 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
1902 | if (r) { | 1898 | if (r) |
1903 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
1904 | goto err1; | 1899 | goto err1; |
1905 | } | 1900 | |
1906 | ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1); | 1901 | ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1); |
1907 | ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START)); | 1902 | ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START)); |
1908 | ib.ptr[2] = 0xDEADBEEF; | 1903 | ib.ptr[2] = 0xDEADBEEF; |
@@ -1914,22 +1909,16 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
1914 | 1909 | ||
1915 | r = dma_fence_wait_timeout(f, false, timeout); | 1910 | r = dma_fence_wait_timeout(f, false, timeout); |
1916 | if (r == 0) { | 1911 | if (r == 0) { |
1917 | DRM_ERROR("amdgpu: IB test timed out\n"); | ||
1918 | r = -ETIMEDOUT; | 1912 | r = -ETIMEDOUT; |
1919 | goto err2; | 1913 | goto err2; |
1920 | } else if (r < 0) { | 1914 | } else if (r < 0) { |
1921 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
1922 | goto err2; | 1915 | goto err2; |
1923 | } | 1916 | } |
1924 | tmp = RREG32(scratch); | 1917 | tmp = RREG32(scratch); |
1925 | if (tmp == 0xDEADBEEF) { | 1918 | if (tmp == 0xDEADBEEF) |
1926 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
1927 | r = 0; | 1919 | r = 0; |
1928 | } else { | 1920 | else |
1929 | DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", | ||
1930 | scratch, tmp); | ||
1931 | r = -EINVAL; | 1921 | r = -EINVAL; |
1932 | } | ||
1933 | 1922 | ||
1934 | err2: | 1923 | err2: |
1935 | amdgpu_ib_free(adev, &ib, NULL); | 1924 | amdgpu_ib_free(adev, &ib, NULL); |
@@ -1950,9 +1939,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) | |||
1950 | CP_ME_CNTL__CE_HALT_MASK)); | 1939 | CP_ME_CNTL__CE_HALT_MASK)); |
1951 | WREG32(mmSCRATCH_UMSK, 0); | 1940 | WREG32(mmSCRATCH_UMSK, 0); |
1952 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) | 1941 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
1953 | adev->gfx.gfx_ring[i].ready = false; | 1942 | adev->gfx.gfx_ring[i].sched.ready = false; |
1954 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 1943 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
1955 | adev->gfx.compute_ring[i].ready = false; | 1944 | adev->gfx.compute_ring[i].sched.ready = false; |
1956 | } | 1945 | } |
1957 | udelay(50); | 1946 | udelay(50); |
1958 | } | 1947 | } |
@@ -2124,12 +2113,9 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) | |||
2124 | 2113 | ||
2125 | /* start the rings */ | 2114 | /* start the rings */ |
2126 | gfx_v6_0_cp_gfx_start(adev); | 2115 | gfx_v6_0_cp_gfx_start(adev); |
2127 | ring->ready = true; | 2116 | r = amdgpu_ring_test_helper(ring); |
2128 | r = amdgpu_ring_test_ring(ring); | 2117 | if (r) |
2129 | if (r) { | ||
2130 | ring->ready = false; | ||
2131 | return r; | 2118 | return r; |
2132 | } | ||
2133 | 2119 | ||
2134 | return 0; | 2120 | return 0; |
2135 | } | 2121 | } |
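Several resume paths in this series swap the open-coded "mark ready, test, clear ready on failure" sequence for amdgpu_ring_test_helper(). Reconstructed from those call sites, the helper is expected to look roughly like the sketch below: run the ring test, log on failure, and record the outcome in ring->sched.ready. This is an illustration, not the body committed to amdgpu_ring.c (which is outside this excerpt).

/* Sketch only: inferred from the converted call sites in this diff. */
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);

	/* the scheduler flag replaces the old ring->ready bool */
	ring->sched.ready = !r;

	return r;
}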
@@ -2227,14 +2213,11 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev) | |||
2227 | WREG32(mmCP_RB2_CNTL, tmp); | 2213 | WREG32(mmCP_RB2_CNTL, tmp); |
2228 | WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8); | 2214 | WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8); |
2229 | 2215 | ||
2230 | adev->gfx.compute_ring[0].ready = false; | ||
2231 | adev->gfx.compute_ring[1].ready = false; | ||
2232 | 2216 | ||
2233 | for (i = 0; i < 2; i++) { | 2217 | for (i = 0; i < 2; i++) { |
2234 | r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]); | 2218 | r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]); |
2235 | if (r) | 2219 | if (r) |
2236 | return r; | 2220 | return r; |
2237 | adev->gfx.compute_ring[i].ready = true; | ||
2238 | } | 2221 | } |
2239 | 2222 | ||
2240 | return 0; | 2223 | return 0; |
@@ -2368,18 +2351,11 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, | |||
2368 | amdgpu_ring_write(ring, val); | 2351 | amdgpu_ring_write(ring, val); |
2369 | } | 2352 | } |
2370 | 2353 | ||
2371 | static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) | ||
2372 | { | ||
2373 | amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); | ||
2374 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); | ||
2375 | amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); | ||
2376 | } | ||
2377 | |||
2378 | static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) | 2354 | static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) |
2379 | { | 2355 | { |
2380 | const u32 *src_ptr; | 2356 | const u32 *src_ptr; |
2381 | volatile u32 *dst_ptr; | 2357 | volatile u32 *dst_ptr; |
2382 | u32 dws, i; | 2358 | u32 dws; |
2383 | u64 reg_list_mc_addr; | 2359 | u64 reg_list_mc_addr; |
2384 | const struct cs_section_def *cs_data; | 2360 | const struct cs_section_def *cs_data; |
2385 | int r; | 2361 | int r; |
@@ -2394,26 +2370,10 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) | |||
2394 | cs_data = adev->gfx.rlc.cs_data; | 2370 | cs_data = adev->gfx.rlc.cs_data; |
2395 | 2371 | ||
2396 | if (src_ptr) { | 2372 | if (src_ptr) { |
2397 | /* save restore block */ | 2373 | /* init save restore block */ |
2398 | r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, | 2374 | r = amdgpu_gfx_rlc_init_sr(adev, dws); |
2399 | AMDGPU_GEM_DOMAIN_VRAM, | 2375 | if (r) |
2400 | &adev->gfx.rlc.save_restore_obj, | ||
2401 | &adev->gfx.rlc.save_restore_gpu_addr, | ||
2402 | (void **)&adev->gfx.rlc.sr_ptr); | ||
2403 | if (r) { | ||
2404 | dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", | ||
2405 | r); | ||
2406 | gfx_v6_0_rlc_fini(adev); | ||
2407 | return r; | 2376 | return r; |
2408 | } | ||
2409 | |||
2410 | /* write the sr buffer */ | ||
2411 | dst_ptr = adev->gfx.rlc.sr_ptr; | ||
2412 | for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) | ||
2413 | dst_ptr[i] = cpu_to_le32(src_ptr[i]); | ||
2414 | |||
2415 | amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); | ||
2416 | amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); | ||
2417 | } | 2377 | } |
2418 | 2378 | ||
2419 | if (cs_data) { | 2379 | if (cs_data) { |
@@ -2428,7 +2388,7 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) | |||
2428 | (void **)&adev->gfx.rlc.cs_ptr); | 2388 | (void **)&adev->gfx.rlc.cs_ptr); |
2429 | if (r) { | 2389 | if (r) { |
2430 | dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); | 2390 | dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); |
2431 | gfx_v6_0_rlc_fini(adev); | 2391 | amdgpu_gfx_rlc_fini(adev); |
2432 | return r; | 2392 | return r; |
2433 | } | 2393 | } |
2434 | 2394 | ||
@@ -2549,8 +2509,8 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev) | |||
2549 | if (!adev->gfx.rlc_fw) | 2509 | if (!adev->gfx.rlc_fw) |
2550 | return -EINVAL; | 2510 | return -EINVAL; |
2551 | 2511 | ||
2552 | gfx_v6_0_rlc_stop(adev); | 2512 | adev->gfx.rlc.funcs->stop(adev); |
2553 | gfx_v6_0_rlc_reset(adev); | 2513 | adev->gfx.rlc.funcs->reset(adev); |
2554 | gfx_v6_0_init_pg(adev); | 2514 | gfx_v6_0_init_pg(adev); |
2555 | gfx_v6_0_init_cg(adev); | 2515 | gfx_v6_0_init_cg(adev); |
2556 | 2516 | ||
@@ -2578,7 +2538,7 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev) | |||
2578 | WREG32(mmRLC_UCODE_ADDR, 0); | 2538 | WREG32(mmRLC_UCODE_ADDR, 0); |
2579 | 2539 | ||
2580 | gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev)); | 2540 | gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev)); |
2581 | gfx_v6_0_rlc_start(adev); | 2541 | adev->gfx.rlc.funcs->start(adev); |
2582 | 2542 | ||
2583 | return 0; | 2543 | return 0; |
2584 | } | 2544 | } |
@@ -3075,6 +3035,14 @@ static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = { | |||
3075 | .select_me_pipe_q = &gfx_v6_0_select_me_pipe_q | 3035 | .select_me_pipe_q = &gfx_v6_0_select_me_pipe_q |
3076 | }; | 3036 | }; |
3077 | 3037 | ||
3038 | static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = { | ||
3039 | .init = gfx_v6_0_rlc_init, | ||
3040 | .resume = gfx_v6_0_rlc_resume, | ||
3041 | .stop = gfx_v6_0_rlc_stop, | ||
3042 | .reset = gfx_v6_0_rlc_reset, | ||
3043 | .start = gfx_v6_0_rlc_start | ||
3044 | }; | ||
3045 | |||
3078 | static int gfx_v6_0_early_init(void *handle) | 3046 | static int gfx_v6_0_early_init(void *handle) |
3079 | { | 3047 | { |
3080 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 3048 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
@@ -3082,6 +3050,7 @@ static int gfx_v6_0_early_init(void *handle) | |||
3082 | adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; | 3050 | adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; |
3083 | adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS; | 3051 | adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS; |
3084 | adev->gfx.funcs = &gfx_v6_0_gfx_funcs; | 3052 | adev->gfx.funcs = &gfx_v6_0_gfx_funcs; |
3053 | adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs; | ||
3085 | gfx_v6_0_set_ring_funcs(adev); | 3054 | gfx_v6_0_set_ring_funcs(adev); |
3086 | gfx_v6_0_set_irq_funcs(adev); | 3055 | gfx_v6_0_set_irq_funcs(adev); |
3087 | 3056 | ||
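The two hunks above register a per-ASIC RLC ops table and hook it up in early_init, so the rest of the file (and the new common code in amdgpu_rlc.c) can drive the RLC through adev->gfx.rlc.funcs instead of calling gfx_v6_0_rlc_* directly. Collecting the members that appear in initializers across this diff, the ops table presumably has roughly the shape below; the authoritative definition lives in the new amdgpu_rlc.h, which is not shown here, and the exact prototypes are inferred from the callbacks being assigned.

struct amdgpu_rlc_funcs {
	bool (*is_rlc_enabled)(struct amdgpu_device *adev);
	void (*set_safe_mode)(struct amdgpu_device *adev);
	void (*unset_safe_mode)(struct amdgpu_device *adev);
	int  (*init)(struct amdgpu_device *adev);
	u32  (*get_csb_size)(struct amdgpu_device *adev);
	void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
	int  (*get_cp_table_num)(struct amdgpu_device *adev);
	int  (*resume)(struct amdgpu_device *adev);
	void (*stop)(struct amdgpu_device *adev);
	void (*reset)(struct amdgpu_device *adev);
	void (*start)(struct amdgpu_device *adev);
};

gfx_v6_0 only fills in the five lifecycle callbacks here; the gfx_v7_0 and gfx_v8_0 hunks later in this diff additionally wire up the safe-mode, CSB and CP-table hooks.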
@@ -3114,7 +3083,7 @@ static int gfx_v6_0_sw_init(void *handle) | |||
3114 | return r; | 3083 | return r; |
3115 | } | 3084 | } |
3116 | 3085 | ||
3117 | r = gfx_v6_0_rlc_init(adev); | 3086 | r = adev->gfx.rlc.funcs->init(adev); |
3118 | if (r) { | 3087 | if (r) { |
3119 | DRM_ERROR("Failed to init rlc BOs!\n"); | 3088 | DRM_ERROR("Failed to init rlc BOs!\n"); |
3120 | return r; | 3089 | return r; |
@@ -3165,7 +3134,7 @@ static int gfx_v6_0_sw_fini(void *handle) | |||
3165 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 3134 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
3166 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); | 3135 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); |
3167 | 3136 | ||
3168 | gfx_v6_0_rlc_fini(adev); | 3137 | amdgpu_gfx_rlc_fini(adev); |
3169 | 3138 | ||
3170 | return 0; | 3139 | return 0; |
3171 | } | 3140 | } |
@@ -3177,7 +3146,7 @@ static int gfx_v6_0_hw_init(void *handle) | |||
3177 | 3146 | ||
3178 | gfx_v6_0_constants_init(adev); | 3147 | gfx_v6_0_constants_init(adev); |
3179 | 3148 | ||
3180 | r = gfx_v6_0_rlc_resume(adev); | 3149 | r = adev->gfx.rlc.funcs->resume(adev); |
3181 | if (r) | 3150 | if (r) |
3182 | return r; | 3151 | return r; |
3183 | 3152 | ||
@@ -3195,7 +3164,7 @@ static int gfx_v6_0_hw_fini(void *handle) | |||
3195 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 3164 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
3196 | 3165 | ||
3197 | gfx_v6_0_cp_enable(adev, false); | 3166 | gfx_v6_0_cp_enable(adev, false); |
3198 | gfx_v6_0_rlc_stop(adev); | 3167 | adev->gfx.rlc.funcs->stop(adev); |
3199 | gfx_v6_0_fini_pg(adev); | 3168 | gfx_v6_0_fini_pg(adev); |
3200 | 3169 | ||
3201 | return 0; | 3170 | return 0; |
@@ -3393,12 +3362,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev, | |||
3393 | return 0; | 3362 | return 0; |
3394 | } | 3363 | } |
3395 | 3364 | ||
3365 | static void gfx_v6_0_fault(struct amdgpu_device *adev, | ||
3366 | struct amdgpu_iv_entry *entry) | ||
3367 | { | ||
3368 | struct amdgpu_ring *ring; | ||
3369 | |||
3370 | switch (entry->ring_id) { | ||
3371 | case 0: | ||
3372 | ring = &adev->gfx.gfx_ring[0]; | ||
3373 | break; | ||
3374 | case 1: | ||
3375 | case 2: | ||
3376 | ring = &adev->gfx.compute_ring[entry->ring_id - 1]; | ||
3377 | break; | ||
3378 | default: | ||
3379 | return; | ||
3380 | } | ||
3381 | drm_sched_fault(&ring->sched); | ||
3382 | } | ||
3383 | |||
3396 | static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev, | 3384 | static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev, |
3397 | struct amdgpu_irq_src *source, | 3385 | struct amdgpu_irq_src *source, |
3398 | struct amdgpu_iv_entry *entry) | 3386 | struct amdgpu_iv_entry *entry) |
3399 | { | 3387 | { |
3400 | DRM_ERROR("Illegal register access in command stream\n"); | 3388 | DRM_ERROR("Illegal register access in command stream\n"); |
3401 | schedule_work(&adev->reset_work); | 3389 | gfx_v6_0_fault(adev, entry); |
3402 | return 0; | 3390 | return 0; |
3403 | } | 3391 | } |
3404 | 3392 | ||
@@ -3407,7 +3395,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, | |||
3407 | struct amdgpu_iv_entry *entry) | 3395 | struct amdgpu_iv_entry *entry) |
3408 | { | 3396 | { |
3409 | DRM_ERROR("Illegal instruction in command stream\n"); | 3397 | DRM_ERROR("Illegal instruction in command stream\n"); |
3410 | schedule_work(&adev->reset_work); | 3398 | gfx_v6_0_fault(adev, entry); |
3411 | return 0; | 3399 | return 0; |
3412 | } | 3400 | } |
3413 | 3401 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 0e72bc09939a..f467b9bd090d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | |||
@@ -882,7 +882,6 @@ static const u32 kalindi_rlc_save_restore_register_list[] = | |||
882 | 882 | ||
883 | static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); | 883 | static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); |
884 | static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); | 884 | static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); |
885 | static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev); | ||
886 | static void gfx_v7_0_init_pg(struct amdgpu_device *adev); | 885 | static void gfx_v7_0_init_pg(struct amdgpu_device *adev); |
887 | static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); | 886 | static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); |
888 | 887 | ||
@@ -2064,17 +2063,14 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
2064 | int r; | 2063 | int r; |
2065 | 2064 | ||
2066 | r = amdgpu_gfx_scratch_get(adev, &scratch); | 2065 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
2067 | if (r) { | 2066 | if (r) |
2068 | DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); | ||
2069 | return r; | 2067 | return r; |
2070 | } | 2068 | |
2071 | WREG32(scratch, 0xCAFEDEAD); | 2069 | WREG32(scratch, 0xCAFEDEAD); |
2072 | r = amdgpu_ring_alloc(ring, 3); | 2070 | r = amdgpu_ring_alloc(ring, 3); |
2073 | if (r) { | 2071 | if (r) |
2074 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); | 2072 | goto error_free_scratch; |
2075 | amdgpu_gfx_scratch_free(adev, scratch); | 2073 | |
2076 | return r; | ||
2077 | } | ||
2078 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | 2074 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); |
2079 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | 2075 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); |
2080 | amdgpu_ring_write(ring, 0xDEADBEEF); | 2076 | amdgpu_ring_write(ring, 0xDEADBEEF); |
@@ -2086,13 +2082,10 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
2086 | break; | 2082 | break; |
2087 | DRM_UDELAY(1); | 2083 | DRM_UDELAY(1); |
2088 | } | 2084 | } |
2089 | if (i < adev->usec_timeout) { | 2085 | if (i >= adev->usec_timeout) |
2090 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); | 2086 | r = -ETIMEDOUT; |
2091 | } else { | 2087 | |
2092 | DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", | 2088 | error_free_scratch: |
2093 | ring->idx, scratch, tmp); | ||
2094 | r = -EINVAL; | ||
2095 | } | ||
2096 | amdgpu_gfx_scratch_free(adev, scratch); | 2089 | amdgpu_gfx_scratch_free(adev, scratch); |
2097 | return r; | 2090 | return r; |
2098 | } | 2091 | } |
@@ -2233,9 +2226,11 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, | |||
2233 | * on the gfx ring for execution by the GPU. | 2226 | * on the gfx ring for execution by the GPU. |
2234 | */ | 2227 | */ |
2235 | static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | 2228 | static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, |
2236 | struct amdgpu_ib *ib, | 2229 | struct amdgpu_job *job, |
2237 | unsigned vmid, bool ctx_switch) | 2230 | struct amdgpu_ib *ib, |
2231 | bool ctx_switch) | ||
2238 | { | 2232 | { |
2233 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
2239 | u32 header, control = 0; | 2234 | u32 header, control = 0; |
2240 | 2235 | ||
2241 | /* insert SWITCH_BUFFER packet before first IB in the ring frame */ | 2236 | /* insert SWITCH_BUFFER packet before first IB in the ring frame */ |
@@ -2262,9 +2257,11 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | |||
2262 | } | 2257 | } |
2263 | 2258 | ||
2264 | static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, | 2259 | static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, |
2260 | struct amdgpu_job *job, | ||
2265 | struct amdgpu_ib *ib, | 2261 | struct amdgpu_ib *ib, |
2266 | unsigned vmid, bool ctx_switch) | 2262 | bool ctx_switch) |
2267 | { | 2263 | { |
2264 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
2268 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); | 2265 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); |
2269 | 2266 | ||
2270 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); | 2267 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); |
@@ -2316,17 +2313,15 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
2316 | long r; | 2313 | long r; |
2317 | 2314 | ||
2318 | r = amdgpu_gfx_scratch_get(adev, &scratch); | 2315 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
2319 | if (r) { | 2316 | if (r) |
2320 | DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); | ||
2321 | return r; | 2317 | return r; |
2322 | } | 2318 | |
2323 | WREG32(scratch, 0xCAFEDEAD); | 2319 | WREG32(scratch, 0xCAFEDEAD); |
2324 | memset(&ib, 0, sizeof(ib)); | 2320 | memset(&ib, 0, sizeof(ib)); |
2325 | r = amdgpu_ib_get(adev, NULL, 256, &ib); | 2321 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
2326 | if (r) { | 2322 | if (r) |
2327 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
2328 | goto err1; | 2323 | goto err1; |
2329 | } | 2324 | |
2330 | ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); | 2325 | ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); |
2331 | ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); | 2326 | ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); |
2332 | ib.ptr[2] = 0xDEADBEEF; | 2327 | ib.ptr[2] = 0xDEADBEEF; |
@@ -2338,22 +2333,16 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
2338 | 2333 | ||
2339 | r = dma_fence_wait_timeout(f, false, timeout); | 2334 | r = dma_fence_wait_timeout(f, false, timeout); |
2340 | if (r == 0) { | 2335 | if (r == 0) { |
2341 | DRM_ERROR("amdgpu: IB test timed out\n"); | ||
2342 | r = -ETIMEDOUT; | 2336 | r = -ETIMEDOUT; |
2343 | goto err2; | 2337 | goto err2; |
2344 | } else if (r < 0) { | 2338 | } else if (r < 0) { |
2345 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
2346 | goto err2; | 2339 | goto err2; |
2347 | } | 2340 | } |
2348 | tmp = RREG32(scratch); | 2341 | tmp = RREG32(scratch); |
2349 | if (tmp == 0xDEADBEEF) { | 2342 | if (tmp == 0xDEADBEEF) |
2350 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
2351 | r = 0; | 2343 | r = 0; |
2352 | } else { | 2344 | else |
2353 | DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", | ||
2354 | scratch, tmp); | ||
2355 | r = -EINVAL; | 2345 | r = -EINVAL; |
2356 | } | ||
2357 | 2346 | ||
2358 | err2: | 2347 | err2: |
2359 | amdgpu_ib_free(adev, &ib, NULL); | 2348 | amdgpu_ib_free(adev, &ib, NULL); |
@@ -2403,7 +2392,7 @@ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) | |||
2403 | } else { | 2392 | } else { |
2404 | WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK)); | 2393 | WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK)); |
2405 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) | 2394 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
2406 | adev->gfx.gfx_ring[i].ready = false; | 2395 | adev->gfx.gfx_ring[i].sched.ready = false; |
2407 | } | 2396 | } |
2408 | udelay(50); | 2397 | udelay(50); |
2409 | } | 2398 | } |
@@ -2613,12 +2602,9 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) | |||
2613 | 2602 | ||
2614 | /* start the ring */ | 2603 | /* start the ring */ |
2615 | gfx_v7_0_cp_gfx_start(adev); | 2604 | gfx_v7_0_cp_gfx_start(adev); |
2616 | ring->ready = true; | 2605 | r = amdgpu_ring_test_helper(ring); |
2617 | r = amdgpu_ring_test_ring(ring); | 2606 | if (r) |
2618 | if (r) { | ||
2619 | ring->ready = false; | ||
2620 | return r; | 2607 | return r; |
2621 | } | ||
2622 | 2608 | ||
2623 | return 0; | 2609 | return 0; |
2624 | } | 2610 | } |
@@ -2675,7 +2661,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) | |||
2675 | } else { | 2661 | } else { |
2676 | WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); | 2662 | WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); |
2677 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 2663 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
2678 | adev->gfx.compute_ring[i].ready = false; | 2664 | adev->gfx.compute_ring[i].sched.ready = false; |
2679 | } | 2665 | } |
2680 | udelay(50); | 2666 | udelay(50); |
2681 | } | 2667 | } |
@@ -2781,7 +2767,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) | |||
2781 | * GFX7_MEC_HPD_SIZE * 2; | 2767 | * GFX7_MEC_HPD_SIZE * 2; |
2782 | 2768 | ||
2783 | r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, | 2769 | r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, |
2784 | AMDGPU_GEM_DOMAIN_GTT, | 2770 | AMDGPU_GEM_DOMAIN_VRAM, |
2785 | &adev->gfx.mec.hpd_eop_obj, | 2771 | &adev->gfx.mec.hpd_eop_obj, |
2786 | &adev->gfx.mec.hpd_eop_gpu_addr, | 2772 | &adev->gfx.mec.hpd_eop_gpu_addr, |
2787 | (void **)&hpd); | 2773 | (void **)&hpd); |
@@ -3106,10 +3092,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) | |||
3106 | 3092 | ||
3107 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 3093 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
3108 | ring = &adev->gfx.compute_ring[i]; | 3094 | ring = &adev->gfx.compute_ring[i]; |
3109 | ring->ready = true; | 3095 | amdgpu_ring_test_helper(ring); |
3110 | r = amdgpu_ring_test_ring(ring); | ||
3111 | if (r) | ||
3112 | ring->ready = false; | ||
3113 | } | 3096 | } |
3114 | 3097 | ||
3115 | return 0; | 3098 | return 0; |
@@ -3268,18 +3251,10 @@ static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, | |||
3268 | * The RLC is a multi-purpose microengine that handles a | 3251 | * The RLC is a multi-purpose microengine that handles a |
3269 | * variety of functions. | 3252 | * variety of functions. |
3270 | */ | 3253 | */ |
3271 | static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) | ||
3272 | { | ||
3273 | amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); | ||
3274 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); | ||
3275 | amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); | ||
3276 | } | ||
3277 | |||
3278 | static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) | 3254 | static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) |
3279 | { | 3255 | { |
3280 | const u32 *src_ptr; | 3256 | const u32 *src_ptr; |
3281 | volatile u32 *dst_ptr; | 3257 | u32 dws; |
3282 | u32 dws, i; | ||
3283 | const struct cs_section_def *cs_data; | 3258 | const struct cs_section_def *cs_data; |
3284 | int r; | 3259 | int r; |
3285 | 3260 | ||
@@ -3306,66 +3281,23 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) | |||
3306 | cs_data = adev->gfx.rlc.cs_data; | 3281 | cs_data = adev->gfx.rlc.cs_data; |
3307 | 3282 | ||
3308 | if (src_ptr) { | 3283 | if (src_ptr) { |
3309 | /* save restore block */ | 3284 | /* init save restore block */ |
3310 | r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, | 3285 | r = amdgpu_gfx_rlc_init_sr(adev, dws); |
3311 | AMDGPU_GEM_DOMAIN_VRAM, | 3286 | if (r) |
3312 | &adev->gfx.rlc.save_restore_obj, | ||
3313 | &adev->gfx.rlc.save_restore_gpu_addr, | ||
3314 | (void **)&adev->gfx.rlc.sr_ptr); | ||
3315 | if (r) { | ||
3316 | dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r); | ||
3317 | gfx_v7_0_rlc_fini(adev); | ||
3318 | return r; | 3287 | return r; |
3319 | } | ||
3320 | |||
3321 | /* write the sr buffer */ | ||
3322 | dst_ptr = adev->gfx.rlc.sr_ptr; | ||
3323 | for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) | ||
3324 | dst_ptr[i] = cpu_to_le32(src_ptr[i]); | ||
3325 | amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); | ||
3326 | amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); | ||
3327 | } | 3288 | } |
3328 | 3289 | ||
3329 | if (cs_data) { | 3290 | if (cs_data) { |
3330 | /* clear state block */ | 3291 | /* init clear state block */ |
3331 | adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev); | 3292 | r = amdgpu_gfx_rlc_init_csb(adev); |
3332 | 3293 | if (r) | |
3333 | r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, | ||
3334 | AMDGPU_GEM_DOMAIN_VRAM, | ||
3335 | &adev->gfx.rlc.clear_state_obj, | ||
3336 | &adev->gfx.rlc.clear_state_gpu_addr, | ||
3337 | (void **)&adev->gfx.rlc.cs_ptr); | ||
3338 | if (r) { | ||
3339 | dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); | ||
3340 | gfx_v7_0_rlc_fini(adev); | ||
3341 | return r; | 3294 | return r; |
3342 | } | ||
3343 | |||
3344 | /* set up the cs buffer */ | ||
3345 | dst_ptr = adev->gfx.rlc.cs_ptr; | ||
3346 | gfx_v7_0_get_csb_buffer(adev, dst_ptr); | ||
3347 | amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); | ||
3348 | amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); | ||
3349 | } | 3295 | } |
3350 | 3296 | ||
3351 | if (adev->gfx.rlc.cp_table_size) { | 3297 | if (adev->gfx.rlc.cp_table_size) { |
3352 | 3298 | r = amdgpu_gfx_rlc_init_cpt(adev); | |
3353 | r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, | 3299 | if (r) |
3354 | PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, | ||
3355 | &adev->gfx.rlc.cp_table_obj, | ||
3356 | &adev->gfx.rlc.cp_table_gpu_addr, | ||
3357 | (void **)&adev->gfx.rlc.cp_table_ptr); | ||
3358 | if (r) { | ||
3359 | dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); | ||
3360 | gfx_v7_0_rlc_fini(adev); | ||
3361 | return r; | 3300 | return r; |
3362 | } | ||
3363 | |||
3364 | gfx_v7_0_init_cp_pg_table(adev); | ||
3365 | |||
3366 | amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); | ||
3367 | amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); | ||
3368 | |||
3369 | } | 3301 | } |
3370 | 3302 | ||
3371 | return 0; | 3303 | return 0; |
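gfx_v7_0_rlc_init() now delegates the save/restore, clear-state and CP-table buffer setup to shared helpers (amdgpu_gfx_rlc_init_sr/csb/cpt). Reconstructed from the open-coded sequence deleted in this hunk, the save/restore helper would look approximately as below; this is a sketch of the expected consolidation, not the body actually committed to amdgpu_rlc.c, and the reg_list field name is an assumption carried over from the old per-ASIC code.

int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
{
	const u32 *src_ptr = adev->gfx.rlc.reg_list;
	volatile u32 *dst_ptr;
	u32 i;
	int r;

	/* allocate the save/restore buffer in VRAM, mapped for CPU writes */
	r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.rlc.save_restore_obj,
				      &adev->gfx.rlc.save_restore_gpu_addr,
				      (void **)&adev->gfx.rlc.sr_ptr);
	if (r) {
		dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
		amdgpu_gfx_rlc_fini(adev);
		return r;
	}

	/* write the save/restore register list into the buffer */
	dst_ptr = adev->gfx.rlc.sr_ptr;
	for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
		dst_ptr[i] = cpu_to_le32(src_ptr[i]);

	amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
	amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);

	return 0;
}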
@@ -3446,7 +3378,12 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev) | |||
3446 | return orig; | 3378 | return orig; |
3447 | } | 3379 | } |
3448 | 3380 | ||
3449 | static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) | 3381 | static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev) |
3382 | { | ||
3383 | return true; | ||
3384 | } | ||
3385 | |||
3386 | static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) | ||
3450 | { | 3387 | { |
3451 | u32 tmp, i, mask; | 3388 | u32 tmp, i, mask; |
3452 | 3389 | ||
@@ -3468,7 +3405,7 @@ static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) | |||
3468 | } | 3405 | } |
3469 | } | 3406 | } |
3470 | 3407 | ||
3471 | static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) | 3408 | static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev) |
3472 | { | 3409 | { |
3473 | u32 tmp; | 3410 | u32 tmp; |
3474 | 3411 | ||
@@ -3545,13 +3482,13 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) | |||
3545 | adev->gfx.rlc_feature_version = le32_to_cpu( | 3482 | adev->gfx.rlc_feature_version = le32_to_cpu( |
3546 | hdr->ucode_feature_version); | 3483 | hdr->ucode_feature_version); |
3547 | 3484 | ||
3548 | gfx_v7_0_rlc_stop(adev); | 3485 | adev->gfx.rlc.funcs->stop(adev); |
3549 | 3486 | ||
3550 | /* disable CG */ | 3487 | /* disable CG */ |
3551 | tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc; | 3488 | tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc; |
3552 | WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); | 3489 | WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); |
3553 | 3490 | ||
3554 | gfx_v7_0_rlc_reset(adev); | 3491 | adev->gfx.rlc.funcs->reset(adev); |
3555 | 3492 | ||
3556 | gfx_v7_0_init_pg(adev); | 3493 | gfx_v7_0_init_pg(adev); |
3557 | 3494 | ||
@@ -3582,7 +3519,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) | |||
3582 | if (adev->asic_type == CHIP_BONAIRE) | 3519 | if (adev->asic_type == CHIP_BONAIRE) |
3583 | WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0); | 3520 | WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0); |
3584 | 3521 | ||
3585 | gfx_v7_0_rlc_start(adev); | 3522 | adev->gfx.rlc.funcs->start(adev); |
3586 | 3523 | ||
3587 | return 0; | 3524 | return 0; |
3588 | } | 3525 | } |
@@ -3784,72 +3721,12 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable) | |||
3784 | WREG32(mmRLC_PG_CNTL, data); | 3721 | WREG32(mmRLC_PG_CNTL, data); |
3785 | } | 3722 | } |
3786 | 3723 | ||
3787 | static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev) | 3724 | static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev) |
3788 | { | 3725 | { |
3789 | const __le32 *fw_data; | ||
3790 | volatile u32 *dst_ptr; | ||
3791 | int me, i, max_me = 4; | ||
3792 | u32 bo_offset = 0; | ||
3793 | u32 table_offset, table_size; | ||
3794 | |||
3795 | if (adev->asic_type == CHIP_KAVERI) | 3726 | if (adev->asic_type == CHIP_KAVERI) |
3796 | max_me = 5; | 3727 | return 5; |
3797 | 3728 | else | |
3798 | if (adev->gfx.rlc.cp_table_ptr == NULL) | 3729 | return 4; |
3799 | return; | ||
3800 | |||
3801 | /* write the cp table buffer */ | ||
3802 | dst_ptr = adev->gfx.rlc.cp_table_ptr; | ||
3803 | for (me = 0; me < max_me; me++) { | ||
3804 | if (me == 0) { | ||
3805 | const struct gfx_firmware_header_v1_0 *hdr = | ||
3806 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; | ||
3807 | fw_data = (const __le32 *) | ||
3808 | (adev->gfx.ce_fw->data + | ||
3809 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
3810 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
3811 | table_size = le32_to_cpu(hdr->jt_size); | ||
3812 | } else if (me == 1) { | ||
3813 | const struct gfx_firmware_header_v1_0 *hdr = | ||
3814 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; | ||
3815 | fw_data = (const __le32 *) | ||
3816 | (adev->gfx.pfp_fw->data + | ||
3817 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
3818 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
3819 | table_size = le32_to_cpu(hdr->jt_size); | ||
3820 | } else if (me == 2) { | ||
3821 | const struct gfx_firmware_header_v1_0 *hdr = | ||
3822 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; | ||
3823 | fw_data = (const __le32 *) | ||
3824 | (adev->gfx.me_fw->data + | ||
3825 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
3826 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
3827 | table_size = le32_to_cpu(hdr->jt_size); | ||
3828 | } else if (me == 3) { | ||
3829 | const struct gfx_firmware_header_v1_0 *hdr = | ||
3830 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; | ||
3831 | fw_data = (const __le32 *) | ||
3832 | (adev->gfx.mec_fw->data + | ||
3833 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
3834 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
3835 | table_size = le32_to_cpu(hdr->jt_size); | ||
3836 | } else { | ||
3837 | const struct gfx_firmware_header_v1_0 *hdr = | ||
3838 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; | ||
3839 | fw_data = (const __le32 *) | ||
3840 | (adev->gfx.mec2_fw->data + | ||
3841 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
3842 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
3843 | table_size = le32_to_cpu(hdr->jt_size); | ||
3844 | } | ||
3845 | |||
3846 | for (i = 0; i < table_size; i ++) { | ||
3847 | dst_ptr[bo_offset + i] = | ||
3848 | cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); | ||
3849 | } | ||
3850 | |||
3851 | bo_offset += table_size; | ||
3852 | } | ||
3853 | } | 3730 | } |
3854 | 3731 | ||
3855 | static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, | 3732 | static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, |
@@ -4288,8 +4165,17 @@ static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { | |||
4288 | }; | 4165 | }; |
4289 | 4166 | ||
4290 | static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { | 4167 | static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { |
4291 | .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode, | 4168 | .is_rlc_enabled = gfx_v7_0_is_rlc_enabled, |
4292 | .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode | 4169 | .set_safe_mode = gfx_v7_0_set_safe_mode, |
4170 | .unset_safe_mode = gfx_v7_0_unset_safe_mode, | ||
4171 | .init = gfx_v7_0_rlc_init, | ||
4172 | .get_csb_size = gfx_v7_0_get_csb_size, | ||
4173 | .get_csb_buffer = gfx_v7_0_get_csb_buffer, | ||
4174 | .get_cp_table_num = gfx_v7_0_cp_pg_table_num, | ||
4175 | .resume = gfx_v7_0_rlc_resume, | ||
4176 | .stop = gfx_v7_0_rlc_stop, | ||
4177 | .reset = gfx_v7_0_rlc_reset, | ||
4178 | .start = gfx_v7_0_rlc_start | ||
4293 | }; | 4179 | }; |
4294 | 4180 | ||
4295 | static int gfx_v7_0_early_init(void *handle) | 4181 | static int gfx_v7_0_early_init(void *handle) |
@@ -4540,7 +4426,7 @@ static int gfx_v7_0_sw_init(void *handle) | |||
4540 | return r; | 4426 | return r; |
4541 | } | 4427 | } |
4542 | 4428 | ||
4543 | r = gfx_v7_0_rlc_init(adev); | 4429 | r = adev->gfx.rlc.funcs->init(adev); |
4544 | if (r) { | 4430 | if (r) { |
4545 | DRM_ERROR("Failed to init rlc BOs!\n"); | 4431 | DRM_ERROR("Failed to init rlc BOs!\n"); |
4546 | return r; | 4432 | return r; |
@@ -4604,7 +4490,7 @@ static int gfx_v7_0_sw_fini(void *handle) | |||
4604 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); | 4490 | amdgpu_ring_fini(&adev->gfx.compute_ring[i]); |
4605 | 4491 | ||
4606 | gfx_v7_0_cp_compute_fini(adev); | 4492 | gfx_v7_0_cp_compute_fini(adev); |
4607 | gfx_v7_0_rlc_fini(adev); | 4493 | amdgpu_gfx_rlc_fini(adev); |
4608 | gfx_v7_0_mec_fini(adev); | 4494 | gfx_v7_0_mec_fini(adev); |
4609 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, | 4495 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, |
4610 | &adev->gfx.rlc.clear_state_gpu_addr, | 4496 | &adev->gfx.rlc.clear_state_gpu_addr, |
@@ -4627,7 +4513,7 @@ static int gfx_v7_0_hw_init(void *handle) | |||
4627 | gfx_v7_0_constants_init(adev); | 4513 | gfx_v7_0_constants_init(adev); |
4628 | 4514 | ||
4629 | /* init rlc */ | 4515 | /* init rlc */ |
4630 | r = gfx_v7_0_rlc_resume(adev); | 4516 | r = adev->gfx.rlc.funcs->resume(adev); |
4631 | if (r) | 4517 | if (r) |
4632 | return r; | 4518 | return r; |
4633 | 4519 | ||
@@ -4645,7 +4531,7 @@ static int gfx_v7_0_hw_fini(void *handle) | |||
4645 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); | 4531 | amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); |
4646 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); | 4532 | amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); |
4647 | gfx_v7_0_cp_enable(adev, false); | 4533 | gfx_v7_0_cp_enable(adev, false); |
4648 | gfx_v7_0_rlc_stop(adev); | 4534 | adev->gfx.rlc.funcs->stop(adev); |
4649 | gfx_v7_0_fini_pg(adev); | 4535 | gfx_v7_0_fini_pg(adev); |
4650 | 4536 | ||
4651 | return 0; | 4537 | return 0; |
@@ -4730,7 +4616,7 @@ static int gfx_v7_0_soft_reset(void *handle) | |||
4730 | gfx_v7_0_update_cg(adev, false); | 4616 | gfx_v7_0_update_cg(adev, false); |
4731 | 4617 | ||
4732 | /* stop the rlc */ | 4618 | /* stop the rlc */ |
4733 | gfx_v7_0_rlc_stop(adev); | 4619 | adev->gfx.rlc.funcs->stop(adev); |
4734 | 4620 | ||
4735 | /* Disable GFX parsing/prefetching */ | 4621 | /* Disable GFX parsing/prefetching */ |
4736 | WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); | 4622 | WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); |
@@ -4959,12 +4845,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev, | |||
4959 | return 0; | 4845 | return 0; |
4960 | } | 4846 | } |
4961 | 4847 | ||
4848 | static void gfx_v7_0_fault(struct amdgpu_device *adev, | ||
4849 | struct amdgpu_iv_entry *entry) | ||
4850 | { | ||
4851 | struct amdgpu_ring *ring; | ||
4852 | u8 me_id, pipe_id; | ||
4853 | int i; | ||
4854 | |||
4855 | me_id = (entry->ring_id & 0x0c) >> 2; | ||
4856 | pipe_id = (entry->ring_id & 0x03) >> 0; | ||
4857 | switch (me_id) { | ||
4858 | case 0: | ||
4859 | drm_sched_fault(&adev->gfx.gfx_ring[0].sched); | ||
4860 | break; | ||
4861 | case 1: | ||
4862 | case 2: | ||
4863 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
4864 | ring = &adev->gfx.compute_ring[i]; | ||
4865 | if ((ring->me == me_id) && (ring->pipe == pipe_id)) | ||
4866 | drm_sched_fault(&ring->sched); | ||
4867 | } | ||
4868 | break; | ||
4869 | } | ||
4870 | } | ||
4871 | |||
4962 | static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, | 4872 | static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, |
4963 | struct amdgpu_irq_src *source, | 4873 | struct amdgpu_irq_src *source, |
4964 | struct amdgpu_iv_entry *entry) | 4874 | struct amdgpu_iv_entry *entry) |
4965 | { | 4875 | { |
4966 | DRM_ERROR("Illegal register access in command stream\n"); | 4876 | DRM_ERROR("Illegal register access in command stream\n"); |
4967 | schedule_work(&adev->reset_work); | 4877 | gfx_v7_0_fault(adev, entry); |
4968 | return 0; | 4878 | return 0; |
4969 | } | 4879 | } |
4970 | 4880 | ||
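The new gfx_v7_0_fault() decodes the micro-engine and pipe from the IV entry's ring_id (bits 3:2 for the ME, bits 1:0 for the pipe) and only faults the scheduler of the matching ring. For example, ring_id 0x06 gives me_id = (0x06 & 0x0c) >> 2 = 1 and pipe_id = 0x06 & 0x03 = 2, so only compute rings on ME1/pipe2 are signalled, while me_id 0 maps to gfx ring 0; rings that did not raise the fault are left untouched instead of triggering a full-device reset.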
@@ -4974,7 +4884,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev, | |||
4974 | { | 4884 | { |
4975 | DRM_ERROR("Illegal instruction in command stream\n"); | 4885 | DRM_ERROR("Illegal instruction in command stream\n"); |
4976 | // XXX soft reset the gfx block only | 4886 | // XXX soft reset the gfx block only |
4977 | schedule_work(&adev->reset_work); | 4887 | gfx_v7_0_fault(adev, entry); |
4978 | return 0; | 4888 | return 0; |
4979 | } | 4889 | } |
4980 | 4890 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 617b0c8908a3..cb066a8dccd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -54,7 +54,7 @@ | |||
54 | #include "ivsrcid/ivsrcid_vislands30.h" | 54 | #include "ivsrcid/ivsrcid_vislands30.h" |
55 | 55 | ||
56 | #define GFX8_NUM_GFX_RINGS 1 | 56 | #define GFX8_NUM_GFX_RINGS 1 |
57 | #define GFX8_MEC_HPD_SIZE 2048 | 57 | #define GFX8_MEC_HPD_SIZE 4096 |
58 | 58 | ||
59 | #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 | 59 | #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 |
60 | #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 | 60 | #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 |
@@ -839,18 +839,14 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) | |||
839 | int r; | 839 | int r; |
840 | 840 | ||
841 | r = amdgpu_gfx_scratch_get(adev, &scratch); | 841 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
842 | if (r) { | 842 | if (r) |
843 | DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); | ||
844 | return r; | 843 | return r; |
845 | } | 844 | |
846 | WREG32(scratch, 0xCAFEDEAD); | 845 | WREG32(scratch, 0xCAFEDEAD); |
847 | r = amdgpu_ring_alloc(ring, 3); | 846 | r = amdgpu_ring_alloc(ring, 3); |
848 | if (r) { | 847 | if (r) |
849 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | 848 | goto error_free_scratch; |
850 | ring->idx, r); | 849 | |
851 | amdgpu_gfx_scratch_free(adev, scratch); | ||
852 | return r; | ||
853 | } | ||
854 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | 850 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); |
855 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | 851 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); |
856 | amdgpu_ring_write(ring, 0xDEADBEEF); | 852 | amdgpu_ring_write(ring, 0xDEADBEEF); |
@@ -862,14 +858,11 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) | |||
862 | break; | 858 | break; |
863 | DRM_UDELAY(1); | 859 | DRM_UDELAY(1); |
864 | } | 860 | } |
865 | if (i < adev->usec_timeout) { | 861 | |
866 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 862 | if (i >= adev->usec_timeout) |
867 | ring->idx, i); | 863 | r = -ETIMEDOUT; |
868 | } else { | 864 | |
869 | DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", | 865 | error_free_scratch: |
870 | ring->idx, scratch, tmp); | ||
871 | r = -EINVAL; | ||
872 | } | ||
873 | amdgpu_gfx_scratch_free(adev, scratch); | 866 | amdgpu_gfx_scratch_free(adev, scratch); |
874 | return r; | 867 | return r; |
875 | } | 868 | } |
@@ -886,19 +879,16 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
886 | long r; | 879 | long r; |
887 | 880 | ||
888 | r = amdgpu_device_wb_get(adev, &index); | 881 | r = amdgpu_device_wb_get(adev, &index); |
889 | if (r) { | 882 | if (r) |
890 | dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); | ||
891 | return r; | 883 | return r; |
892 | } | ||
893 | 884 | ||
894 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 885 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
895 | adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); | 886 | adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); |
896 | memset(&ib, 0, sizeof(ib)); | 887 | memset(&ib, 0, sizeof(ib)); |
897 | r = amdgpu_ib_get(adev, NULL, 16, &ib); | 888 | r = amdgpu_ib_get(adev, NULL, 16, &ib); |
898 | if (r) { | 889 | if (r) |
899 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
900 | goto err1; | 890 | goto err1; |
901 | } | 891 | |
902 | ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); | 892 | ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); |
903 | ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; | 893 | ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; |
904 | ib.ptr[2] = lower_32_bits(gpu_addr); | 894 | ib.ptr[2] = lower_32_bits(gpu_addr); |
@@ -912,22 +902,17 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
912 | 902 | ||
913 | r = dma_fence_wait_timeout(f, false, timeout); | 903 | r = dma_fence_wait_timeout(f, false, timeout); |
914 | if (r == 0) { | 904 | if (r == 0) { |
915 | DRM_ERROR("amdgpu: IB test timed out.\n"); | ||
916 | r = -ETIMEDOUT; | 905 | r = -ETIMEDOUT; |
917 | goto err2; | 906 | goto err2; |
918 | } else if (r < 0) { | 907 | } else if (r < 0) { |
919 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
920 | goto err2; | 908 | goto err2; |
921 | } | 909 | } |
922 | 910 | ||
923 | tmp = adev->wb.wb[index]; | 911 | tmp = adev->wb.wb[index]; |
924 | if (tmp == 0xDEADBEEF) { | 912 | if (tmp == 0xDEADBEEF) |
925 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
926 | r = 0; | 913 | r = 0; |
927 | } else { | 914 | else |
928 | DRM_ERROR("ib test on ring %d failed\n", ring->idx); | ||
929 | r = -EINVAL; | 915 | r = -EINVAL; |
930 | } | ||
931 | 916 | ||
932 | err2: | 917 | err2: |
933 | amdgpu_ib_free(adev, &ib, NULL); | 918 | amdgpu_ib_free(adev, &ib, NULL); |
@@ -1298,81 +1283,16 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, | |||
1298 | buffer[count++] = cpu_to_le32(0); | 1283 | buffer[count++] = cpu_to_le32(0); |
1299 | } | 1284 | } |
1300 | 1285 | ||
1301 | static void cz_init_cp_jump_table(struct amdgpu_device *adev) | 1286 | static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev) |
1302 | { | 1287 | { |
1303 | const __le32 *fw_data; | ||
1304 | volatile u32 *dst_ptr; | ||
1305 | int me, i, max_me = 4; | ||
1306 | u32 bo_offset = 0; | ||
1307 | u32 table_offset, table_size; | ||
1308 | |||
1309 | if (adev->asic_type == CHIP_CARRIZO) | 1288 | if (adev->asic_type == CHIP_CARRIZO) |
1310 | max_me = 5; | 1289 | return 5; |
1311 | 1290 | else | |
1312 | /* write the cp table buffer */ | 1291 | return 4; |
1313 | dst_ptr = adev->gfx.rlc.cp_table_ptr; | ||
1314 | for (me = 0; me < max_me; me++) { | ||
1315 | if (me == 0) { | ||
1316 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1317 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; | ||
1318 | fw_data = (const __le32 *) | ||
1319 | (adev->gfx.ce_fw->data + | ||
1320 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1321 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1322 | table_size = le32_to_cpu(hdr->jt_size); | ||
1323 | } else if (me == 1) { | ||
1324 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1325 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; | ||
1326 | fw_data = (const __le32 *) | ||
1327 | (adev->gfx.pfp_fw->data + | ||
1328 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1329 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1330 | table_size = le32_to_cpu(hdr->jt_size); | ||
1331 | } else if (me == 2) { | ||
1332 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1333 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; | ||
1334 | fw_data = (const __le32 *) | ||
1335 | (adev->gfx.me_fw->data + | ||
1336 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1337 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1338 | table_size = le32_to_cpu(hdr->jt_size); | ||
1339 | } else if (me == 3) { | ||
1340 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1341 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; | ||
1342 | fw_data = (const __le32 *) | ||
1343 | (adev->gfx.mec_fw->data + | ||
1344 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1345 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1346 | table_size = le32_to_cpu(hdr->jt_size); | ||
1347 | } else if (me == 4) { | ||
1348 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1349 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; | ||
1350 | fw_data = (const __le32 *) | ||
1351 | (adev->gfx.mec2_fw->data + | ||
1352 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1353 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1354 | table_size = le32_to_cpu(hdr->jt_size); | ||
1355 | } | ||
1356 | |||
1357 | for (i = 0; i < table_size; i ++) { | ||
1358 | dst_ptr[bo_offset + i] = | ||
1359 | cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); | ||
1360 | } | ||
1361 | |||
1362 | bo_offset += table_size; | ||
1363 | } | ||
1364 | } | ||
1365 | |||
1366 | static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) | ||
1367 | { | ||
1368 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); | ||
1369 | amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); | ||
1370 | } | 1292 | } |
1371 | 1293 | ||
1372 | static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) | 1294 | static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) |
1373 | { | 1295 | { |
1374 | volatile u32 *dst_ptr; | ||
1375 | u32 dws; | ||
1376 | const struct cs_section_def *cs_data; | 1296 | const struct cs_section_def *cs_data; |
1377 | int r; | 1297 | int r; |
1378 | 1298 | ||
@@ -1381,44 +1301,18 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) | |||
1381 | cs_data = adev->gfx.rlc.cs_data; | 1301 | cs_data = adev->gfx.rlc.cs_data; |
1382 | 1302 | ||
1383 | if (cs_data) { | 1303 | if (cs_data) { |
1384 | /* clear state block */ | 1304 | /* init clear state block */ |
1385 | adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); | 1305 | r = amdgpu_gfx_rlc_init_csb(adev); |
1386 | 1306 | if (r) | |
1387 | r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, | ||
1388 | AMDGPU_GEM_DOMAIN_VRAM, | ||
1389 | &adev->gfx.rlc.clear_state_obj, | ||
1390 | &adev->gfx.rlc.clear_state_gpu_addr, | ||
1391 | (void **)&adev->gfx.rlc.cs_ptr); | ||
1392 | if (r) { | ||
1393 | dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); | ||
1394 | gfx_v8_0_rlc_fini(adev); | ||
1395 | return r; | 1307 | return r; |
1396 | } | ||
1397 | |||
1398 | /* set up the cs buffer */ | ||
1399 | dst_ptr = adev->gfx.rlc.cs_ptr; | ||
1400 | gfx_v8_0_get_csb_buffer(adev, dst_ptr); | ||
1401 | amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); | ||
1402 | amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); | ||
1403 | } | 1308 | } |
1404 | 1309 | ||
1405 | if ((adev->asic_type == CHIP_CARRIZO) || | 1310 | if ((adev->asic_type == CHIP_CARRIZO) || |
1406 | (adev->asic_type == CHIP_STONEY)) { | 1311 | (adev->asic_type == CHIP_STONEY)) { |
1407 | adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ | 1312 | adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ |
1408 | r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, | 1313 | r = amdgpu_gfx_rlc_init_cpt(adev); |
1409 | PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, | 1314 | if (r) |
1410 | &adev->gfx.rlc.cp_table_obj, | ||
1411 | &adev->gfx.rlc.cp_table_gpu_addr, | ||
1412 | (void **)&adev->gfx.rlc.cp_table_ptr); | ||
1413 | if (r) { | ||
1414 | dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); | ||
1415 | return r; | 1315 | return r; |
1416 | } | ||
1417 | |||
1418 | cz_init_cp_jump_table(adev); | ||
1419 | |||
1420 | amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); | ||
1421 | amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); | ||
1422 | } | 1316 | } |
1423 | 1317 | ||
1424 | return 0; | 1318 | return 0; |
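In both gfx_v7_0 and gfx_v8_0 the per-ME jump-table copy (gfx_v7_0_init_cp_pg_table() / cz_init_cp_jump_table()) is deleted; the only per-ASIC knowledge left is how many micro-engines to copy, exposed through the new get_cp_table_num callback. The shared CP-table setup in amdgpu_rlc.c presumably loops over that count and replays the same firmware-header walk, along the lines of the sketch below. The helper name and structure are assumptions, and the per-ME firmware selection (CE/PFP/ME/MEC/MEC2) is elided to keep the sketch short.

static void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *hdr;
	volatile u32 *dst_ptr = adev->gfx.rlc.cp_table_ptr;
	const __le32 *fw_data;
	u32 bo_offset = 0, table_offset, table_size;
	int me, i;
	int max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);

	for (me = 0; me < max_me; me++) {
		/* pick ce_fw/pfp_fw/me_fw/mec_fw/mec2_fw based on "me";
		 * only the CE case is shown here for brevity */
		hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
		fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		table_offset = le32_to_cpu(hdr->jt_offset);
		table_size = le32_to_cpu(hdr->jt_size);

		/* copy this engine's jump table into the shared CP table BO */
		for (i = 0; i < table_size; i++)
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));

		bo_offset += table_size;
	}
}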
@@ -1443,7 +1337,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) | |||
1443 | mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; | 1337 | mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; |
1444 | 1338 | ||
1445 | r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, | 1339 | r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, |
1446 | AMDGPU_GEM_DOMAIN_GTT, | 1340 | AMDGPU_GEM_DOMAIN_VRAM, |
1447 | &adev->gfx.mec.hpd_eop_obj, | 1341 | &adev->gfx.mec.hpd_eop_obj, |
1448 | &adev->gfx.mec.hpd_eop_gpu_addr, | 1342 | &adev->gfx.mec.hpd_eop_gpu_addr, |
1449 | (void **)&hpd); | 1343 | (void **)&hpd); |
@@ -1629,7 +1523,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) | |||
1629 | return 0; | 1523 | return 0; |
1630 | 1524 | ||
1631 | /* bail if the compute ring is not ready */ | 1525 | /* bail if the compute ring is not ready */ |
1632 | if (!ring->ready) | 1526 | if (!ring->sched.ready) |
1633 | return 0; | 1527 | return 0; |
1634 | 1528 | ||
1635 | tmp = RREG32(mmGB_EDC_MODE); | 1529 | tmp = RREG32(mmGB_EDC_MODE); |
@@ -2088,7 +1982,7 @@ static int gfx_v8_0_sw_init(void *handle) | |||
2088 | return r; | 1982 | return r; |
2089 | } | 1983 | } |
2090 | 1984 | ||
2091 | r = gfx_v8_0_rlc_init(adev); | 1985 | r = adev->gfx.rlc.funcs->init(adev); |
2092 | if (r) { | 1986 | if (r) { |
2093 | DRM_ERROR("Failed to init rlc BOs!\n"); | 1987 | DRM_ERROR("Failed to init rlc BOs!\n"); |
2094 | return r; | 1988 | return r; |
@@ -2181,7 +2075,7 @@ static int gfx_v8_0_sw_fini(void *handle) | |||
2181 | amdgpu_gfx_kiq_fini(adev); | 2075 | amdgpu_gfx_kiq_fini(adev); |
2182 | 2076 | ||
2183 | gfx_v8_0_mec_fini(adev); | 2077 | gfx_v8_0_mec_fini(adev); |
2184 | gfx_v8_0_rlc_fini(adev); | 2078 | amdgpu_gfx_rlc_fini(adev); |
2185 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, | 2079 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, |
2186 | &adev->gfx.rlc.clear_state_gpu_addr, | 2080 | &adev->gfx.rlc.clear_state_gpu_addr, |
2187 | (void **)&adev->gfx.rlc.cs_ptr); | 2081 | (void **)&adev->gfx.rlc.cs_ptr); |
@@ -4175,10 +4069,10 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) | |||
4175 | 4069 | ||
4176 | static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) | 4070 | static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) |
4177 | { | 4071 | { |
4178 | gfx_v8_0_rlc_stop(adev); | 4072 | adev->gfx.rlc.funcs->stop(adev); |
4179 | gfx_v8_0_rlc_reset(adev); | 4073 | adev->gfx.rlc.funcs->reset(adev); |
4180 | gfx_v8_0_init_pg(adev); | 4074 | gfx_v8_0_init_pg(adev); |
4181 | gfx_v8_0_rlc_start(adev); | 4075 | adev->gfx.rlc.funcs->start(adev); |
4182 | 4076 | ||
4183 | return 0; | 4077 | return 0; |
4184 | } | 4078 | } |
@@ -4197,7 +4091,7 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) | |||
4197 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); | 4091 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); |
4198 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); | 4092 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); |
4199 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) | 4093 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
4200 | adev->gfx.gfx_ring[i].ready = false; | 4094 | adev->gfx.gfx_ring[i].sched.ready = false; |
4201 | } | 4095 | } |
4202 | WREG32(mmCP_ME_CNTL, tmp); | 4096 | WREG32(mmCP_ME_CNTL, tmp); |
4203 | udelay(50); | 4097 | udelay(50); |
@@ -4379,10 +4273,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) | |||
4379 | /* start the ring */ | 4273 | /* start the ring */ |
4380 | amdgpu_ring_clear_ring(ring); | 4274 | amdgpu_ring_clear_ring(ring); |
4381 | gfx_v8_0_cp_gfx_start(adev); | 4275 | gfx_v8_0_cp_gfx_start(adev); |
4382 | ring->ready = true; | 4276 | ring->sched.ready = true; |
4383 | r = amdgpu_ring_test_ring(ring); | 4277 | r = amdgpu_ring_test_helper(ring); |
4384 | if (r) | ||
4385 | ring->ready = false; | ||
4386 | 4278 | ||
4387 | return r; | 4279 | return r; |
4388 | } | 4280 | } |
@@ -4396,8 +4288,8 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) | |||
4396 | } else { | 4288 | } else { |
4397 | WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); | 4289 | WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); |
4398 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 4290 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
4399 | adev->gfx.compute_ring[i].ready = false; | 4291 | adev->gfx.compute_ring[i].sched.ready = false; |
4400 | adev->gfx.kiq.ring.ready = false; | 4292 | adev->gfx.kiq.ring.sched.ready = false; |
4401 | } | 4293 | } |
4402 | udelay(50); | 4294 | udelay(50); |
4403 | } | 4295 | } |
@@ -4473,11 +4365,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) | |||
4473 | amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); | 4365 | amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); |
4474 | } | 4366 | } |
4475 | 4367 | ||
4476 | r = amdgpu_ring_test_ring(kiq_ring); | 4368 | r = amdgpu_ring_test_helper(kiq_ring); |
4477 | if (r) { | 4369 | if (r) |
4478 | DRM_ERROR("KCQ enable failed\n"); | 4370 | DRM_ERROR("KCQ enable failed\n"); |
4479 | kiq_ring->ready = false; | ||
4480 | } | ||
4481 | return r; | 4371 | return r; |
4482 | } | 4372 | } |
4483 | 4373 | ||
@@ -4781,7 +4671,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) | |||
4781 | amdgpu_bo_kunmap(ring->mqd_obj); | 4671 | amdgpu_bo_kunmap(ring->mqd_obj); |
4782 | ring->mqd_ptr = NULL; | 4672 | ring->mqd_ptr = NULL; |
4783 | amdgpu_bo_unreserve(ring->mqd_obj); | 4673 | amdgpu_bo_unreserve(ring->mqd_obj); |
4784 | ring->ready = true; | 4674 | ring->sched.ready = true; |
4785 | return 0; | 4675 | return 0; |
4786 | } | 4676 | } |
4787 | 4677 | ||
@@ -4820,10 +4710,7 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) | |||
4820 | */ | 4710 | */ |
4821 | for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) { | 4711 | for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) { |
4822 | ring = &adev->gfx.compute_ring[i]; | 4712 | ring = &adev->gfx.compute_ring[i]; |
4823 | ring->ready = true; | 4713 | r = amdgpu_ring_test_helper(ring); |
4824 | r = amdgpu_ring_test_ring(ring); | ||
4825 | if (r) | ||
4826 | ring->ready = false; | ||
4827 | } | 4714 | } |
4828 | 4715 | ||
4829 | done: | 4716 | done: |
@@ -4867,7 +4754,7 @@ static int gfx_v8_0_hw_init(void *handle) | |||
4867 | gfx_v8_0_init_golden_registers(adev); | 4754 | gfx_v8_0_init_golden_registers(adev); |
4868 | gfx_v8_0_constants_init(adev); | 4755 | gfx_v8_0_constants_init(adev); |
4869 | 4756 | ||
4870 | r = gfx_v8_0_rlc_resume(adev); | 4757 | r = adev->gfx.rlc.funcs->resume(adev); |
4871 | if (r) | 4758 | if (r) |
4872 | return r; | 4759 | return r; |
4873 | 4760 | ||
@@ -4899,7 +4786,7 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev) | |||
4899 | amdgpu_ring_write(kiq_ring, 0); | 4786 | amdgpu_ring_write(kiq_ring, 0); |
4900 | amdgpu_ring_write(kiq_ring, 0); | 4787 | amdgpu_ring_write(kiq_ring, 0); |
4901 | } | 4788 | } |
4902 | r = amdgpu_ring_test_ring(kiq_ring); | 4789 | r = amdgpu_ring_test_helper(kiq_ring); |
4903 | if (r) | 4790 | if (r) |
4904 | DRM_ERROR("KCQ disable failed\n"); | 4791 | DRM_ERROR("KCQ disable failed\n"); |
4905 | 4792 | ||
@@ -4973,16 +4860,16 @@ static int gfx_v8_0_hw_fini(void *handle) | |||
4973 | pr_debug("For SRIOV client, shouldn't do anything.\n"); | 4860 | pr_debug("For SRIOV client, shouldn't do anything.\n"); |
4974 | return 0; | 4861 | return 0; |
4975 | } | 4862 | } |
4976 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | 4863 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
4977 | if (!gfx_v8_0_wait_for_idle(adev)) | 4864 | if (!gfx_v8_0_wait_for_idle(adev)) |
4978 | gfx_v8_0_cp_enable(adev, false); | 4865 | gfx_v8_0_cp_enable(adev, false); |
4979 | else | 4866 | else |
4980 | pr_err("cp is busy, skip halt cp\n"); | 4867 | pr_err("cp is busy, skip halt cp\n"); |
4981 | if (!gfx_v8_0_wait_for_rlc_idle(adev)) | 4868 | if (!gfx_v8_0_wait_for_rlc_idle(adev)) |
4982 | gfx_v8_0_rlc_stop(adev); | 4869 | adev->gfx.rlc.funcs->stop(adev); |
4983 | else | 4870 | else |
4984 | pr_err("rlc is busy, skip halt rlc\n"); | 4871 | pr_err("rlc is busy, skip halt rlc\n"); |
4985 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 4872 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
4986 | return 0; | 4873 | return 0; |
4987 | } | 4874 | } |
4988 | 4875 | ||
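The per-ASIC enter/exit safe-mode callbacks give way to common amdgpu_gfx_rlc_enter_safe_mode()/_exit_safe_mode() wrappers, which perform the "already in safe mode", "RLC disabled" and clock-gating checks once and only dispatch the register programming to the new is_rlc_enabled/set_safe_mode hooks. A sketch of the enter path, assuming the amdgpu_rlc.c helper added alongside this series:

	void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
	{
		if (adev->gfx.rlc.in_safe_mode)
			return;

		/* if RLC is not enabled, do nothing */
		if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
			return;

		if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG |
				      AMD_CG_SUPPORT_GFX_MGCG |
				      AMD_CG_SUPPORT_GFX_3D_CGCG)) {
			adev->gfx.rlc.funcs->set_safe_mode(adev);
			adev->gfx.rlc.in_safe_mode = true;
		}
	}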
@@ -5071,7 +4958,7 @@ static int gfx_v8_0_pre_soft_reset(void *handle) | |||
5071 | srbm_soft_reset = adev->gfx.srbm_soft_reset; | 4958 | srbm_soft_reset = adev->gfx.srbm_soft_reset; |
5072 | 4959 | ||
5073 | /* stop the rlc */ | 4960 | /* stop the rlc */ |
5074 | gfx_v8_0_rlc_stop(adev); | 4961 | adev->gfx.rlc.funcs->stop(adev); |
5075 | 4962 | ||
5076 | if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || | 4963 | if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || |
5077 | REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) | 4964 | REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) |
@@ -5197,7 +5084,7 @@ static int gfx_v8_0_post_soft_reset(void *handle) | |||
5197 | REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) | 5084 | REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) |
5198 | gfx_v8_0_cp_gfx_resume(adev); | 5085 | gfx_v8_0_cp_gfx_resume(adev); |
5199 | 5086 | ||
5200 | gfx_v8_0_rlc_start(adev); | 5087 | adev->gfx.rlc.funcs->start(adev); |
5201 | 5088 | ||
5202 | return 0; | 5089 | return 0; |
5203 | } | 5090 | } |
@@ -5445,7 +5332,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, | |||
5445 | AMD_PG_SUPPORT_RLC_SMU_HS | | 5332 | AMD_PG_SUPPORT_RLC_SMU_HS | |
5446 | AMD_PG_SUPPORT_CP | | 5333 | AMD_PG_SUPPORT_CP | |
5447 | AMD_PG_SUPPORT_GFX_DMG)) | 5334 | AMD_PG_SUPPORT_GFX_DMG)) |
5448 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | 5335 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
5449 | switch (adev->asic_type) { | 5336 | switch (adev->asic_type) { |
5450 | case CHIP_CARRIZO: | 5337 | case CHIP_CARRIZO: |
5451 | case CHIP_STONEY: | 5338 | case CHIP_STONEY: |
@@ -5499,7 +5386,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, | |||
5499 | AMD_PG_SUPPORT_RLC_SMU_HS | | 5386 | AMD_PG_SUPPORT_RLC_SMU_HS | |
5500 | AMD_PG_SUPPORT_CP | | 5387 | AMD_PG_SUPPORT_CP | |
5501 | AMD_PG_SUPPORT_GFX_DMG)) | 5388 | AMD_PG_SUPPORT_GFX_DMG)) |
5502 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 5389 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
5503 | return 0; | 5390 | return 0; |
5504 | } | 5391 | } |
5505 | 5392 | ||
@@ -5593,57 +5480,53 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, | |||
5593 | #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 | 5480 | #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 |
5594 | #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e | 5481 | #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e |
5595 | 5482 | ||
5596 | static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) | 5483 | static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev) |
5597 | { | 5484 | { |
5598 | u32 data; | 5485 | uint32_t rlc_setting; |
5599 | unsigned i; | ||
5600 | 5486 | ||
5601 | data = RREG32(mmRLC_CNTL); | 5487 | rlc_setting = RREG32(mmRLC_CNTL); |
5602 | if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) | 5488 | if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) |
5603 | return; | 5489 | return false; |
5604 | 5490 | ||
5605 | if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { | 5491 | return true; |
5606 | data |= RLC_SAFE_MODE__CMD_MASK; | 5492 | } |
5607 | data &= ~RLC_SAFE_MODE__MESSAGE_MASK; | ||
5608 | data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); | ||
5609 | WREG32(mmRLC_SAFE_MODE, data); | ||
5610 | 5493 | ||
5611 | for (i = 0; i < adev->usec_timeout; i++) { | 5494 | static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev) |
5612 | if ((RREG32(mmRLC_GPM_STAT) & | 5495 | { |
5613 | (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | | 5496 | uint32_t data; |
5614 | RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == | 5497 | unsigned i; |
5615 | (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | | 5498 | data = RREG32(mmRLC_CNTL); |
5616 | RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) | 5499 | data |= RLC_SAFE_MODE__CMD_MASK; |
5617 | break; | 5500 | data &= ~RLC_SAFE_MODE__MESSAGE_MASK; |
5618 | udelay(1); | 5501 | data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); |
5619 | } | 5502 | WREG32(mmRLC_SAFE_MODE, data); |
5620 | 5503 | ||
5621 | for (i = 0; i < adev->usec_timeout; i++) { | 5504 | /* wait for RLC_SAFE_MODE */ |
5622 | if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) | 5505 | for (i = 0; i < adev->usec_timeout; i++) { |
5623 | break; | 5506 | if ((RREG32(mmRLC_GPM_STAT) & |
5624 | udelay(1); | 5507 | (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | |
5625 | } | 5508 | RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == |
5626 | adev->gfx.rlc.in_safe_mode = true; | 5509 | (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | |
5510 | RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) | ||
5511 | break; | ||
5512 | udelay(1); | ||
5513 | } | ||
5514 | for (i = 0; i < adev->usec_timeout; i++) { | ||
5515 | if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) | ||
5516 | break; | ||
5517 | udelay(1); | ||
5627 | } | 5518 | } |
5628 | } | 5519 | } |
5629 | 5520 | ||
5630 | static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) | 5521 | static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev) |
5631 | { | 5522 | { |
5632 | u32 data = 0; | 5523 | uint32_t data; |
5633 | unsigned i; | 5524 | unsigned i; |
5634 | 5525 | ||
5635 | data = RREG32(mmRLC_CNTL); | 5526 | data = RREG32(mmRLC_CNTL); |
5636 | if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) | 5527 | data |= RLC_SAFE_MODE__CMD_MASK; |
5637 | return; | 5528 | data &= ~RLC_SAFE_MODE__MESSAGE_MASK; |
5638 | 5529 | WREG32(mmRLC_SAFE_MODE, data); | |
5639 | if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { | ||
5640 | if (adev->gfx.rlc.in_safe_mode) { | ||
5641 | data |= RLC_SAFE_MODE__CMD_MASK; | ||
5642 | data &= ~RLC_SAFE_MODE__MESSAGE_MASK; | ||
5643 | WREG32(mmRLC_SAFE_MODE, data); | ||
5644 | adev->gfx.rlc.in_safe_mode = false; | ||
5645 | } | ||
5646 | } | ||
5647 | 5530 | ||
5648 | for (i = 0; i < adev->usec_timeout; i++) { | 5531 | for (i = 0; i < adev->usec_timeout; i++) { |
5649 | if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) | 5532 | if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) |
@@ -5653,8 +5536,17 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) | |||
5653 | } | 5536 | } |
5654 | 5537 | ||
5655 | static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { | 5538 | static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { |
5656 | .enter_safe_mode = iceland_enter_rlc_safe_mode, | 5539 | .is_rlc_enabled = gfx_v8_0_is_rlc_enabled, |
5657 | .exit_safe_mode = iceland_exit_rlc_safe_mode | 5540 | .set_safe_mode = gfx_v8_0_set_safe_mode, |
5541 | .unset_safe_mode = gfx_v8_0_unset_safe_mode, | ||
5542 | .init = gfx_v8_0_rlc_init, | ||
5543 | .get_csb_size = gfx_v8_0_get_csb_size, | ||
5544 | .get_csb_buffer = gfx_v8_0_get_csb_buffer, | ||
5545 | .get_cp_table_num = gfx_v8_0_cp_jump_table_num, | ||
5546 | .resume = gfx_v8_0_rlc_resume, | ||
5547 | .stop = gfx_v8_0_rlc_stop, | ||
5548 | .reset = gfx_v8_0_rlc_reset, | ||
5549 | .start = gfx_v8_0_rlc_start | ||
5658 | }; | 5550 | }; |
5659 | 5551 | ||
5660 | static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, | 5552 | static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, |
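With iceland_rlc_funcs now exposing the full set of RLC hooks, the exit path is likewise shared; a sketch of the counterpart wrapper, under the same assumptions as the enter-side sketch above:

	void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
	{
		if (!adev->gfx.rlc.in_safe_mode)
			return;

		/* if RLC is not enabled, do nothing */
		if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
			return;

		if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG |
				      AMD_CG_SUPPORT_GFX_MGCG |
				      AMD_CG_SUPPORT_GFX_3D_CGCG)) {
			adev->gfx.rlc.funcs->unset_safe_mode(adev);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}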
@@ -5662,7 +5554,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev | |||
5662 | { | 5554 | { |
5663 | uint32_t temp, data; | 5555 | uint32_t temp, data; |
5664 | 5556 | ||
5665 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | 5557 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
5666 | 5558 | ||
5667 | /* It is disabled by HW by default */ | 5559 | /* It is disabled by HW by default */ |
5668 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { | 5560 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { |
@@ -5758,7 +5650,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev | |||
5758 | gfx_v8_0_wait_for_rlc_serdes(adev); | 5650 | gfx_v8_0_wait_for_rlc_serdes(adev); |
5759 | } | 5651 | } |
5760 | 5652 | ||
5761 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 5653 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
5762 | } | 5654 | } |
5763 | 5655 | ||
5764 | static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, | 5656 | static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, |
@@ -5768,7 +5660,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev | |||
5768 | 5660 | ||
5769 | temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); | 5661 | temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); |
5770 | 5662 | ||
5771 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | 5663 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
5772 | 5664 | ||
5773 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { | 5665 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { |
5774 | temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); | 5666 | temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); |
@@ -5851,7 +5743,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev | |||
5851 | 5743 | ||
5852 | gfx_v8_0_wait_for_rlc_serdes(adev); | 5744 | gfx_v8_0_wait_for_rlc_serdes(adev); |
5853 | 5745 | ||
5854 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 5746 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
5855 | } | 5747 | } |
5856 | static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, | 5748 | static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, |
5857 | bool enable) | 5749 | bool enable) |
@@ -6131,9 +6023,11 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) | |||
6131 | } | 6023 | } |
6132 | 6024 | ||
6133 | static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | 6025 | static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, |
6134 | struct amdgpu_ib *ib, | 6026 | struct amdgpu_job *job, |
6135 | unsigned vmid, bool ctx_switch) | 6027 | struct amdgpu_ib *ib, |
6028 | bool ctx_switch) | ||
6136 | { | 6029 | { |
6030 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
6137 | u32 header, control = 0; | 6031 | u32 header, control = 0; |
6138 | 6032 | ||
6139 | if (ib->flags & AMDGPU_IB_FLAG_CE) | 6033 | if (ib->flags & AMDGPU_IB_FLAG_CE) |
@@ -6161,9 +6055,11 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | |||
6161 | } | 6055 | } |
6162 | 6056 | ||
6163 | static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, | 6057 | static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, |
6058 | struct amdgpu_job *job, | ||
6164 | struct amdgpu_ib *ib, | 6059 | struct amdgpu_ib *ib, |
6165 | unsigned vmid, bool ctx_switch) | 6060 | bool ctx_switch) |
6166 | { | 6061 | { |
6062 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
6167 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); | 6063 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); |
6168 | 6064 | ||
6169 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); | 6065 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); |
@@ -6738,12 +6634,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, | |||
6738 | return 0; | 6634 | return 0; |
6739 | } | 6635 | } |
6740 | 6636 | ||
6637 | static void gfx_v8_0_fault(struct amdgpu_device *adev, | ||
6638 | struct amdgpu_iv_entry *entry) | ||
6639 | { | ||
6640 | u8 me_id, pipe_id, queue_id; | ||
6641 | struct amdgpu_ring *ring; | ||
6642 | int i; | ||
6643 | |||
6644 | me_id = (entry->ring_id & 0x0c) >> 2; | ||
6645 | pipe_id = (entry->ring_id & 0x03) >> 0; | ||
6646 | queue_id = (entry->ring_id & 0x70) >> 4; | ||
6647 | |||
6648 | switch (me_id) { | ||
6649 | case 0: | ||
6650 | drm_sched_fault(&adev->gfx.gfx_ring[0].sched); | ||
6651 | break; | ||
6652 | case 1: | ||
6653 | case 2: | ||
6654 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
6655 | ring = &adev->gfx.compute_ring[i]; | ||
6656 | if (ring->me == me_id && ring->pipe == pipe_id && | ||
6657 | ring->queue == queue_id) | ||
6658 | drm_sched_fault(&ring->sched); | ||
6659 | } | ||
6660 | break; | ||
6661 | } | ||
6662 | } | ||
6663 | |||
6741 | static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, | 6664 | static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, |
6742 | struct amdgpu_irq_src *source, | 6665 | struct amdgpu_irq_src *source, |
6743 | struct amdgpu_iv_entry *entry) | 6666 | struct amdgpu_iv_entry *entry) |
6744 | { | 6667 | { |
6745 | DRM_ERROR("Illegal register access in command stream\n"); | 6668 | DRM_ERROR("Illegal register access in command stream\n"); |
6746 | schedule_work(&adev->reset_work); | 6669 | gfx_v8_0_fault(adev, entry); |
6747 | return 0; | 6670 | return 0; |
6748 | } | 6671 | } |
6749 | 6672 | ||
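gfx_v8_0_fault() decodes the ME/pipe/queue fields from the interrupt ring_id and raises a fault only on the scheduler of the ring that actually misbehaved, instead of scheduling a device-wide reset_work. drm_sched_fault() simply arms that scheduler's timeout handler immediately, which then drives per-ring recovery through the driver's job-timeout callback; roughly (paraphrased from the scheduler core, not part of this diff):

	void drm_sched_fault(struct drm_gpu_scheduler *sched)
	{
		/* fire the timeout/recovery work for this ring right away */
		mod_delayed_work(system_wq, &sched->work_tdr, 0);
	}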
@@ -6752,7 +6675,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, | |||
6752 | struct amdgpu_iv_entry *entry) | 6675 | struct amdgpu_iv_entry *entry) |
6753 | { | 6676 | { |
6754 | DRM_ERROR("Illegal instruction in command stream\n"); | 6677 | DRM_ERROR("Illegal instruction in command stream\n"); |
6755 | schedule_work(&adev->reset_work); | 6678 | gfx_v8_0_fault(adev, entry); |
6756 | return 0; | 6679 | return 0; |
6757 | } | 6680 | } |
6758 | 6681 | ||
@@ -6976,10 +6899,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { | |||
6976 | 17 + /* gfx_v8_0_ring_emit_vm_flush */ | 6899 | 17 + /* gfx_v8_0_ring_emit_vm_flush */ |
6977 | 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ | 6900 | 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ |
6978 | .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ | 6901 | .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ |
6979 | .emit_ib = gfx_v8_0_ring_emit_ib_compute, | ||
6980 | .emit_fence = gfx_v8_0_ring_emit_fence_kiq, | 6902 | .emit_fence = gfx_v8_0_ring_emit_fence_kiq, |
6981 | .test_ring = gfx_v8_0_ring_test_ring, | 6903 | .test_ring = gfx_v8_0_ring_test_ring, |
6982 | .test_ib = gfx_v8_0_ring_test_ib, | ||
6983 | .insert_nop = amdgpu_ring_insert_nop, | 6904 | .insert_nop = amdgpu_ring_insert_nop, |
6984 | .pad_ib = amdgpu_ring_generic_pad_ib, | 6905 | .pad_ib = amdgpu_ring_generic_pad_ib, |
6985 | .emit_rreg = gfx_v8_0_ring_emit_rreg, | 6906 | .emit_rreg = gfx_v8_0_ring_emit_rreg, |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6d7baf59d6e1..c27caa144c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | |||
@@ -41,7 +41,7 @@ | |||
41 | #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" | 41 | #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" |
42 | 42 | ||
43 | #define GFX9_NUM_GFX_RINGS 1 | 43 | #define GFX9_NUM_GFX_RINGS 1 |
44 | #define GFX9_MEC_HPD_SIZE 2048 | 44 | #define GFX9_MEC_HPD_SIZE 4096 |
45 | #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L | 45 | #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L |
46 | #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L | 46 | #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L |
47 | 47 | ||
@@ -396,18 +396,14 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) | |||
396 | int r; | 396 | int r; |
397 | 397 | ||
398 | r = amdgpu_gfx_scratch_get(adev, &scratch); | 398 | r = amdgpu_gfx_scratch_get(adev, &scratch); |
399 | if (r) { | 399 | if (r) |
400 | DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); | ||
401 | return r; | 400 | return r; |
402 | } | 401 | |
403 | WREG32(scratch, 0xCAFEDEAD); | 402 | WREG32(scratch, 0xCAFEDEAD); |
404 | r = amdgpu_ring_alloc(ring, 3); | 403 | r = amdgpu_ring_alloc(ring, 3); |
405 | if (r) { | 404 | if (r) |
406 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | 405 | goto error_free_scratch; |
407 | ring->idx, r); | 406 | |
408 | amdgpu_gfx_scratch_free(adev, scratch); | ||
409 | return r; | ||
410 | } | ||
411 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); | 407 | amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); |
412 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); | 408 | amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); |
413 | amdgpu_ring_write(ring, 0xDEADBEEF); | 409 | amdgpu_ring_write(ring, 0xDEADBEEF); |
@@ -419,14 +415,11 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) | |||
419 | break; | 415 | break; |
420 | DRM_UDELAY(1); | 416 | DRM_UDELAY(1); |
421 | } | 417 | } |
422 | if (i < adev->usec_timeout) { | 418 | |
423 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 419 | if (i >= adev->usec_timeout) |
424 | ring->idx, i); | 420 | r = -ETIMEDOUT; |
425 | } else { | 421 | |
426 | DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", | 422 | error_free_scratch: |
427 | ring->idx, scratch, tmp); | ||
428 | r = -EINVAL; | ||
429 | } | ||
430 | amdgpu_gfx_scratch_free(adev, scratch); | 423 | amdgpu_gfx_scratch_free(adev, scratch); |
431 | return r; | 424 | return r; |
432 | } | 425 | } |
@@ -443,19 +436,16 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
443 | long r; | 436 | long r; |
444 | 437 | ||
445 | r = amdgpu_device_wb_get(adev, &index); | 438 | r = amdgpu_device_wb_get(adev, &index); |
446 | if (r) { | 439 | if (r) |
447 | dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); | ||
448 | return r; | 440 | return r; |
449 | } | ||
450 | 441 | ||
451 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 442 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
452 | adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); | 443 | adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); |
453 | memset(&ib, 0, sizeof(ib)); | 444 | memset(&ib, 0, sizeof(ib)); |
454 | r = amdgpu_ib_get(adev, NULL, 16, &ib); | 445 | r = amdgpu_ib_get(adev, NULL, 16, &ib); |
455 | if (r) { | 446 | if (r) |
456 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
457 | goto err1; | 447 | goto err1; |
458 | } | 448 | |
459 | ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); | 449 | ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); |
460 | ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; | 450 | ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; |
461 | ib.ptr[2] = lower_32_bits(gpu_addr); | 451 | ib.ptr[2] = lower_32_bits(gpu_addr); |
@@ -469,22 +459,17 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
469 | 459 | ||
470 | r = dma_fence_wait_timeout(f, false, timeout); | 460 | r = dma_fence_wait_timeout(f, false, timeout); |
471 | if (r == 0) { | 461 | if (r == 0) { |
472 | DRM_ERROR("amdgpu: IB test timed out.\n"); | 462 | r = -ETIMEDOUT; |
473 | r = -ETIMEDOUT; | 463 | goto err2; |
474 | goto err2; | ||
475 | } else if (r < 0) { | 464 | } else if (r < 0) { |
476 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | 465 | goto err2; |
477 | goto err2; | ||
478 | } | 466 | } |
479 | 467 | ||
480 | tmp = adev->wb.wb[index]; | 468 | tmp = adev->wb.wb[index]; |
481 | if (tmp == 0xDEADBEEF) { | 469 | if (tmp == 0xDEADBEEF) |
482 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | 470 | r = 0; |
483 | r = 0; | 471 | else |
484 | } else { | 472 | r = -EINVAL; |
485 | DRM_ERROR("ib test on ring %d failed\n", ring->idx); | ||
486 | r = -EINVAL; | ||
487 | } | ||
488 | 473 | ||
489 | err2: | 474 | err2: |
490 | amdgpu_ib_free(adev, &ib, NULL); | 475 | amdgpu_ib_free(adev, &ib, NULL); |
@@ -1065,85 +1050,13 @@ static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) | |||
1065 | WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); | 1050 | WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); |
1066 | } | 1051 | } |
1067 | 1052 | ||
1068 | static void rv_init_cp_jump_table(struct amdgpu_device *adev) | 1053 | static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) |
1069 | { | ||
1070 | const __le32 *fw_data; | ||
1071 | volatile u32 *dst_ptr; | ||
1072 | int me, i, max_me = 5; | ||
1073 | u32 bo_offset = 0; | ||
1074 | u32 table_offset, table_size; | ||
1075 | |||
1076 | /* write the cp table buffer */ | ||
1077 | dst_ptr = adev->gfx.rlc.cp_table_ptr; | ||
1078 | for (me = 0; me < max_me; me++) { | ||
1079 | if (me == 0) { | ||
1080 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1081 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; | ||
1082 | fw_data = (const __le32 *) | ||
1083 | (adev->gfx.ce_fw->data + | ||
1084 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1085 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1086 | table_size = le32_to_cpu(hdr->jt_size); | ||
1087 | } else if (me == 1) { | ||
1088 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1089 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; | ||
1090 | fw_data = (const __le32 *) | ||
1091 | (adev->gfx.pfp_fw->data + | ||
1092 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1093 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1094 | table_size = le32_to_cpu(hdr->jt_size); | ||
1095 | } else if (me == 2) { | ||
1096 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1097 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; | ||
1098 | fw_data = (const __le32 *) | ||
1099 | (adev->gfx.me_fw->data + | ||
1100 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1101 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1102 | table_size = le32_to_cpu(hdr->jt_size); | ||
1103 | } else if (me == 3) { | ||
1104 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1105 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; | ||
1106 | fw_data = (const __le32 *) | ||
1107 | (adev->gfx.mec_fw->data + | ||
1108 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1109 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1110 | table_size = le32_to_cpu(hdr->jt_size); | ||
1111 | } else if (me == 4) { | ||
1112 | const struct gfx_firmware_header_v1_0 *hdr = | ||
1113 | (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; | ||
1114 | fw_data = (const __le32 *) | ||
1115 | (adev->gfx.mec2_fw->data + | ||
1116 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | ||
1117 | table_offset = le32_to_cpu(hdr->jt_offset); | ||
1118 | table_size = le32_to_cpu(hdr->jt_size); | ||
1119 | } | ||
1120 | |||
1121 | for (i = 0; i < table_size; i ++) { | ||
1122 | dst_ptr[bo_offset + i] = | ||
1123 | cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); | ||
1124 | } | ||
1125 | |||
1126 | bo_offset += table_size; | ||
1127 | } | ||
1128 | } | ||
1129 | |||
1130 | static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev) | ||
1131 | { | 1054 | { |
1132 | /* clear state block */ | 1055 | return 5; |
1133 | amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, | ||
1134 | &adev->gfx.rlc.clear_state_gpu_addr, | ||
1135 | (void **)&adev->gfx.rlc.cs_ptr); | ||
1136 | |||
1137 | /* jump table block */ | ||
1138 | amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, | ||
1139 | &adev->gfx.rlc.cp_table_gpu_addr, | ||
1140 | (void **)&adev->gfx.rlc.cp_table_ptr); | ||
1141 | } | 1056 | } |
1142 | 1057 | ||
1143 | static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) | 1058 | static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) |
1144 | { | 1059 | { |
1145 | volatile u32 *dst_ptr; | ||
1146 | u32 dws; | ||
1147 | const struct cs_section_def *cs_data; | 1060 | const struct cs_section_def *cs_data; |
1148 | int r; | 1061 | int r; |
1149 | 1062 | ||
@@ -1152,45 +1065,18 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) | |||
1152 | cs_data = adev->gfx.rlc.cs_data; | 1065 | cs_data = adev->gfx.rlc.cs_data; |
1153 | 1066 | ||
1154 | if (cs_data) { | 1067 | if (cs_data) { |
1155 | /* clear state block */ | 1068 | /* init clear state block */ |
1156 | adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); | 1069 | r = amdgpu_gfx_rlc_init_csb(adev); |
1157 | r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, | 1070 | if (r) |
1158 | AMDGPU_GEM_DOMAIN_VRAM, | ||
1159 | &adev->gfx.rlc.clear_state_obj, | ||
1160 | &adev->gfx.rlc.clear_state_gpu_addr, | ||
1161 | (void **)&adev->gfx.rlc.cs_ptr); | ||
1162 | if (r) { | ||
1163 | dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", | ||
1164 | r); | ||
1165 | gfx_v9_0_rlc_fini(adev); | ||
1166 | return r; | 1071 | return r; |
1167 | } | ||
1168 | /* set up the cs buffer */ | ||
1169 | dst_ptr = adev->gfx.rlc.cs_ptr; | ||
1170 | gfx_v9_0_get_csb_buffer(adev, dst_ptr); | ||
1171 | amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); | ||
1172 | amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); | ||
1173 | amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); | ||
1174 | } | 1072 | } |
1175 | 1073 | ||
1176 | if (adev->asic_type == CHIP_RAVEN) { | 1074 | if (adev->asic_type == CHIP_RAVEN) { |
1177 | /* TODO: double check the cp_table_size for RV */ | 1075 | /* TODO: double check the cp_table_size for RV */ |
1178 | adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ | 1076 | adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ |
1179 | r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, | 1077 | r = amdgpu_gfx_rlc_init_cpt(adev); |
1180 | PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, | 1078 | if (r) |
1181 | &adev->gfx.rlc.cp_table_obj, | ||
1182 | &adev->gfx.rlc.cp_table_gpu_addr, | ||
1183 | (void **)&adev->gfx.rlc.cp_table_ptr); | ||
1184 | if (r) { | ||
1185 | dev_err(adev->dev, | ||
1186 | "(%d) failed to create cp table bo\n", r); | ||
1187 | gfx_v9_0_rlc_fini(adev); | ||
1188 | return r; | 1079 | return r; |
1189 | } | ||
1190 | |||
1191 | rv_init_cp_jump_table(adev); | ||
1192 | amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); | ||
1193 | amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); | ||
1194 | } | 1080 | } |
1195 | 1081 | ||
1196 | switch (adev->asic_type) { | 1082 | switch (adev->asic_type) { |
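gfx_v9_0_rlc_init() no longer open-codes the clear-state and CP jump-table BO setup; the shared helpers drive the per-ASIC get_csb_size/get_csb_buffer/get_cp_table_num callbacks instead. A sketch of the CSB helper, assuming it mirrors the code removed in this hunk:

	int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
	{
		volatile u32 *dst_ptr;
		u32 dws;
		int r;

		/* allocate and map the clear state block */
		adev->gfx.rlc.clear_state_size = dws =
			adev->gfx.rlc.funcs->get_csb_size(adev);
		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
			amdgpu_gfx_rlc_fini(adev);
			return r;
		}

		/* fill it via the per-ASIC callback, then unmap */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

		return 0;
	}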
@@ -1264,7 +1150,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) | |||
1264 | mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; | 1150 | mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; |
1265 | 1151 | ||
1266 | r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, | 1152 | r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, |
1267 | AMDGPU_GEM_DOMAIN_GTT, | 1153 | AMDGPU_GEM_DOMAIN_VRAM, |
1268 | &adev->gfx.mec.hpd_eop_obj, | 1154 | &adev->gfx.mec.hpd_eop_obj, |
1269 | &adev->gfx.mec.hpd_eop_gpu_addr, | 1155 | &adev->gfx.mec.hpd_eop_gpu_addr, |
1270 | (void **)&hpd); | 1156 | (void **)&hpd); |
@@ -1635,8 +1521,8 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) | |||
1635 | /* Clear GDS reserved memory */ | 1521 | /* Clear GDS reserved memory */ |
1636 | r = amdgpu_ring_alloc(ring, 17); | 1522 | r = amdgpu_ring_alloc(ring, 17); |
1637 | if (r) { | 1523 | if (r) { |
1638 | DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n", | 1524 | DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", |
1639 | ring->idx, r); | 1525 | ring->name, r); |
1640 | return r; | 1526 | return r; |
1641 | } | 1527 | } |
1642 | 1528 | ||
@@ -1748,7 +1634,7 @@ static int gfx_v9_0_sw_init(void *handle) | |||
1748 | return r; | 1634 | return r; |
1749 | } | 1635 | } |
1750 | 1636 | ||
1751 | r = gfx_v9_0_rlc_init(adev); | 1637 | r = adev->gfx.rlc.funcs->init(adev); |
1752 | if (r) { | 1638 | if (r) { |
1753 | DRM_ERROR("Failed to init rlc BOs!\n"); | 1639 | DRM_ERROR("Failed to init rlc BOs!\n"); |
1754 | return r; | 1640 | return r; |
@@ -2498,12 +2384,12 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) | |||
2498 | return 0; | 2384 | return 0; |
2499 | } | 2385 | } |
2500 | 2386 | ||
2501 | gfx_v9_0_rlc_stop(adev); | 2387 | adev->gfx.rlc.funcs->stop(adev); |
2502 | 2388 | ||
2503 | /* disable CG */ | 2389 | /* disable CG */ |
2504 | WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); | 2390 | WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); |
2505 | 2391 | ||
2506 | gfx_v9_0_rlc_reset(adev); | 2392 | adev->gfx.rlc.funcs->reset(adev); |
2507 | 2393 | ||
2508 | gfx_v9_0_init_pg(adev); | 2394 | gfx_v9_0_init_pg(adev); |
2509 | 2395 | ||
@@ -2514,15 +2400,24 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) | |||
2514 | return r; | 2400 | return r; |
2515 | } | 2401 | } |
2516 | 2402 | ||
2517 | if (adev->asic_type == CHIP_RAVEN || | 2403 | switch (adev->asic_type) { |
2518 | adev->asic_type == CHIP_VEGA20) { | 2404 | case CHIP_RAVEN: |
2519 | if (amdgpu_lbpw != 0) | 2405 | if (amdgpu_lbpw == 0) |
2406 | gfx_v9_0_enable_lbpw(adev, false); | ||
2407 | else | ||
2408 | gfx_v9_0_enable_lbpw(adev, true); | ||
2409 | break; | ||
2410 | case CHIP_VEGA20: | ||
2411 | if (amdgpu_lbpw > 0) | ||
2520 | gfx_v9_0_enable_lbpw(adev, true); | 2412 | gfx_v9_0_enable_lbpw(adev, true); |
2521 | else | 2413 | else |
2522 | gfx_v9_0_enable_lbpw(adev, false); | 2414 | gfx_v9_0_enable_lbpw(adev, false); |
2415 | break; | ||
2416 | default: | ||
2417 | break; | ||
2523 | } | 2418 | } |
2524 | 2419 | ||
2525 | gfx_v9_0_rlc_start(adev); | 2420 | adev->gfx.rlc.funcs->start(adev); |
2526 | 2421 | ||
2527 | return 0; | 2422 | return 0; |
2528 | } | 2423 | } |
@@ -2537,7 +2432,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) | |||
2537 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); | 2432 | tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); |
2538 | if (!enable) { | 2433 | if (!enable) { |
2539 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) | 2434 | for (i = 0; i < adev->gfx.num_gfx_rings; i++) |
2540 | adev->gfx.gfx_ring[i].ready = false; | 2435 | adev->gfx.gfx_ring[i].sched.ready = false; |
2541 | } | 2436 | } |
2542 | WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); | 2437 | WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); |
2543 | udelay(50); | 2438 | udelay(50); |
@@ -2727,7 +2622,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) | |||
2727 | 2622 | ||
2728 | /* start the ring */ | 2623 | /* start the ring */ |
2729 | gfx_v9_0_cp_gfx_start(adev); | 2624 | gfx_v9_0_cp_gfx_start(adev); |
2730 | ring->ready = true; | 2625 | ring->sched.ready = true; |
2731 | 2626 | ||
2732 | return 0; | 2627 | return 0; |
2733 | } | 2628 | } |
@@ -2742,8 +2637,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) | |||
2742 | WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, | 2637 | WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, |
2743 | (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); | 2638 | (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); |
2744 | for (i = 0; i < adev->gfx.num_compute_rings; i++) | 2639 | for (i = 0; i < adev->gfx.num_compute_rings; i++) |
2745 | adev->gfx.compute_ring[i].ready = false; | 2640 | adev->gfx.compute_ring[i].sched.ready = false; |
2746 | adev->gfx.kiq.ring.ready = false; | 2641 | adev->gfx.kiq.ring.sched.ready = false; |
2747 | } | 2642 | } |
2748 | udelay(50); | 2643 | udelay(50); |
2749 | } | 2644 | } |
@@ -2866,11 +2761,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) | |||
2866 | amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); | 2761 | amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); |
2867 | } | 2762 | } |
2868 | 2763 | ||
2869 | r = amdgpu_ring_test_ring(kiq_ring); | 2764 | r = amdgpu_ring_test_helper(kiq_ring); |
2870 | if (r) { | 2765 | if (r) |
2871 | DRM_ERROR("KCQ enable failed\n"); | 2766 | DRM_ERROR("KCQ enable failed\n"); |
2872 | kiq_ring->ready = false; | ||
2873 | } | ||
2874 | 2767 | ||
2875 | return r; | 2768 | return r; |
2876 | } | 2769 | } |
@@ -3249,7 +3142,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) | |||
3249 | amdgpu_bo_kunmap(ring->mqd_obj); | 3142 | amdgpu_bo_kunmap(ring->mqd_obj); |
3250 | ring->mqd_ptr = NULL; | 3143 | ring->mqd_ptr = NULL; |
3251 | amdgpu_bo_unreserve(ring->mqd_obj); | 3144 | amdgpu_bo_unreserve(ring->mqd_obj); |
3252 | ring->ready = true; | 3145 | ring->sched.ready = true; |
3253 | return 0; | 3146 | return 0; |
3254 | } | 3147 | } |
3255 | 3148 | ||
@@ -3314,19 +3207,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) | |||
3314 | return r; | 3207 | return r; |
3315 | 3208 | ||
3316 | ring = &adev->gfx.gfx_ring[0]; | 3209 | ring = &adev->gfx.gfx_ring[0]; |
3317 | r = amdgpu_ring_test_ring(ring); | 3210 | r = amdgpu_ring_test_helper(ring); |
3318 | if (r) { | 3211 | if (r) |
3319 | ring->ready = false; | ||
3320 | return r; | 3212 | return r; |
3321 | } | ||
3322 | 3213 | ||
3323 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | 3214 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { |
3324 | ring = &adev->gfx.compute_ring[i]; | 3215 | ring = &adev->gfx.compute_ring[i]; |
3325 | 3216 | amdgpu_ring_test_helper(ring); | |
3326 | ring->ready = true; | ||
3327 | r = amdgpu_ring_test_ring(ring); | ||
3328 | if (r) | ||
3329 | ring->ready = false; | ||
3330 | } | 3217 | } |
3331 | 3218 | ||
3332 | gfx_v9_0_enable_gui_idle_interrupt(adev, true); | 3219 | gfx_v9_0_enable_gui_idle_interrupt(adev, true); |
@@ -3353,7 +3240,7 @@ static int gfx_v9_0_hw_init(void *handle) | |||
3353 | if (r) | 3240 | if (r) |
3354 | return r; | 3241 | return r; |
3355 | 3242 | ||
3356 | r = gfx_v9_0_rlc_resume(adev); | 3243 | r = adev->gfx.rlc.funcs->resume(adev); |
3357 | if (r) | 3244 | if (r) |
3358 | return r; | 3245 | return r; |
3359 | 3246 | ||
@@ -3391,7 +3278,7 @@ static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) | |||
3391 | amdgpu_ring_write(kiq_ring, 0); | 3278 | amdgpu_ring_write(kiq_ring, 0); |
3392 | amdgpu_ring_write(kiq_ring, 0); | 3279 | amdgpu_ring_write(kiq_ring, 0); |
3393 | } | 3280 | } |
3394 | r = amdgpu_ring_test_ring(kiq_ring); | 3281 | r = amdgpu_ring_test_helper(kiq_ring); |
3395 | if (r) | 3282 | if (r) |
3396 | DRM_ERROR("KCQ disable failed\n"); | 3283 | DRM_ERROR("KCQ disable failed\n"); |
3397 | 3284 | ||
@@ -3433,7 +3320,7 @@ static int gfx_v9_0_hw_fini(void *handle) | |||
3433 | } | 3320 | } |
3434 | 3321 | ||
3435 | gfx_v9_0_cp_enable(adev, false); | 3322 | gfx_v9_0_cp_enable(adev, false); |
3436 | gfx_v9_0_rlc_stop(adev); | 3323 | adev->gfx.rlc.funcs->stop(adev); |
3437 | 3324 | ||
3438 | gfx_v9_0_csb_vram_unpin(adev); | 3325 | gfx_v9_0_csb_vram_unpin(adev); |
3439 | 3326 | ||
@@ -3508,7 +3395,7 @@ static int gfx_v9_0_soft_reset(void *handle) | |||
3508 | 3395 | ||
3509 | if (grbm_soft_reset) { | 3396 | if (grbm_soft_reset) { |
3510 | /* stop the rlc */ | 3397 | /* stop the rlc */ |
3511 | gfx_v9_0_rlc_stop(adev); | 3398 | adev->gfx.rlc.funcs->stop(adev); |
3512 | 3399 | ||
3513 | /* Disable GFX parsing/prefetching */ | 3400 | /* Disable GFX parsing/prefetching */ |
3514 | gfx_v9_0_cp_gfx_enable(adev, false); | 3401 | gfx_v9_0_cp_gfx_enable(adev, false); |
@@ -3607,64 +3494,47 @@ static int gfx_v9_0_late_init(void *handle) | |||
3607 | return 0; | 3494 | return 0; |
3608 | } | 3495 | } |
3609 | 3496 | ||
3610 | static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) | 3497 | static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) |
3611 | { | 3498 | { |
3612 | uint32_t rlc_setting, data; | 3499 | uint32_t rlc_setting; |
3613 | unsigned i; | ||
3614 | |||
3615 | if (adev->gfx.rlc.in_safe_mode) | ||
3616 | return; | ||
3617 | 3500 | ||
3618 | /* if RLC is not enabled, do nothing */ | 3501 | /* if RLC is not enabled, do nothing */ |
3619 | rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); | 3502 | rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); |
3620 | if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) | 3503 | if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) |
3621 | return; | 3504 | return false; |
3622 | |||
3623 | if (adev->cg_flags & | ||
3624 | (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | | ||
3625 | AMD_CG_SUPPORT_GFX_3D_CGCG)) { | ||
3626 | data = RLC_SAFE_MODE__CMD_MASK; | ||
3627 | data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); | ||
3628 | WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); | ||
3629 | 3505 | ||
3630 | /* wait for RLC_SAFE_MODE */ | 3506 | return true; |
3631 | for (i = 0; i < adev->usec_timeout; i++) { | ||
3632 | if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) | ||
3633 | break; | ||
3634 | udelay(1); | ||
3635 | } | ||
3636 | adev->gfx.rlc.in_safe_mode = true; | ||
3637 | } | ||
3638 | } | 3507 | } |
3639 | 3508 | ||
3640 | static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) | 3509 | static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) |
3641 | { | 3510 | { |
3642 | uint32_t rlc_setting, data; | 3511 | uint32_t data; |
3643 | 3512 | unsigned i; | |
3644 | if (!adev->gfx.rlc.in_safe_mode) | ||
3645 | return; | ||
3646 | 3513 | ||
3647 | /* if RLC is not enabled, do nothing */ | 3514 | data = RLC_SAFE_MODE__CMD_MASK; |
3648 | rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); | 3515 | data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); |
3649 | if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) | 3516 | WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); |
3650 | return; | ||
3651 | 3517 | ||
3652 | if (adev->cg_flags & | 3518 | /* wait for RLC_SAFE_MODE */ |
3653 | (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { | 3519 | for (i = 0; i < adev->usec_timeout; i++) { |
3654 | /* | 3520 | if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) |
3655 | * Try to exit safe mode only if it is already in safe | 3521 | break; |
3656 | * mode. | 3522 | udelay(1); |
3657 | */ | ||
3658 | data = RLC_SAFE_MODE__CMD_MASK; | ||
3659 | WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); | ||
3660 | adev->gfx.rlc.in_safe_mode = false; | ||
3661 | } | 3523 | } |
3662 | } | 3524 | } |
3663 | 3525 | ||
3526 | static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) | ||
3527 | { | ||
3528 | uint32_t data; | ||
3529 | |||
3530 | data = RLC_SAFE_MODE__CMD_MASK; | ||
3531 | WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); | ||
3532 | } | ||
3533 | |||
3664 | static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, | 3534 | static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, |
3665 | bool enable) | 3535 | bool enable) |
3666 | { | 3536 | { |
3667 | gfx_v9_0_enter_rlc_safe_mode(adev); | 3537 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
3668 | 3538 | ||
3669 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { | 3539 | if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { |
3670 | gfx_v9_0_enable_gfx_cg_power_gating(adev, true); | 3540 | gfx_v9_0_enable_gfx_cg_power_gating(adev, true); |
@@ -3675,7 +3545,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, | |||
3675 | gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); | 3545 | gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); |
3676 | } | 3546 | } |
3677 | 3547 | ||
3678 | gfx_v9_0_exit_rlc_safe_mode(adev); | 3548 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
3679 | } | 3549 | } |
3680 | 3550 | ||
3681 | static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, | 3551 | static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, |
@@ -3773,7 +3643,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, | |||
3773 | { | 3643 | { |
3774 | uint32_t data, def; | 3644 | uint32_t data, def; |
3775 | 3645 | ||
3776 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | 3646 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
3777 | 3647 | ||
3778 | /* Enable 3D CGCG/CGLS */ | 3648 | /* Enable 3D CGCG/CGLS */ |
3779 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { | 3649 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { |
@@ -3813,7 +3683,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, | |||
3813 | WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); | 3683 | WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); |
3814 | } | 3684 | } |
3815 | 3685 | ||
3816 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 3686 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
3817 | } | 3687 | } |
3818 | 3688 | ||
3819 | static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, | 3689 | static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, |
@@ -3821,7 +3691,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev | |||
3821 | { | 3691 | { |
3822 | uint32_t def, data; | 3692 | uint32_t def, data; |
3823 | 3693 | ||
3824 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | 3694 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
3825 | 3695 | ||
3826 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { | 3696 | if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { |
3827 | def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); | 3697 | def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); |
@@ -3861,7 +3731,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev | |||
3861 | WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); | 3731 | WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); |
3862 | } | 3732 | } |
3863 | 3733 | ||
3864 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 3734 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
3865 | } | 3735 | } |
3866 | 3736 | ||
3867 | static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, | 3737 | static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, |
@@ -3890,8 +3760,17 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, | |||
3890 | } | 3760 | } |
3891 | 3761 | ||
3892 | static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { | 3762 | static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { |
3893 | .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode, | 3763 | .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, |
3894 | .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode | 3764 | .set_safe_mode = gfx_v9_0_set_safe_mode, |
3765 | .unset_safe_mode = gfx_v9_0_unset_safe_mode, | ||
3766 | .init = gfx_v9_0_rlc_init, | ||
3767 | .get_csb_size = gfx_v9_0_get_csb_size, | ||
3768 | .get_csb_buffer = gfx_v9_0_get_csb_buffer, | ||
3769 | .get_cp_table_num = gfx_v9_0_cp_jump_table_num, | ||
3770 | .resume = gfx_v9_0_rlc_resume, | ||
3771 | .stop = gfx_v9_0_rlc_stop, | ||
3772 | .reset = gfx_v9_0_rlc_reset, | ||
3773 | .start = gfx_v9_0_rlc_start | ||
3895 | }; | 3774 | }; |
3896 | 3775 | ||
3897 | static int gfx_v9_0_set_powergating_state(void *handle, | 3776 | static int gfx_v9_0_set_powergating_state(void *handle, |
@@ -4072,9 +3951,11 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) | |||
4072 | } | 3951 | } |
4073 | 3952 | ||
4074 | static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | 3953 | static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, |
4075 | struct amdgpu_ib *ib, | 3954 | struct amdgpu_job *job, |
4076 | unsigned vmid, bool ctx_switch) | 3955 | struct amdgpu_ib *ib, |
3956 | bool ctx_switch) | ||
4077 | { | 3957 | { |
3958 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
4078 | u32 header, control = 0; | 3959 | u32 header, control = 0; |
4079 | 3960 | ||
4080 | if (ib->flags & AMDGPU_IB_FLAG_CE) | 3961 | if (ib->flags & AMDGPU_IB_FLAG_CE) |
@@ -4103,20 +3984,22 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, | |||
4103 | } | 3984 | } |
4104 | 3985 | ||
4105 | static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, | 3986 | static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, |
4106 | struct amdgpu_ib *ib, | 3987 | struct amdgpu_job *job, |
4107 | unsigned vmid, bool ctx_switch) | 3988 | struct amdgpu_ib *ib, |
3989 | bool ctx_switch) | ||
4108 | { | 3990 | { |
4109 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); | 3991 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); |
3992 | u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); | ||
4110 | 3993 | ||
4111 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); | 3994 | amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); |
4112 | BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ | 3995 | BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ |
4113 | amdgpu_ring_write(ring, | 3996 | amdgpu_ring_write(ring, |
4114 | #ifdef __BIG_ENDIAN | 3997 | #ifdef __BIG_ENDIAN |
4115 | (2 << 0) | | 3998 | (2 << 0) | |
4116 | #endif | 3999 | #endif |
4117 | lower_32_bits(ib->gpu_addr)); | 4000 | lower_32_bits(ib->gpu_addr)); |
4118 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); | 4001 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); |
4119 | amdgpu_ring_write(ring, control); | 4002 | amdgpu_ring_write(ring, control); |
4120 | } | 4003 | } |
4121 | 4004 | ||
4122 | static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, | 4005 | static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, |
@@ -4695,12 +4578,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, | |||
4695 | return 0; | 4578 | return 0; |
4696 | } | 4579 | } |
4697 | 4580 | ||
4581 | static void gfx_v9_0_fault(struct amdgpu_device *adev, | ||
4582 | struct amdgpu_iv_entry *entry) | ||
4583 | { | ||
4584 | u8 me_id, pipe_id, queue_id; | ||
4585 | struct amdgpu_ring *ring; | ||
4586 | int i; | ||
4587 | |||
4588 | me_id = (entry->ring_id & 0x0c) >> 2; | ||
4589 | pipe_id = (entry->ring_id & 0x03) >> 0; | ||
4590 | queue_id = (entry->ring_id & 0x70) >> 4; | ||
4591 | |||
4592 | switch (me_id) { | ||
4593 | case 0: | ||
4594 | drm_sched_fault(&adev->gfx.gfx_ring[0].sched); | ||
4595 | break; | ||
4596 | case 1: | ||
4597 | case 2: | ||
4598 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | ||
4599 | ring = &adev->gfx.compute_ring[i]; | ||
4600 | if (ring->me == me_id && ring->pipe == pipe_id && | ||
4601 | ring->queue == queue_id) | ||
4602 | drm_sched_fault(&ring->sched); | ||
4603 | } | ||
4604 | break; | ||
4605 | } | ||
4606 | } | ||
4607 | |||
4698 | static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, | 4608 | static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, |
4699 | struct amdgpu_irq_src *source, | 4609 | struct amdgpu_irq_src *source, |
4700 | struct amdgpu_iv_entry *entry) | 4610 | struct amdgpu_iv_entry *entry) |
4701 | { | 4611 | { |
4702 | DRM_ERROR("Illegal register access in command stream\n"); | 4612 | DRM_ERROR("Illegal register access in command stream\n"); |
4703 | schedule_work(&adev->reset_work); | 4613 | gfx_v9_0_fault(adev, entry); |
4704 | return 0; | 4614 | return 0; |
4705 | } | 4615 | } |
4706 | 4616 | ||
@@ -4709,7 +4619,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, | |||
4709 | struct amdgpu_iv_entry *entry) | 4619 | struct amdgpu_iv_entry *entry) |
4710 | { | 4620 | { |
4711 | DRM_ERROR("Illegal instruction in command stream\n"); | 4621 | DRM_ERROR("Illegal instruction in command stream\n"); |
4712 | schedule_work(&adev->reset_work); | 4622 | gfx_v9_0_fault(adev, entry); |
4713 | return 0; | 4623 | return 0; |
4714 | } | 4624 | } |
4715 | 4625 | ||
@@ -4836,10 +4746,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { | |||
4836 | 2 + /* gfx_v9_0_ring_emit_vm_flush */ | 4746 | 2 + /* gfx_v9_0_ring_emit_vm_flush */ |
4837 | 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ | 4747 | 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ |
4838 | .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ | 4748 | .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ |
4839 | .emit_ib = gfx_v9_0_ring_emit_ib_compute, | ||
4840 | .emit_fence = gfx_v9_0_ring_emit_fence_kiq, | 4749 | .emit_fence = gfx_v9_0_ring_emit_fence_kiq, |
4841 | .test_ring = gfx_v9_0_ring_test_ring, | 4750 | .test_ring = gfx_v9_0_ring_test_ring, |
4842 | .test_ib = gfx_v9_0_ring_test_ib, | ||
4843 | .insert_nop = amdgpu_ring_insert_nop, | 4751 | .insert_nop = amdgpu_ring_insert_nop, |
4844 | .pad_ib = amdgpu_ring_generic_pad_ib, | 4752 | .pad_ib = amdgpu_ring_generic_pad_ib, |
4845 | .emit_rreg = gfx_v9_0_ring_emit_rreg, | 4753 | .emit_rreg = gfx_v9_0_ring_emit_rreg, |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index ceb7847b504f..f5edddf3b29d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | |||
@@ -35,20 +35,25 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) | |||
35 | return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; | 35 | return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; |
36 | } | 36 | } |
37 | 37 | ||
38 | static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) | 38 | void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, |
39 | uint64_t page_table_base) | ||
39 | { | 40 | { |
40 | uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); | 41 | /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ |
42 | int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 | ||
43 | - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; | ||
41 | 44 | ||
42 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, | 45 | WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, |
43 | lower_32_bits(value)); | 46 | offset * vmid, lower_32_bits(page_table_base)); |
44 | 47 | ||
45 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, | 48 | WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, |
46 | upper_32_bits(value)); | 49 | offset * vmid, upper_32_bits(page_table_base)); |
47 | } | 50 | } |
48 | 51 | ||
49 | static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) | 52 | static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) |
50 | { | 53 | { |
51 | gfxhub_v1_0_init_gart_pt_regs(adev); | 54 | uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); |
55 | |||
56 | gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base); | ||
52 | 57 | ||
53 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, | 58 | WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, |
54 | (u32)(adev->gmc.gart_start >> 12)); | 59 | (u32)(adev->gmc.gart_start >> 12)); |
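Exporting gfxhub_v1_0_setup_vm_pt_regs() lets callers program the page-table base of any VMID, not just GART context 0; the per-VMID offset works because the VM_CONTEXTn_PAGE_TABLE_BASE_ADDR_LO32/HI32 pairs sit at a constant register stride. Illustrative use only (vmid 8 and pd_bo are hypothetical):

	/* program the page directory of a non-GART VM context */
	uint64_t pt_base = amdgpu_gmc_pd_addr(pd_bo);

	gfxhub_v1_0_setup_vm_pt_regs(adev, 8, pt_base);
	/* writes the register pair at
	 *   mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_{LO,HI}32 + 8 * stride,
	 * where stride = mmVM_CONTEXT1_..._LO32 - mmVM_CONTEXT0_..._LO32
	 */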
@@ -72,7 +77,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) | |||
72 | 77 | ||
73 | /* Program the system aperture low logical page number. */ | 78 | /* Program the system aperture low logical page number. */ |
74 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, | 79 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, |
75 | min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18); | 80 | min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); |
76 | 81 | ||
77 | if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) | 82 | if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) |
78 | /* | 83 | /* |
@@ -82,11 +87,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) | |||
82 | * to get rid of the VM fault and hardware hang. | 87 | * to get rid of the VM fault and hardware hang. |
83 | */ | 88 | */ |
84 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, | 89 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, |
85 | max((adev->gmc.vram_end >> 18) + 0x1, | 90 | max((adev->gmc.fb_end >> 18) + 0x1, |
86 | adev->gmc.agp_end >> 18)); | 91 | adev->gmc.agp_end >> 18)); |
87 | else | 92 | else |
88 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, | 93 | WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, |
89 | max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); | 94 | max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); |
90 | 95 | ||
91 | /* Set default page address. */ | 96 | /* Set default page address. */ |
92 | value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start | 97 | value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 206e29cad753..92d3a70cd9b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h | |||
@@ -30,5 +30,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, | |||
30 | bool value); | 30 | bool value); |
31 | void gfxhub_v1_0_init(struct amdgpu_device *adev); | 31 | void gfxhub_v1_0_init(struct amdgpu_device *adev); |
32 | u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); | 32 | u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); |
33 | void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, | ||
34 | uint64_t page_table_base); | ||
33 | 35 | ||
34 | #endif | 36 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index e1c2b4e9c7b2..2821d1d846e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | |||
@@ -358,7 +358,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) | |||
358 | return 0; | 358 | return 0; |
359 | } | 359 | } |
360 | 360 | ||
361 | static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) | 361 | static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, |
362 | uint32_t vmid, uint32_t flush_type) | ||
362 | { | 363 | { |
363 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | 364 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); |
364 | } | 365 | } |
@@ -580,7 +581,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) | |||
580 | else | 581 | else |
581 | gmc_v6_0_set_fault_enable_default(adev, true); | 582 | gmc_v6_0_set_fault_enable_default(adev, true); |
582 | 583 | ||
583 | gmc_v6_0_flush_gpu_tlb(adev, 0); | 584 | gmc_v6_0_flush_gpu_tlb(adev, 0, 0); |
584 | dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", | 585 | dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", |
585 | (unsigned)(adev->gmc.gart_size >> 20), | 586 | (unsigned)(adev->gmc.gart_size >> 20), |
586 | (unsigned long long)table_addr); | 587 | (unsigned long long)table_addr); |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 910c4ce19cb3..761dcfb2fec0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | |||
@@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) | |||
430 | * | 430 | * |
431 | * Flush the TLB for the requested page table (CIK). | 431 | * Flush the TLB for the requested page table (CIK). |
432 | */ | 432 | */ |
433 | static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) | 433 | static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, |
434 | uint32_t vmid, uint32_t flush_type) | ||
434 | { | 435 | { |
435 | /* bits 0-15 are the VM contexts0-15 */ | 436 | /* bits 0-15 are the VM contexts0-15 */ |
436 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | 437 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); |
@@ -698,7 +699,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) | |||
698 | WREG32(mmCHUB_CONTROL, tmp); | 699 | WREG32(mmCHUB_CONTROL, tmp); |
699 | } | 700 | } |
700 | 701 | ||
701 | gmc_v7_0_flush_gpu_tlb(adev, 0); | 702 | gmc_v7_0_flush_gpu_tlb(adev, 0, 0); |
702 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", | 703 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", |
703 | (unsigned)(adev->gmc.gart_size >> 20), | 704 | (unsigned)(adev->gmc.gart_size >> 20), |
704 | (unsigned long long)table_addr); | 705 | (unsigned long long)table_addr); |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1d3265c97b70..531aaf377592 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | |||
@@ -611,7 +611,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) | |||
611 | * Flush the TLB for the requested page table (CIK). | 611 | * Flush the TLB for the requested page table (CIK). |
612 | */ | 612 | */ |
613 | static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, | 613 | static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, |
614 | uint32_t vmid) | 614 | uint32_t vmid, uint32_t flush_type) |
615 | { | 615 | { |
616 | /* bits 0-15 are the VM contexts0-15 */ | 616 | /* bits 0-15 are the VM contexts0-15 */ |
617 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); | 617 | WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); |
@@ -920,7 +920,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) | |||
920 | else | 920 | else |
921 | gmc_v8_0_set_fault_enable_default(adev, true); | 921 | gmc_v8_0_set_fault_enable_default(adev, true); |
922 | 922 | ||
923 | gmc_v8_0_flush_gpu_tlb(adev, 0); | 923 | gmc_v8_0_flush_gpu_tlb(adev, 0, 0); |
924 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", | 924 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", |
925 | (unsigned)(adev->gmc.gart_size >> 20), | 925 | (unsigned)(adev->gmc.gart_size >> 20), |
926 | (unsigned long long)table_addr); | 926 | (unsigned long long)table_addr); |
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f35d7a554ad5..811231e4ec53 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | |||
@@ -293,14 +293,14 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) | |||
293 | adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; | 293 | adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; |
294 | } | 294 | } |
295 | 295 | ||
296 | static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) | 296 | static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, |
297 | uint32_t flush_type) | ||
297 | { | 298 | { |
298 | u32 req = 0; | 299 | u32 req = 0; |
299 | 300 | ||
300 | /* invalidate using legacy mode on vmid*/ | ||
301 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, | 301 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, |
302 | PER_VMID_INVALIDATE_REQ, 1 << vmid); | 302 | PER_VMID_INVALIDATE_REQ, 1 << vmid); |
303 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); | 303 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); |
304 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); | 304 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); |
305 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); | 305 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); |
306 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); | 306 | req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); |
@@ -312,48 +312,6 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) | |||
312 | return req; | 312 | return req; |
313 | } | 313 | } |
314 | 314 | ||
315 | static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, | ||
316 | uint32_t reg0, uint32_t reg1, | ||
317 | uint32_t ref, uint32_t mask) | ||
318 | { | ||
319 | signed long r, cnt = 0; | ||
320 | unsigned long flags; | ||
321 | uint32_t seq; | ||
322 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | ||
323 | struct amdgpu_ring *ring = &kiq->ring; | ||
324 | |||
325 | spin_lock_irqsave(&kiq->ring_lock, flags); | ||
326 | |||
327 | amdgpu_ring_alloc(ring, 32); | ||
328 | amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, | ||
329 | ref, mask); | ||
330 | amdgpu_fence_emit_polling(ring, &seq); | ||
331 | amdgpu_ring_commit(ring); | ||
332 | spin_unlock_irqrestore(&kiq->ring_lock, flags); | ||
333 | |||
334 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); | ||
335 | |||
336 | /* don't wait anymore for IRQ context */ | ||
337 | if (r < 1 && in_interrupt()) | ||
338 | goto failed_kiq; | ||
339 | |||
340 | might_sleep(); | ||
341 | |||
342 | while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { | ||
343 | msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); | ||
344 | r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); | ||
345 | } | ||
346 | |||
347 | if (cnt > MAX_KIQ_REG_TRY) | ||
348 | goto failed_kiq; | ||
349 | |||
350 | return 0; | ||
351 | |||
352 | failed_kiq: | ||
353 | pr_err("failed to invalidate tlb with kiq\n"); | ||
354 | return r; | ||
355 | } | ||
356 | |||
357 | /* | 315 | /* |
358 | * GART | 316 | * GART |
359 | * VMID 0 is the physical GPU addresses as used by the kernel. | 317 | * VMID 0 is the physical GPU addresses as used by the kernel. |
@@ -362,64 +320,47 @@ failed_kiq: | |||
362 | */ | 320 | */ |
363 | 321 | ||
364 | /** | 322 | /** |
365 | * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback | 323 | * gmc_v9_0_flush_gpu_tlb - flush the TLB with a given flush type |
366 | * | 324 | * |
367 | * @adev: amdgpu_device pointer | 325 | * @adev: amdgpu_device pointer |
368 | * @vmid: vm instance to flush | 326 | * @vmid: vm instance to flush |
327 | * @flush_type: the flush type | ||
369 | * | 328 | * |
370 | * Flush the TLB for the requested page table. | 329 | * Flush the TLB for the requested page table using the given flush type. |
371 | */ | 330 | */ |
372 | static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, | 331 | static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, |
373 | uint32_t vmid) | 332 | uint32_t vmid, uint32_t flush_type) |
374 | { | 333 | { |
375 | /* Use register 17 for GART */ | ||
376 | const unsigned eng = 17; | 334 | const unsigned eng = 17; |
377 | unsigned i, j; | 335 | unsigned i, j; |
378 | int r; | ||
379 | 336 | ||
380 | for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { | 337 | for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { |
381 | struct amdgpu_vmhub *hub = &adev->vmhub[i]; | 338 | struct amdgpu_vmhub *hub = &adev->vmhub[i]; |
382 | u32 tmp = gmc_v9_0_get_invalidate_req(vmid); | 339 | u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); |
383 | |||
384 | if (adev->gfx.kiq.ring.ready && | ||
385 | (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && | ||
386 | !adev->in_gpu_reset) { | ||
387 | r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng, | ||
388 | hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid); | ||
389 | if (!r) | ||
390 | continue; | ||
391 | } | ||
392 | 340 | ||
393 | spin_lock(&adev->gmc.invalidate_lock); | 341 | if (i == AMDGPU_GFXHUB && !adev->in_gpu_reset && |
342 | adev->gfx.kiq.ring.sched.ready && | ||
343 | (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { | ||
344 | uint32_t req = hub->vm_inv_eng0_req + eng; | ||
345 | uint32_t ack = hub->vm_inv_eng0_ack + eng; | ||
394 | 346 | ||
395 | WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); | 347 | amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, |
396 | 348 | 1 << vmid); | |
397 | /* Busy wait for ACK.*/ | ||
398 | for (j = 0; j < 100; j++) { | ||
399 | tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); | ||
400 | tmp &= 1 << vmid; | ||
401 | if (tmp) | ||
402 | break; | ||
403 | cpu_relax(); | ||
404 | } | ||
405 | if (j < 100) { | ||
406 | spin_unlock(&adev->gmc.invalidate_lock); | ||
407 | continue; | 349 | continue; |
408 | } | 350 | } |
409 | 351 | ||
410 | /* Wait for ACK with a delay.*/ | 352 | spin_lock(&adev->gmc.invalidate_lock); |
353 | WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); | ||
411 | for (j = 0; j < adev->usec_timeout; j++) { | 354 | for (j = 0; j < adev->usec_timeout; j++) { |
412 | tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); | 355 | tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); |
413 | tmp &= 1 << vmid; | 356 | if (tmp & (1 << vmid)) |
414 | if (tmp) | ||
415 | break; | 357 | break; |
416 | udelay(1); | 358 | udelay(1); |
417 | } | 359 | } |
418 | if (j < adev->usec_timeout) { | ||
419 | spin_unlock(&adev->gmc.invalidate_lock); | ||
420 | continue; | ||
421 | } | ||
422 | spin_unlock(&adev->gmc.invalidate_lock); | 360 | spin_unlock(&adev->gmc.invalidate_lock); |
361 | if (j < adev->usec_timeout) | ||
362 | continue; | ||
363 | |||
423 | DRM_ERROR("Timeout waiting for VM flush ACK!\n"); | 364 | DRM_ERROR("Timeout waiting for VM flush ACK!\n"); |
424 | } | 365 | } |
425 | } | 366 | } |
@@ -429,7 +370,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, | |||
429 | { | 370 | { |
430 | struct amdgpu_device *adev = ring->adev; | 371 | struct amdgpu_device *adev = ring->adev; |
431 | struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; | 372 | struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; |
432 | uint32_t req = gmc_v9_0_get_invalidate_req(vmid); | 373 | uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); |
433 | unsigned eng = ring->vm_inv_eng; | 374 | unsigned eng = ring->vm_inv_eng; |
434 | 375 | ||
435 | amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), | 376 | amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), |
@@ -739,9 +680,8 @@ static int gmc_v9_0_late_init(void *handle) | |||
739 | unsigned vmhub = ring->funcs->vmhub; | 680 | unsigned vmhub = ring->funcs->vmhub; |
740 | 681 | ||
741 | ring->vm_inv_eng = vm_inv_eng[vmhub]++; | 682 | ring->vm_inv_eng = vm_inv_eng[vmhub]++; |
742 | dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", | 683 | dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", |
743 | ring->idx, ring->name, ring->vm_inv_eng, | 684 | ring->name, ring->vm_inv_eng, ring->funcs->vmhub); |
744 | ring->funcs->vmhub); | ||
745 | } | 685 | } |
746 | 686 | ||
747 | /* Engine 16 is used for KFD and 17 for GART flushes */ | 687 | /* Engine 16 is used for KFD and 17 for GART flushes */ |
@@ -1122,7 +1062,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) | |||
1122 | 1062 | ||
1123 | gfxhub_v1_0_set_fault_enable_default(adev, value); | 1063 | gfxhub_v1_0_set_fault_enable_default(adev, value); |
1124 | mmhub_v1_0_set_fault_enable_default(adev, value); | 1064 | mmhub_v1_0_set_fault_enable_default(adev, value); |
1125 | gmc_v9_0_flush_gpu_tlb(adev, 0); | 1065 | gmc_v9_0_flush_gpu_tlb(adev, 0, 0); |
1126 | 1066 | ||
1127 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", | 1067 | DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", |
1128 | (unsigned)(adev->gmc.gart_size >> 20), | 1068 | (unsigned)(adev->gmc.gart_size >> 20), |
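Two things change in gmc_v9_0.c above: the flush callback now threads an explicit flush_type into the VM_INVALIDATE_ENG0_REQ FLUSH_TYPE field (0 keeps the old legacy invalidate, which is what every call site updated here passes), and the file-local KIQ write-and-wait helper is dropped in favour of the shared amdgpu_virt_kiq_reg_write_reg_wait(). A sketch of that shared helper, reconstructed from the code deleted above (the amdgpu_virt version may differ in its error handling):

/* Reconstruction of the removed helper; illustrative only. */
static void example_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
					   uint32_t reg0, uint32_t reg1,
					   uint32_t ref, uint32_t mask)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;
	unsigned long flags;
	uint32_t seq;

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	/* one packet: write reg0, then poll reg1 until (reg1 & mask) == ref */
	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask);
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	if (amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT) < 1)
		pr_err("failed to invalidate tlb with kiq\n");
}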
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index d0e478f43443..0c9a2c03504e 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c | |||
@@ -508,19 +508,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable) | |||
508 | pi->caps_db_ramping || | 508 | pi->caps_db_ramping || |
509 | pi->caps_td_ramping || | 509 | pi->caps_td_ramping || |
510 | pi->caps_tcp_ramping) { | 510 | pi->caps_tcp_ramping) { |
511 | adev->gfx.rlc.funcs->enter_safe_mode(adev); | 511 | amdgpu_gfx_rlc_enter_safe_mode(adev); |
512 | 512 | ||
513 | if (enable) { | 513 | if (enable) { |
514 | ret = kv_program_pt_config_registers(adev, didt_config_kv); | 514 | ret = kv_program_pt_config_registers(adev, didt_config_kv); |
515 | if (ret) { | 515 | if (ret) { |
516 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 516 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
517 | return ret; | 517 | return ret; |
518 | } | 518 | } |
519 | } | 519 | } |
520 | 520 | ||
521 | kv_do_enable_didt(adev, enable); | 521 | kv_do_enable_didt(adev, enable); |
522 | 522 | ||
523 | adev->gfx.rlc.funcs->exit_safe_mode(adev); | 523 | amdgpu_gfx_rlc_exit_safe_mode(adev); |
524 | } | 524 | } |
525 | 525 | ||
526 | return 0; | 526 | return 0; |
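kv_dpm.c (and the other DPM/GFX files in this series) stop calling the per-ASIC RLC callbacks directly and go through common wrappers that live in the new amdgpu_rlc.o added to the Makefile. A minimal sketch of what such a wrapper can look like, reusing the enter_safe_mode/exit_safe_mode callbacks visible in this diff and an assumed in_safe_mode flag on adev->gfx.rlc (the real amdgpu_rlc.c may structure this differently):

/* Sketch only; callback and field names beyond this diff are assumptions. */
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
{
	if (adev->gfx.rlc.in_safe_mode)
		return;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);
	adev->gfx.rlc.in_safe_mode = true;
}

void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
{
	if (!adev->gfx.rlc.in_safe_mode)
		return;

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
	adev->gfx.rlc.in_safe_mode = false;
}

The point of the indirection is that callers such as kv_enable_didt() no longer need to know which RLC implementation is active.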
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index fd23ba1226a5..d0d966d6080a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | |||
@@ -52,20 +52,25 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) | |||
52 | return base; | 52 | return base; |
53 | } | 53 | } |
54 | 54 | ||
55 | static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) | 55 | void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, |
56 | uint64_t page_table_base) | ||
56 | { | 57 | { |
57 | uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); | 58 | /* two-register stride between mmVM_CONTEXT0_* and mmVM_CONTEXT1_* */ |
59 | int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 | ||
60 | - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; | ||
58 | 61 | ||
59 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, | 62 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, |
60 | lower_32_bits(value)); | 63 | offset * vmid, lower_32_bits(page_table_base)); |
61 | 64 | ||
62 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, | 65 | WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, |
63 | upper_32_bits(value)); | 66 | offset * vmid, upper_32_bits(page_table_base)); |
64 | } | 67 | } |
65 | 68 | ||
66 | static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) | 69 | static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) |
67 | { | 70 | { |
68 | mmhub_v1_0_init_gart_pt_regs(adev); | 71 | uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); |
72 | |||
73 | mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base); | ||
69 | 74 | ||
70 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, | 75 | WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, |
71 | (u32)(adev->gmc.gart_start >> 12)); | 76 | (u32)(adev->gmc.gart_start >> 12)); |
@@ -90,7 +95,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) | |||
90 | 95 | ||
91 | /* Program the system aperture low logical page number. */ | 96 | /* Program the system aperture low logical page number. */ |
92 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, | 97 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, |
93 | min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18); | 98 | min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); |
94 | 99 | ||
95 | if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) | 100 | if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) |
96 | /* | 101 | /* |
@@ -100,11 +105,11 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) | |||
100 | * to get rid of the VM fault and hardware hang. | 105 | * to get rid of the VM fault and hardware hang. |
101 | */ | 106 | */ |
102 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, | 107 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, |
103 | max((adev->gmc.vram_end >> 18) + 0x1, | 108 | max((adev->gmc.fb_end >> 18) + 0x1, |
104 | adev->gmc.agp_end >> 18)); | 109 | adev->gmc.agp_end >> 18)); |
105 | else | 110 | else |
106 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, | 111 | WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, |
107 | max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); | 112 | max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); |
108 | 113 | ||
109 | /* Set default page address. */ | 114 | /* Set default page address. */ |
110 | value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + | 115 | value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + |
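The per-VMID helper above generalizes what used to be a GART-only register write: the VM_CONTEXTn page-table-base registers sit back to back, so the vmid simply scales the CONTEXT1-minus-CONTEXT0 stride. An illustrative call site, mirroring the GART aperture init in this hunk (VMID 0 is the kernel/GART context; other VMIDs can now be programmed through the same entry points):

uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);

/* program context 0's page table base on both hubs, as the GART init does */
mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);
gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base);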
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index bef3d0c0c117..0de0fdf98c00 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h | |||
@@ -34,5 +34,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, | |||
34 | void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); | 34 | void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); |
35 | void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, | 35 | void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, |
36 | bool enable); | 36 | bool enable); |
37 | void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, | ||
38 | uint64_t page_table_base); | ||
37 | 39 | ||
38 | #endif | 40 | #endif |
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 3f3fac2d50cd..e5dd052d9e06 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "nbio/nbio_7_4_offset.h" | 34 | #include "nbio/nbio_7_4_offset.h" |
35 | 35 | ||
36 | MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); | 36 | MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); |
37 | MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); | ||
37 | 38 | ||
38 | /* address block */ | 39 | /* address block */ |
39 | #define smnMP1_FIRMWARE_FLAGS 0x3010024 | 40 | #define smnMP1_FIRMWARE_FLAGS 0x3010024 |
@@ -98,7 +99,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) | |||
98 | const char *chip_name; | 99 | const char *chip_name; |
99 | char fw_name[30]; | 100 | char fw_name[30]; |
100 | int err = 0; | 101 | int err = 0; |
101 | const struct psp_firmware_header_v1_0 *hdr; | 102 | const struct psp_firmware_header_v1_0 *sos_hdr; |
103 | const struct ta_firmware_header_v1_0 *ta_hdr; | ||
102 | 104 | ||
103 | DRM_DEBUG("\n"); | 105 | DRM_DEBUG("\n"); |
104 | 106 | ||
@@ -119,16 +121,32 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) | |||
119 | if (err) | 121 | if (err) |
120 | goto out; | 122 | goto out; |
121 | 123 | ||
122 | hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; | 124 | sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; |
123 | adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version); | 125 | adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); |
124 | adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version); | 126 | adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version); |
125 | adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes); | 127 | adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes); |
126 | adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) - | 128 | adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) - |
127 | le32_to_cpu(hdr->sos_size_bytes); | 129 | le32_to_cpu(sos_hdr->sos_size_bytes); |
128 | adev->psp.sys_start_addr = (uint8_t *)hdr + | 130 | adev->psp.sys_start_addr = (uint8_t *)sos_hdr + |
129 | le32_to_cpu(hdr->header.ucode_array_offset_bytes); | 131 | le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); |
130 | adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + | 132 | adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + |
131 | le32_to_cpu(hdr->sos_offset_bytes); | 133 | le32_to_cpu(sos_hdr->sos_offset_bytes); |
134 | |||
135 | snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); | ||
136 | err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); | ||
137 | if (err) | ||
138 | goto out; | ||
139 | |||
140 | err = amdgpu_ucode_validate(adev->psp.ta_fw); | ||
141 | if (err) | ||
142 | goto out; | ||
143 | |||
144 | ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; | ||
145 | adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); | ||
146 | adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); | ||
147 | adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + | ||
148 | le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); | ||
149 | |||
132 | return 0; | 150 | return 0; |
133 | out: | 151 | out: |
134 | if (err) { | 152 | if (err) { |
@@ -167,7 +185,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) | |||
167 | /* Copy PSP System Driver binary to memory */ | 185 | /* Copy PSP System Driver binary to memory */ |
168 | memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); | 186 | memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); |
169 | 187 | ||
170 | /* Provide the sys driver to bootrom */ | 188 | /* Provide the sys driver to bootloader */ |
171 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, | 189 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, |
172 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); | 190 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); |
173 | psp_gfxdrv_command_reg = 1 << 16; | 191 | psp_gfxdrv_command_reg = 1 << 16; |
@@ -208,7 +226,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) | |||
208 | /* Copy Secure OS binary to PSP memory */ | 226 | /* Copy Secure OS binary to PSP memory */ |
209 | memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); | 227 | memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); |
210 | 228 | ||
211 | /* Provide the PSP secure OS to bootrom */ | 229 | /* Provide the PSP secure OS to bootloader */ |
212 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, | 230 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, |
213 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); | 231 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); |
214 | psp_gfxdrv_command_reg = 2 << 16; | 232 | psp_gfxdrv_command_reg = 2 << 16; |
@@ -552,24 +570,110 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) | |||
552 | static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp, | 570 | static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp, |
553 | int number_devices, struct psp_xgmi_topology_info *topology) | 571 | int number_devices, struct psp_xgmi_topology_info *topology) |
554 | { | 572 | { |
573 | struct ta_xgmi_shared_memory *xgmi_cmd; | ||
574 | struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; | ||
575 | struct ta_xgmi_cmd_get_topology_info_output *topology_info_output; | ||
576 | int i; | ||
577 | int ret; | ||
578 | |||
579 | if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) | ||
580 | return -EINVAL; | ||
581 | |||
582 | xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; | ||
583 | memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); | ||
584 | |||
585 | /* Fill in the shared memory with topology information as input */ | ||
586 | topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; | ||
587 | xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; | ||
588 | topology_info_input->num_nodes = number_devices; | ||
589 | |||
590 | for (i = 0; i < topology_info_input->num_nodes; i++) { | ||
591 | topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; | ||
592 | topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; | ||
593 | topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; | ||
594 | topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; | ||
595 | } | ||
596 | |||
597 | /* Invoke xgmi ta to get the topology information */ | ||
598 | ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); | ||
599 | if (ret) | ||
600 | return ret; | ||
601 | |||
602 | /* Read the output topology information from the shared memory */ | ||
603 | topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info; | ||
604 | topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes; | ||
605 | for (i = 0; i < topology->num_nodes; i++) { | ||
606 | topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; | ||
607 | topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; | ||
608 | topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled; | ||
609 | topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine; | ||
610 | } | ||
611 | |||
555 | return 0; | 612 | return 0; |
556 | } | 613 | } |
557 | 614 | ||
558 | static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp, | 615 | static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp, |
559 | int number_devices, struct psp_xgmi_topology_info *topology) | 616 | int number_devices, struct psp_xgmi_topology_info *topology) |
560 | { | 617 | { |
561 | return 0; | 618 | struct ta_xgmi_shared_memory *xgmi_cmd; |
619 | struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; | ||
620 | int i; | ||
621 | |||
622 | if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) | ||
623 | return -EINVAL; | ||
624 | |||
625 | xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; | ||
626 | memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); | ||
627 | |||
628 | topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; | ||
629 | xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO; | ||
630 | topology_info_input->num_nodes = number_devices; | ||
631 | |||
632 | for (i = 0; i < topology_info_input->num_nodes; i++) { | ||
633 | topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; | ||
634 | topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; | ||
635 | topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; | ||
636 | topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; | ||
637 | } | ||
638 | |||
639 | /* Invoke xgmi ta to set topology information */ | ||
640 | return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO); | ||
562 | } | 641 | } |
563 | 642 | ||
564 | static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp) | 643 | static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp) |
565 | { | 644 | { |
566 | u64 hive_id = 0; | 645 | struct ta_xgmi_shared_memory *xgmi_cmd; |
646 | int ret; | ||
647 | |||
648 | xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; | ||
649 | memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); | ||
650 | |||
651 | xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; | ||
652 | |||
653 | /* Invoke xgmi ta to get hive id */ | ||
654 | ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); | ||
655 | if (ret) | ||
656 | return 0; | ||
657 | else | ||
658 | return xgmi_cmd->xgmi_out_message.get_hive_id.hive_id; | ||
659 | } | ||
660 | |||
661 | static u64 psp_v11_0_xgmi_get_node_id(struct psp_context *psp) | ||
662 | { | ||
663 | struct ta_xgmi_shared_memory *xgmi_cmd; | ||
664 | int ret; | ||
665 | |||
666 | xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; | ||
667 | memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); | ||
567 | 668 | ||
568 | /* Remove me when we can get correct hive_id through PSP */ | 669 | xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; |
569 | if (psp->adev->gmc.xgmi.num_physical_nodes) | ||
570 | hive_id = 0x123456789abcdef; | ||
571 | 670 | ||
572 | return hive_id; | 671 | /* Invoke xgmi ta to get the node id */ |
672 | ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); | ||
673 | if (ret) | ||
674 | return 0; | ||
675 | else | ||
676 | return xgmi_cmd->xgmi_out_message.get_node_id.node_id; | ||
573 | } | 677 | } |
574 | 678 | ||
575 | static const struct psp_funcs psp_v11_0_funcs = { | 679 | static const struct psp_funcs psp_v11_0_funcs = { |
@@ -587,6 +691,7 @@ static const struct psp_funcs psp_v11_0_funcs = { | |||
587 | .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info, | 691 | .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info, |
588 | .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info, | 692 | .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info, |
589 | .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id, | 693 | .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id, |
694 | .xgmi_get_node_id = psp_v11_0_xgmi_get_node_id, | ||
590 | }; | 695 | }; |
591 | 696 | ||
592 | void psp_v11_0_set_psp_funcs(struct psp_context *psp) | 697 | void psp_v11_0_set_psp_funcs(struct psp_context *psp) |
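All of the XGMI TA plumbing added above (topology get/set, hive id, node id) follows one shared-memory command pattern; condensed from the hive-id hunk, it amounts to:

/* Condensed restatement of the pattern used by the new TA calls above. */
static u64 example_xgmi_get_hive_id(struct psp_context *psp)
{
	struct ta_xgmi_shared_memory *cmd =
		(struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;

	memset(cmd, 0, sizeof(*cmd));
	cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;	/* 1. pick the TA command   */
							/* 2. fill xgmi_in_message  */
	if (psp_xgmi_invoke(psp, cmd->cmd_id))		/* 3. kick the XGMI TA      */
		return 0;				/* failure reads as "no id" */

	return cmd->xgmi_out_message.get_hive_id.hive_id;	/* 4. read the reply */
}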
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index e1ebf770c303..9cea0bbe4525 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | |||
@@ -194,7 +194,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) | |||
194 | /* Copy PSP System Driver binary to memory */ | 194 | /* Copy PSP System Driver binary to memory */ |
195 | memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); | 195 | memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); |
196 | 196 | ||
197 | /* Provide the sys driver to bootrom */ | 197 | /* Provide the sys driver to bootloader */ |
198 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, | 198 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, |
199 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); | 199 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); |
200 | psp_gfxdrv_command_reg = 1 << 16; | 200 | psp_gfxdrv_command_reg = 1 << 16; |
@@ -254,7 +254,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) | |||
254 | /* Copy Secure OS binary to PSP memory */ | 254 | /* Copy Secure OS binary to PSP memory */ |
255 | memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); | 255 | memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); |
256 | 256 | ||
257 | /* Provide the PSP secure OS to bootrom */ | 257 | /* Provide the PSP secure OS to bootloader */ |
258 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, | 258 | WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, |
259 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); | 259 | (uint32_t)(psp->fw_pri_mc_addr >> 20)); |
260 | psp_gfxdrv_command_reg = 2 << 16; | 260 | psp_gfxdrv_command_reg = 2 << 16; |
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2d4770e173dd..9f3cb2aec7c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | |||
@@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) | |||
225 | 225 | ||
226 | static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | 226 | static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) |
227 | { | 227 | { |
228 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); | 228 | struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
229 | int i; | 229 | int i; |
230 | 230 | ||
231 | for (i = 0; i < count; i++) | 231 | for (i = 0; i < count; i++) |
@@ -245,9 +245,12 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | |||
245 | * Schedule an IB in the DMA ring (VI). | 245 | * Schedule an IB in the DMA ring (VI). |
246 | */ | 246 | */ |
247 | static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, | 247 | static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, |
248 | struct amdgpu_job *job, | ||
248 | struct amdgpu_ib *ib, | 249 | struct amdgpu_ib *ib, |
249 | unsigned vmid, bool ctx_switch) | 250 | bool ctx_switch) |
250 | { | 251 | { |
252 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
253 | |||
251 | /* IB packet must end on a 8 DW boundary */ | 254 | /* IB packet must end on a 8 DW boundary */ |
252 | sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); | 255 | sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); |
253 | 256 | ||
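The emit_ib callback now receives the owning job and pulls the VMID out of it. AMDGPU_JOB_GET_VMID is presumably a NULL-tolerant accessor so that direct, job-less submissions (IB tests, for instance) still resolve to VMID 0, along the lines of:

/* assumed definition, likely provided by amdgpu_job.h */
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)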
@@ -349,8 +352,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) | |||
349 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); | 352 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); |
350 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); | 353 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); |
351 | } | 354 | } |
352 | sdma0->ready = false; | 355 | sdma0->sched.ready = false; |
353 | sdma1->ready = false; | 356 | sdma1->sched.ready = false; |
354 | } | 357 | } |
355 | 358 | ||
356 | /** | 359 | /** |
@@ -471,17 +474,15 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) | |||
471 | /* enable DMA IBs */ | 474 | /* enable DMA IBs */ |
472 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); | 475 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); |
473 | 476 | ||
474 | ring->ready = true; | 477 | ring->sched.ready = true; |
475 | } | 478 | } |
476 | 479 | ||
477 | sdma_v2_4_enable(adev, true); | 480 | sdma_v2_4_enable(adev, true); |
478 | for (i = 0; i < adev->sdma.num_instances; i++) { | 481 | for (i = 0; i < adev->sdma.num_instances; i++) { |
479 | ring = &adev->sdma.instance[i].ring; | 482 | ring = &adev->sdma.instance[i].ring; |
480 | r = amdgpu_ring_test_ring(ring); | 483 | r = amdgpu_ring_test_helper(ring); |
481 | if (r) { | 484 | if (r) |
482 | ring->ready = false; | ||
483 | return r; | 485 | return r; |
484 | } | ||
485 | 486 | ||
486 | if (adev->mman.buffer_funcs_ring == ring) | 487 | if (adev->mman.buffer_funcs_ring == ring) |
487 | amdgpu_ttm_set_buffer_funcs_status(adev, true); | 488 | amdgpu_ttm_set_buffer_funcs_status(adev, true); |
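amdgpu_ring_test_helper(), used above instead of the open-coded amdgpu_ring_test_ring() plus ready-flag bookkeeping, is presumably a thin wrapper along these lines (note that the ready state now lives in ring->sched rather than on the ring itself):

/* assumed shape of the shared helper */
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	int r = amdgpu_ring_test_ring(ring);

	ring->sched.ready = !r;
	return r;
}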
@@ -550,21 +551,16 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) | |||
550 | u64 gpu_addr; | 551 | u64 gpu_addr; |
551 | 552 | ||
552 | r = amdgpu_device_wb_get(adev, &index); | 553 | r = amdgpu_device_wb_get(adev, &index); |
553 | if (r) { | 554 | if (r) |
554 | dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); | ||
555 | return r; | 555 | return r; |
556 | } | ||
557 | 556 | ||
558 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 557 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
559 | tmp = 0xCAFEDEAD; | 558 | tmp = 0xCAFEDEAD; |
560 | adev->wb.wb[index] = cpu_to_le32(tmp); | 559 | adev->wb.wb[index] = cpu_to_le32(tmp); |
561 | 560 | ||
562 | r = amdgpu_ring_alloc(ring, 5); | 561 | r = amdgpu_ring_alloc(ring, 5); |
563 | if (r) { | 562 | if (r) |
564 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); | 563 | goto error_free_wb; |
565 | amdgpu_device_wb_free(adev, index); | ||
566 | return r; | ||
567 | } | ||
568 | 564 | ||
569 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | | 565 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | |
570 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); | 566 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); |
@@ -581,15 +577,11 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) | |||
581 | DRM_UDELAY(1); | 577 | DRM_UDELAY(1); |
582 | } | 578 | } |
583 | 579 | ||
584 | if (i < adev->usec_timeout) { | 580 | if (i >= adev->usec_timeout) |
585 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); | 581 | r = -ETIMEDOUT; |
586 | } else { | ||
587 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
588 | ring->idx, tmp); | ||
589 | r = -EINVAL; | ||
590 | } | ||
591 | amdgpu_device_wb_free(adev, index); | ||
592 | 582 | ||
583 | error_free_wb: | ||
584 | amdgpu_device_wb_free(adev, index); | ||
593 | return r; | 585 | return r; |
594 | } | 586 | } |
595 | 587 | ||
@@ -612,20 +604,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
612 | long r; | 604 | long r; |
613 | 605 | ||
614 | r = amdgpu_device_wb_get(adev, &index); | 606 | r = amdgpu_device_wb_get(adev, &index); |
615 | if (r) { | 607 | if (r) |
616 | dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); | ||
617 | return r; | 608 | return r; |
618 | } | ||
619 | 609 | ||
620 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 610 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
621 | tmp = 0xCAFEDEAD; | 611 | tmp = 0xCAFEDEAD; |
622 | adev->wb.wb[index] = cpu_to_le32(tmp); | 612 | adev->wb.wb[index] = cpu_to_le32(tmp); |
623 | memset(&ib, 0, sizeof(ib)); | 613 | memset(&ib, 0, sizeof(ib)); |
624 | r = amdgpu_ib_get(adev, NULL, 256, &ib); | 614 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
625 | if (r) { | 615 | if (r) |
626 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
627 | goto err0; | 616 | goto err0; |
628 | } | ||
629 | 617 | ||
630 | ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | | 618 | ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | |
631 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); | 619 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); |
@@ -644,21 +632,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
644 | 632 | ||
645 | r = dma_fence_wait_timeout(f, false, timeout); | 633 | r = dma_fence_wait_timeout(f, false, timeout); |
646 | if (r == 0) { | 634 | if (r == 0) { |
647 | DRM_ERROR("amdgpu: IB test timed out\n"); | ||
648 | r = -ETIMEDOUT; | 635 | r = -ETIMEDOUT; |
649 | goto err1; | 636 | goto err1; |
650 | } else if (r < 0) { | 637 | } else if (r < 0) { |
651 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
652 | goto err1; | 638 | goto err1; |
653 | } | 639 | } |
654 | tmp = le32_to_cpu(adev->wb.wb[index]); | 640 | tmp = le32_to_cpu(adev->wb.wb[index]); |
655 | if (tmp == 0xDEADBEEF) { | 641 | if (tmp == 0xDEADBEEF) |
656 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
657 | r = 0; | 642 | r = 0; |
658 | } else { | 643 | else |
659 | DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); | ||
660 | r = -EINVAL; | 644 | r = -EINVAL; |
661 | } | ||
662 | 645 | ||
663 | err1: | 646 | err1: |
664 | amdgpu_ib_free(adev, &ib, NULL); | 647 | amdgpu_ib_free(adev, &ib, NULL); |
@@ -760,7 +743,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, | |||
760 | */ | 743 | */ |
761 | static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) | 744 | static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) |
762 | { | 745 | { |
763 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); | 746 | struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
764 | u32 pad_count; | 747 | u32 pad_count; |
765 | int i; | 748 | int i; |
766 | 749 | ||
@@ -1105,8 +1088,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev, | |||
1105 | struct amdgpu_irq_src *source, | 1088 | struct amdgpu_irq_src *source, |
1106 | struct amdgpu_iv_entry *entry) | 1089 | struct amdgpu_iv_entry *entry) |
1107 | { | 1090 | { |
1091 | u8 instance_id, queue_id; | ||
1092 | |||
1108 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); | 1093 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); |
1109 | schedule_work(&adev->reset_work); | 1094 | instance_id = (entry->ring_id & 0x3) >> 0; |
1095 | queue_id = (entry->ring_id & 0xc) >> 2; | ||
1096 | |||
1097 | if (instance_id <= 1 && queue_id == 0) | ||
1098 | drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); | ||
1110 | return 0; | 1099 | return 0; |
1111 | } | 1100 | } |
1112 | 1101 | ||
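Note the error-handling shift above: an illegal SDMA instruction no longer schedules the device-wide reset_work (which this series removes from struct amdgpu_device); the fault is reported to the per-ring GPU scheduler instead. A commented restatement of the ring_id decode used in that handler:

static int example_sdma_illegal_inst_irq(struct amdgpu_device *adev,
					 struct amdgpu_iv_entry *entry)
{
	u8 instance_id = entry->ring_id & 0x3;		/* bits [1:0]: SDMA engine     */
	u8 queue_id    = (entry->ring_id & 0xc) >> 2;	/* bits [3:2]: queue in engine */

	/* only the gfx queue (queue 0) of instance 0/1 has a scheduler behind it */
	if (instance_id <= 1 && queue_id == 0)
		drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);

	return 0;
}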
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 6fb3edaba0ec..b6a25f92d566 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | |||
@@ -399,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) | |||
399 | 399 | ||
400 | static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | 400 | static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) |
401 | { | 401 | { |
402 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); | 402 | struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
403 | int i; | 403 | int i; |
404 | 404 | ||
405 | for (i = 0; i < count; i++) | 405 | for (i = 0; i < count; i++) |
@@ -419,9 +419,12 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | |||
419 | * Schedule an IB in the DMA ring (VI). | 419 | * Schedule an IB in the DMA ring (VI). |
420 | */ | 420 | */ |
421 | static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, | 421 | static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, |
422 | struct amdgpu_job *job, | ||
422 | struct amdgpu_ib *ib, | 423 | struct amdgpu_ib *ib, |
423 | unsigned vmid, bool ctx_switch) | 424 | bool ctx_switch) |
424 | { | 425 | { |
426 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
427 | |||
425 | /* IB packet must end on a 8 DW boundary */ | 428 | /* IB packet must end on a 8 DW boundary */ |
426 | sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); | 429 | sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); |
427 | 430 | ||
@@ -523,8 +526,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) | |||
523 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); | 526 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); |
524 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); | 527 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); |
525 | } | 528 | } |
526 | sdma0->ready = false; | 529 | sdma0->sched.ready = false; |
527 | sdma1->ready = false; | 530 | sdma1->sched.ready = false; |
528 | } | 531 | } |
529 | 532 | ||
530 | /** | 533 | /** |
@@ -739,7 +742,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) | |||
739 | /* enable DMA IBs */ | 742 | /* enable DMA IBs */ |
740 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); | 743 | WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); |
741 | 744 | ||
742 | ring->ready = true; | 745 | ring->sched.ready = true; |
743 | } | 746 | } |
744 | 747 | ||
745 | /* unhalt the MEs */ | 748 | /* unhalt the MEs */ |
@@ -749,11 +752,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) | |||
749 | 752 | ||
750 | for (i = 0; i < adev->sdma.num_instances; i++) { | 753 | for (i = 0; i < adev->sdma.num_instances; i++) { |
751 | ring = &adev->sdma.instance[i].ring; | 754 | ring = &adev->sdma.instance[i].ring; |
752 | r = amdgpu_ring_test_ring(ring); | 755 | r = amdgpu_ring_test_helper(ring); |
753 | if (r) { | 756 | if (r) |
754 | ring->ready = false; | ||
755 | return r; | 757 | return r; |
756 | } | ||
757 | 758 | ||
758 | if (adev->mman.buffer_funcs_ring == ring) | 759 | if (adev->mman.buffer_funcs_ring == ring) |
759 | amdgpu_ttm_set_buffer_funcs_status(adev, true); | 760 | amdgpu_ttm_set_buffer_funcs_status(adev, true); |
@@ -822,21 +823,16 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) | |||
822 | u64 gpu_addr; | 823 | u64 gpu_addr; |
823 | 824 | ||
824 | r = amdgpu_device_wb_get(adev, &index); | 825 | r = amdgpu_device_wb_get(adev, &index); |
825 | if (r) { | 826 | if (r) |
826 | dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); | ||
827 | return r; | 827 | return r; |
828 | } | ||
829 | 828 | ||
830 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 829 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
831 | tmp = 0xCAFEDEAD; | 830 | tmp = 0xCAFEDEAD; |
832 | adev->wb.wb[index] = cpu_to_le32(tmp); | 831 | adev->wb.wb[index] = cpu_to_le32(tmp); |
833 | 832 | ||
834 | r = amdgpu_ring_alloc(ring, 5); | 833 | r = amdgpu_ring_alloc(ring, 5); |
835 | if (r) { | 834 | if (r) |
836 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); | 835 | goto error_free_wb; |
837 | amdgpu_device_wb_free(adev, index); | ||
838 | return r; | ||
839 | } | ||
840 | 836 | ||
841 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | | 837 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | |
842 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); | 838 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); |
@@ -853,15 +849,11 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) | |||
853 | DRM_UDELAY(1); | 849 | DRM_UDELAY(1); |
854 | } | 850 | } |
855 | 851 | ||
856 | if (i < adev->usec_timeout) { | 852 | if (i >= adev->usec_timeout) |
857 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); | 853 | r = -ETIMEDOUT; |
858 | } else { | ||
859 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
860 | ring->idx, tmp); | ||
861 | r = -EINVAL; | ||
862 | } | ||
863 | amdgpu_device_wb_free(adev, index); | ||
864 | 854 | ||
855 | error_free_wb: | ||
856 | amdgpu_device_wb_free(adev, index); | ||
865 | return r; | 857 | return r; |
866 | } | 858 | } |
867 | 859 | ||
@@ -884,20 +876,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
884 | long r; | 876 | long r; |
885 | 877 | ||
886 | r = amdgpu_device_wb_get(adev, &index); | 878 | r = amdgpu_device_wb_get(adev, &index); |
887 | if (r) { | 879 | if (r) |
888 | dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); | ||
889 | return r; | 880 | return r; |
890 | } | ||
891 | 881 | ||
892 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 882 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
893 | tmp = 0xCAFEDEAD; | 883 | tmp = 0xCAFEDEAD; |
894 | adev->wb.wb[index] = cpu_to_le32(tmp); | 884 | adev->wb.wb[index] = cpu_to_le32(tmp); |
895 | memset(&ib, 0, sizeof(ib)); | 885 | memset(&ib, 0, sizeof(ib)); |
896 | r = amdgpu_ib_get(adev, NULL, 256, &ib); | 886 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
897 | if (r) { | 887 | if (r) |
898 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
899 | goto err0; | 888 | goto err0; |
900 | } | ||
901 | 889 | ||
902 | ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | | 890 | ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | |
903 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); | 891 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); |
@@ -916,21 +904,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
916 | 904 | ||
917 | r = dma_fence_wait_timeout(f, false, timeout); | 905 | r = dma_fence_wait_timeout(f, false, timeout); |
918 | if (r == 0) { | 906 | if (r == 0) { |
919 | DRM_ERROR("amdgpu: IB test timed out\n"); | ||
920 | r = -ETIMEDOUT; | 907 | r = -ETIMEDOUT; |
921 | goto err1; | 908 | goto err1; |
922 | } else if (r < 0) { | 909 | } else if (r < 0) { |
923 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
924 | goto err1; | 910 | goto err1; |
925 | } | 911 | } |
926 | tmp = le32_to_cpu(adev->wb.wb[index]); | 912 | tmp = le32_to_cpu(adev->wb.wb[index]); |
927 | if (tmp == 0xDEADBEEF) { | 913 | if (tmp == 0xDEADBEEF) |
928 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
929 | r = 0; | 914 | r = 0; |
930 | } else { | 915 | else |
931 | DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); | ||
932 | r = -EINVAL; | 916 | r = -EINVAL; |
933 | } | ||
934 | err1: | 917 | err1: |
935 | amdgpu_ib_free(adev, &ib, NULL); | 918 | amdgpu_ib_free(adev, &ib, NULL); |
936 | dma_fence_put(f); | 919 | dma_fence_put(f); |
@@ -1031,7 +1014,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, | |||
1031 | */ | 1014 | */ |
1032 | static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) | 1015 | static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) |
1033 | { | 1016 | { |
1034 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); | 1017 | struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
1035 | u32 pad_count; | 1018 | u32 pad_count; |
1036 | int i; | 1019 | int i; |
1037 | 1020 | ||
@@ -1440,8 +1423,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev, | |||
1440 | struct amdgpu_irq_src *source, | 1423 | struct amdgpu_irq_src *source, |
1441 | struct amdgpu_iv_entry *entry) | 1424 | struct amdgpu_iv_entry *entry) |
1442 | { | 1425 | { |
1426 | u8 instance_id, queue_id; | ||
1427 | |||
1443 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); | 1428 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); |
1444 | schedule_work(&adev->reset_work); | 1429 | instance_id = (entry->ring_id & 0x3) >> 0; |
1430 | queue_id = (entry->ring_id & 0xc) >> 2; | ||
1431 | |||
1432 | if (instance_id <= 1 && queue_id == 0) | ||
1433 | drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); | ||
1445 | return 0; | 1434 | return 0; |
1446 | } | 1435 | } |
1447 | 1436 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 7a8c9172d30a..f4490cdd9804 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | |||
@@ -54,6 +54,11 @@ MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); | |||
54 | #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L | 54 | #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L |
55 | #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L | 55 | #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L |
56 | 56 | ||
57 | #define WREG32_SDMA(instance, offset, value) \ | ||
58 | WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value) | ||
59 | #define RREG32_SDMA(instance, offset) \ | ||
60 | RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset))) | ||
61 | |||
57 | static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); | 62 | static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); |
58 | static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); | 63 | static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); |
59 | static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); | 64 | static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); |
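The two wrappers above exist purely to shrink the per-instance register accesses that dominate the rest of this file; they assume a local struct amdgpu_device *adev in scope, exactly as the open-coded form did. Before/after for a typical access:

/* before */
rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);

/* after */
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);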
@@ -367,16 +372,11 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) | |||
367 | wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); | 372 | wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); |
368 | DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); | 373 | DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); |
369 | } else { | 374 | } else { |
370 | u32 lowbit, highbit; | 375 | wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI); |
371 | |||
372 | lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; | ||
373 | highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; | ||
374 | |||
375 | DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", | ||
376 | ring->me, highbit, lowbit); | ||
377 | wptr = highbit; | ||
378 | wptr = wptr << 32; | 376 | wptr = wptr << 32; |
379 | wptr |= lowbit; | 377 | wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR); |
378 | DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", | ||
379 | ring->me, wptr); | ||
380 | } | 380 | } |
381 | 381 | ||
382 | return wptr >> 2; | 382 | return wptr >> 2; |
@@ -417,14 +417,67 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) | |||
417 | lower_32_bits(ring->wptr << 2), | 417 | lower_32_bits(ring->wptr << 2), |
418 | ring->me, | 418 | ring->me, |
419 | upper_32_bits(ring->wptr << 2)); | 419 | upper_32_bits(ring->wptr << 2)); |
420 | WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); | 420 | WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR, |
421 | WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); | 421 | lower_32_bits(ring->wptr << 2)); |
422 | WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI, | ||
423 | upper_32_bits(ring->wptr << 2)); | ||
424 | } | ||
425 | } | ||
426 | |||
427 | /** | ||
428 | * sdma_v4_0_page_ring_get_wptr - get the current write pointer | ||
429 | * | ||
430 | * @ring: amdgpu ring pointer | ||
431 | * | ||
432 | * Get the current wptr from the hardware (VEGA10+). | ||
433 | */ | ||
434 | static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring) | ||
435 | { | ||
436 | struct amdgpu_device *adev = ring->adev; | ||
437 | u64 wptr; | ||
438 | |||
439 | if (ring->use_doorbell) { | ||
440 | /* XXX check if swapping is necessary on BE */ | ||
441 | wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); | ||
442 | } else { | ||
443 | wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI); | ||
444 | wptr = wptr << 32; | ||
445 | wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR); | ||
446 | } | ||
447 | |||
448 | return wptr >> 2; | ||
449 | } | ||
450 | |||
451 | /** | ||
452 | * sdma_v4_0_page_ring_set_wptr - commit the write pointer | ||
453 | * | ||
454 | * @ring: amdgpu ring pointer | ||
455 | * | ||
456 | * Write the wptr back to the hardware (VEGA10+). | ||
457 | */ | ||
458 | static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring) | ||
459 | { | ||
460 | struct amdgpu_device *adev = ring->adev; | ||
461 | |||
462 | if (ring->use_doorbell) { | ||
463 | u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; | ||
464 | |||
465 | /* XXX check if swapping is necessary on BE */ | ||
466 | WRITE_ONCE(*wb, (ring->wptr << 2)); | ||
467 | WDOORBELL64(ring->doorbell_index, ring->wptr << 2); | ||
468 | } else { | ||
469 | uint64_t wptr = ring->wptr << 2; | ||
470 | |||
471 | WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR, | ||
472 | lower_32_bits(wptr)); | ||
473 | WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI, | ||
474 | upper_32_bits(wptr)); | ||
422 | } | 475 | } |
423 | } | 476 | } |
424 | 477 | ||
425 | static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | 478 | static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) |
426 | { | 479 | { |
427 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); | 480 | struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
428 | int i; | 481 | int i; |
429 | 482 | ||
430 | for (i = 0; i < count; i++) | 483 | for (i = 0; i < count; i++) |
@@ -444,9 +497,12 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) | |||
444 | * Schedule an IB in the DMA ring (VEGA10). | 497 | * Schedule an IB in the DMA ring (VEGA10). |
445 | */ | 498 | */ |
446 | static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, | 499 | static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, |
447 | struct amdgpu_ib *ib, | 500 | struct amdgpu_job *job, |
448 | unsigned vmid, bool ctx_switch) | 501 | struct amdgpu_ib *ib, |
502 | bool ctx_switch) | ||
449 | { | 503 | { |
504 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
505 | |||
450 | /* IB packet must end on a 8 DW boundary */ | 506 | /* IB packet must end on a 8 DW boundary */ |
451 | sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); | 507 | sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); |
452 | 508 | ||
@@ -568,16 +624,16 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) | |||
568 | amdgpu_ttm_set_buffer_funcs_status(adev, false); | 624 | amdgpu_ttm_set_buffer_funcs_status(adev, false); |
569 | 625 | ||
570 | for (i = 0; i < adev->sdma.num_instances; i++) { | 626 | for (i = 0; i < adev->sdma.num_instances; i++) { |
571 | rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); | 627 | rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); |
572 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); | 628 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); |
573 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); | 629 | WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); |
574 | ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); | 630 | ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); |
575 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); | 631 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); |
576 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); | 632 | WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); |
577 | } | 633 | } |
578 | 634 | ||
579 | sdma0->ready = false; | 635 | sdma0->sched.ready = false; |
580 | sdma1->ready = false; | 636 | sdma1->sched.ready = false; |
581 | } | 637 | } |
582 | 638 | ||
583 | /** | 639 | /** |
@@ -593,6 +649,39 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) | |||
593 | } | 649 | } |
594 | 650 | ||
595 | /** | 651 | /** |
652 | * sdma_v4_0_page_stop - stop the page async dma engines | ||
653 | * | ||
654 | * @adev: amdgpu_device pointer | ||
655 | * | ||
656 | * Stop the page async dma ring buffers (VEGA10). | ||
657 | */ | ||
658 | static void sdma_v4_0_page_stop(struct amdgpu_device *adev) | ||
659 | { | ||
660 | struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; | ||
661 | struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; | ||
662 | u32 rb_cntl, ib_cntl; | ||
663 | int i; | ||
664 | |||
665 | if ((adev->mman.buffer_funcs_ring == sdma0) || | ||
666 | (adev->mman.buffer_funcs_ring == sdma1)) | ||
667 | amdgpu_ttm_set_buffer_funcs_status(adev, false); | ||
668 | |||
669 | for (i = 0; i < adev->sdma.num_instances; i++) { | ||
670 | rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); | ||
671 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, | ||
672 | RB_ENABLE, 0); | ||
673 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); | ||
674 | ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL); | ||
675 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, | ||
676 | IB_ENABLE, 0); | ||
677 | WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); | ||
678 | } | ||
679 | |||
680 | sdma0->sched.ready = false; | ||
681 | sdma1->sched.ready = false; | ||
682 | } | ||
683 | |||
684 | /** | ||
596 | * sdma_v4_0_ctx_switch_enable - enable or disable the async dma engines context switch | 685 | * sdma_v4_0_ctx_switch_enable - enable or disable the async dma engines context switch
597 | * | 686 | * |
598 | * @adev: amdgpu_device pointer | 687 | * @adev: amdgpu_device pointer |
@@ -630,18 +719,15 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) | |||
630 | } | 719 | } |
631 | 720 | ||
632 | for (i = 0; i < adev->sdma.num_instances; i++) { | 721 | for (i = 0; i < adev->sdma.num_instances; i++) { |
633 | f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); | 722 | f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL); |
634 | f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, | 723 | f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, |
635 | AUTO_CTXSW_ENABLE, enable ? 1 : 0); | 724 | AUTO_CTXSW_ENABLE, enable ? 1 : 0); |
636 | if (enable && amdgpu_sdma_phase_quantum) { | 725 | if (enable && amdgpu_sdma_phase_quantum) { |
637 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), | 726 | WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum); |
638 | phase_quantum); | 727 | WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum); |
639 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), | 728 | WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum); |
640 | phase_quantum); | ||
641 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), | ||
642 | phase_quantum); | ||
643 | } | 729 | } |
644 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); | 730 | WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl); |
645 | } | 731 | } |
646 | 732 | ||
647 | } | 733 | } |
@@ -662,156 +748,217 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) | |||
662 | if (enable == false) { | 748 | if (enable == false) { |
663 | sdma_v4_0_gfx_stop(adev); | 749 | sdma_v4_0_gfx_stop(adev); |
664 | sdma_v4_0_rlc_stop(adev); | 750 | sdma_v4_0_rlc_stop(adev); |
751 | if (adev->sdma.has_page_queue) | ||
752 | sdma_v4_0_page_stop(adev); | ||
665 | } | 753 | } |
666 | 754 | ||
667 | for (i = 0; i < adev->sdma.num_instances; i++) { | 755 | for (i = 0; i < adev->sdma.num_instances; i++) { |
668 | f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); | 756 | f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL); |
669 | f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); | 757 | f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); |
670 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); | 758 | WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl); |
671 | } | 759 | } |
672 | } | 760 | } |
673 | 761 | ||
674 | /** | 762 | /** |
763 | * sdma_v4_0_rb_cntl - get parameters for rb_cntl | ||
764 | */ | ||
765 | static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) | ||
766 | { | ||
767 | /* Set ring buffer size in dwords */ | ||
768 | uint32_t rb_bufsz = order_base_2(ring->ring_size / 4); | ||
769 | |||
770 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); | ||
771 | #ifdef __BIG_ENDIAN | ||
772 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); | ||
773 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, | ||
774 | RPTR_WRITEBACK_SWAP_ENABLE, 1); | ||
775 | #endif | ||
776 | return rb_cntl; | ||
777 | } | ||
778 | |||
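The new sdma_v4_0_rb_cntl() helper factors out the RB_SIZE programming shared by the gfx and page queues: RB_SIZE holds the log2 of the ring length in dwords. As a worked example (not part of the patch), a 16 KiB ring is 16384 / 4 = 4096 dwords, so the field is programmed to order_base_2(4096) = 12:

	/* illustrative only: a 16 KiB ring yields RB_SIZE = 12 */
	uint32_t rb_size_field = order_base_2((16 * 1024) / 4);	/* == 12 */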
779 | /** | ||
675 | * sdma_v4_0_gfx_resume - setup and start the async dma engines | 780 | * sdma_v4_0_gfx_resume - setup and start the async dma engines |
676 | * | 781 | * |
677 | * @adev: amdgpu_device pointer | 782 | * @adev: amdgpu_device pointer |
783 | * @i: instance to resume | ||
678 | * | 784 | * |
679 | * Set up the gfx DMA ring buffers and enable them (VEGA10). | 785 | * Set up the gfx DMA ring buffers and enable them (VEGA10). |
680 | * Returns 0 for success, error for failure. | 786 | * Returns 0 for success, error for failure. |
681 | */ | 787 | */ |
682 | static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) | 788 | static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) |
683 | { | 789 | { |
684 | struct amdgpu_ring *ring; | 790 | struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; |
685 | u32 rb_cntl, ib_cntl, wptr_poll_cntl; | 791 | u32 rb_cntl, ib_cntl, wptr_poll_cntl; |
686 | u32 rb_bufsz; | ||
687 | u32 wb_offset; | 792 | u32 wb_offset; |
688 | u32 doorbell; | 793 | u32 doorbell; |
689 | u32 doorbell_offset; | 794 | u32 doorbell_offset; |
690 | u32 temp; | ||
691 | u64 wptr_gpu_addr; | 795 | u64 wptr_gpu_addr; |
692 | int i, r; | ||
693 | |||
694 | for (i = 0; i < adev->sdma.num_instances; i++) { | ||
695 | ring = &adev->sdma.instance[i].ring; | ||
696 | wb_offset = (ring->rptr_offs * 4); | ||
697 | 796 | ||
698 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); | 797 | wb_offset = (ring->rptr_offs * 4); |
699 | 798 | ||
700 | /* Set ring buffer size in dwords */ | 799 | rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); |
701 | rb_bufsz = order_base_2(ring->ring_size / 4); | 800 | rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); |
702 | rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); | 801 | WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); |
703 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); | ||
704 | #ifdef __BIG_ENDIAN | ||
705 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); | ||
706 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, | ||
707 | RPTR_WRITEBACK_SWAP_ENABLE, 1); | ||
708 | #endif | ||
709 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); | ||
710 | 802 | ||
711 | /* Initialize the ring buffer's read and write pointers */ | 803 | /* Initialize the ring buffer's read and write pointers */ |
712 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); | 804 | WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0); |
713 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); | 805 | WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0); |
714 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); | 806 | WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0); |
715 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); | 807 | WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0); |
716 | 808 | ||
717 | /* set the wb address whether it's enabled or not */ | 809 | /* set the wb address whether it's enabled or not */ |
718 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), | 810 | WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI, |
719 | upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); | 811 | upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); |
720 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), | 812 | WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO, |
721 | lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); | 813 | lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); |
722 | 814 | ||
723 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); | 815 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, |
816 | RPTR_WRITEBACK_ENABLE, 1); | ||
724 | 817 | ||
725 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); | 818 | WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8); |
726 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); | 819 | WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40); |
727 | 820 | ||
728 | ring->wptr = 0; | 821 | ring->wptr = 0; |
729 | 822 | ||
730 | /* before programing wptr to a less value, need set minor_ptr_update first */ | 823 | /* before programing wptr to a less value, need set minor_ptr_update first */ |
731 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); | 824 | WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1); |
732 | 825 | ||
733 | if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ | 826 | doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL); |
734 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); | 827 | doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET); |
735 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); | ||
736 | } | ||
737 | 828 | ||
738 | doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); | 829 | doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, |
739 | doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); | 830 | ring->use_doorbell); |
740 | 831 | doorbell_offset = REG_SET_FIELD(doorbell_offset, | |
741 | if (ring->use_doorbell) { | 832 | SDMA0_GFX_DOORBELL_OFFSET, |
742 | doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); | ||
743 | doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, | ||
744 | OFFSET, ring->doorbell_index); | 833 | OFFSET, ring->doorbell_index); |
745 | } else { | 834 | WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell); |
746 | doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); | 835 | WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset); |
747 | } | 836 | adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, |
748 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); | 837 | ring->doorbell_index); |
749 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); | 838 | |
750 | adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, | 839 | sdma_v4_0_ring_set_wptr(ring); |
751 | ring->doorbell_index); | 840 | |
841 | /* set minor_ptr_update to 0 after wptr programmed */ | ||
842 | WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0); | ||
843 | |||
844 | /* setup the wptr shadow polling */ | ||
845 | wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | ||
846 | WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO, | ||
847 | lower_32_bits(wptr_gpu_addr)); | ||
848 | WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI, | ||
849 | upper_32_bits(wptr_gpu_addr)); | ||
850 | wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL); | ||
851 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, | ||
852 | SDMA0_GFX_RB_WPTR_POLL_CNTL, | ||
853 | F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); | ||
854 | WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl); | ||
855 | |||
856 | /* enable DMA RB */ | ||
857 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); | ||
858 | WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); | ||
859 | |||
860 | ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); | ||
861 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); | ||
862 | #ifdef __BIG_ENDIAN | ||
863 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); | ||
864 | #endif | ||
865 | /* enable DMA IBs */ | ||
866 | WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); | ||
752 | 867 | ||
753 | if (amdgpu_sriov_vf(adev)) | 868 | ring->sched.ready = true; |
754 | sdma_v4_0_ring_set_wptr(ring); | 869 | } |
755 | 870 | ||
756 | /* set minor_ptr_update to 0 after wptr programmed */ | 871 | /**
757 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); | 872 | * sdma_v4_0_page_resume - setup and start the async dma engines |
873 | * | ||
874 | * @adev: amdgpu_device pointer | ||
875 | * @i: instance to resume | ||
876 | * | ||
877 | * Set up the page DMA ring buffers and enable them (VEGA10). | ||
878 | * Returns 0 for success, error for failure. | ||
879 | */ | ||
880 | static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) | ||
881 | { | ||
882 | struct amdgpu_ring *ring = &adev->sdma.instance[i].page; | ||
883 | u32 rb_cntl, ib_cntl, wptr_poll_cntl; | ||
884 | u32 wb_offset; | ||
885 | u32 doorbell; | ||
886 | u32 doorbell_offset; | ||
887 | u64 wptr_gpu_addr; | ||
758 | 888 | ||
759 | /* set utc l1 enable flag always to 1 */ | 889 | wb_offset = (ring->rptr_offs * 4); |
760 | temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); | ||
761 | temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); | ||
762 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); | ||
763 | 890 | ||
764 | if (!amdgpu_sriov_vf(adev)) { | 891 | rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); |
765 | /* unhalt engine */ | 892 | rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); |
766 | temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); | 893 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); |
767 | temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); | ||
768 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); | ||
769 | } | ||
770 | 894 | ||
771 | /* setup the wptr shadow polling */ | 895 | /* Initialize the ring buffer's read and write pointers */ |
772 | wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | 896 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0); |
773 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), | 897 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0); |
774 | lower_32_bits(wptr_gpu_addr)); | 898 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0); |
775 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), | 899 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0); |
776 | upper_32_bits(wptr_gpu_addr)); | ||
777 | wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); | ||
778 | if (amdgpu_sriov_vf(adev)) | ||
779 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); | ||
780 | else | ||
781 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); | ||
782 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); | ||
783 | 900 | ||
784 | /* enable DMA RB */ | 901 | /* set the wb address whether it's enabled or not */ |
785 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); | 902 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI, |
786 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); | 903 | upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); |
904 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, | ||
905 | lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); | ||
787 | 906 | ||
788 | ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); | 907 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, |
789 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); | 908 | RPTR_WRITEBACK_ENABLE, 1); |
790 | #ifdef __BIG_ENDIAN | ||
791 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); | ||
792 | #endif | ||
793 | /* enable DMA IBs */ | ||
794 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); | ||
795 | 909 | ||
796 | ring->ready = true; | 910 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8); |
911 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40); | ||
797 | 912 | ||
798 | if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */ | 913 | ring->wptr = 0;
799 | sdma_v4_0_ctx_switch_enable(adev, true); | ||
800 | sdma_v4_0_enable(adev, true); | ||
801 | } | ||
802 | 914 | ||
803 | r = amdgpu_ring_test_ring(ring); | 915 | /* before programming wptr to a smaller value, need to set minor_ptr_update first */
804 | if (r) { | 916 | WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1); |
805 | ring->ready = false; | ||
806 | return r; | ||
807 | } | ||
808 | 917 | ||
809 | if (adev->mman.buffer_funcs_ring == ring) | 918 | doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL); |
810 | amdgpu_ttm_set_buffer_funcs_status(adev, true); | 919 | doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET); |
811 | 920 | ||
812 | } | 921 | doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE, |
922 | ring->use_doorbell); | ||
923 | doorbell_offset = REG_SET_FIELD(doorbell_offset, | ||
924 | SDMA0_PAGE_DOORBELL_OFFSET, | ||
925 | OFFSET, ring->doorbell_index); | ||
926 | WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell); | ||
927 | WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset); | ||
928 | /* TODO: enable doorbell support */ | ||
929 | /*adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, | ||
930 | ring->doorbell_index);*/ | ||
931 | |||
932 | sdma_v4_0_ring_set_wptr(ring); | ||
933 | |||
934 | /* set minor_ptr_update to 0 after wptr programmed */ | ||
935 | WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0); | ||
936 | |||
937 | /* setup the wptr shadow polling */ | ||
938 | wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); | ||
939 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO, | ||
940 | lower_32_bits(wptr_gpu_addr)); | ||
941 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI, | ||
942 | upper_32_bits(wptr_gpu_addr)); | ||
943 | wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL); | ||
944 | wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, | ||
945 | SDMA0_PAGE_RB_WPTR_POLL_CNTL, | ||
946 | F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); | ||
947 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl); | ||
948 | |||
949 | /* enable DMA RB */ | ||
950 | rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1); | ||
951 | WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); | ||
952 | |||
953 | ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL); | ||
954 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1); | ||
955 | #ifdef __BIG_ENDIAN | ||
956 | ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1); | ||
957 | #endif | ||
958 | /* enable DMA IBs */ | ||
959 | WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); | ||
813 | 960 | ||
814 | return 0; | 961 | ring->sched.ready = true; |
815 | } | 962 | } |
816 | 963 | ||
817 | static void | 964 | static void |
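Note how the rewritten resume paths program register fields straight from boolean expressions (ENABLE from ring->use_doorbell, F32_POLL_ENABLE from amdgpu_sriov_vf(adev)) rather than open-coded if/else branches. REG_SET_FIELD is the usual soc15 field helper; its definition is outside this diff, but it behaves roughly like the following sketch:

/* hedged sketch of the field-update helper: clear the field's mask in the
 * original value, then OR in the new value shifted into the field position
 */
#define REG_SET_FIELD(orig, reg, field, val)				\
	(((orig) & ~REG_FIELD_MASK(reg, field)) |			\
	 (REG_FIELD_MASK(reg, field) & ((val) << REG_FIELD_SHIFT(reg, field))))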
@@ -922,12 +1069,14 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) | |||
922 | (adev->sdma.instance[i].fw->data + | 1069 | (adev->sdma.instance[i].fw->data + |
923 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); | 1070 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
924 | 1071 | ||
925 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); | 1072 | WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0); |
926 | 1073 | ||
927 | for (j = 0; j < fw_size; j++) | 1074 | for (j = 0; j < fw_size; j++) |
928 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); | 1075 | WREG32_SDMA(i, mmSDMA0_UCODE_DATA, |
1076 | le32_to_cpup(fw_data++)); | ||
929 | 1077 | ||
930 | WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); | 1078 | WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, |
1079 | adev->sdma.instance[i].fw_version); | ||
931 | } | 1080 | } |
932 | 1081 | ||
933 | return 0; | 1082 | return 0; |
@@ -943,33 +1092,78 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) | |||
943 | */ | 1092 | */ |
944 | static int sdma_v4_0_start(struct amdgpu_device *adev) | 1093 | static int sdma_v4_0_start(struct amdgpu_device *adev) |
945 | { | 1094 | { |
946 | int r = 0; | 1095 | struct amdgpu_ring *ring; |
1096 | int i, r; | ||
947 | 1097 | ||
948 | if (amdgpu_sriov_vf(adev)) { | 1098 | if (amdgpu_sriov_vf(adev)) { |
949 | sdma_v4_0_ctx_switch_enable(adev, false); | 1099 | sdma_v4_0_ctx_switch_enable(adev, false); |
950 | sdma_v4_0_enable(adev, false); | 1100 | sdma_v4_0_enable(adev, false); |
1101 | } else { | ||
951 | 1102 | ||
952 | /* set RB registers */ | 1103 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { |
953 | r = sdma_v4_0_gfx_resume(adev); | 1104 | r = sdma_v4_0_load_microcode(adev); |
954 | return r; | 1105 | if (r) |
1106 | return r; | ||
1107 | } | ||
1108 | |||
1109 | /* unhalt the MEs */ | ||
1110 | sdma_v4_0_enable(adev, true); | ||
1111 | /* enable sdma ring preemption */ | ||
1112 | sdma_v4_0_ctx_switch_enable(adev, true); | ||
1113 | } | ||
1114 | |||
1115 | /* start the gfx rings and rlc compute queues */ | ||
1116 | for (i = 0; i < adev->sdma.num_instances; i++) { | ||
1117 | uint32_t temp; | ||
1118 | |||
1119 | WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0); | ||
1120 | sdma_v4_0_gfx_resume(adev, i); | ||
1121 | if (adev->sdma.has_page_queue) | ||
1122 | sdma_v4_0_page_resume(adev, i); | ||
1123 | |||
1124 | /* set utc l1 enable flag always to 1 */ | ||
1125 | temp = RREG32_SDMA(i, mmSDMA0_CNTL); | ||
1126 | temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); | ||
1127 | WREG32_SDMA(i, mmSDMA0_CNTL, temp); | ||
1128 | |||
1129 | if (!amdgpu_sriov_vf(adev)) { | ||
1130 | /* unhalt engine */ | ||
1131 | temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); | ||
1132 | temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); | ||
1133 | WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp); | ||
1134 | } | ||
955 | } | 1135 | } |
956 | 1136 | ||
957 | if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { | 1137 | if (amdgpu_sriov_vf(adev)) { |
958 | r = sdma_v4_0_load_microcode(adev); | 1138 | sdma_v4_0_ctx_switch_enable(adev, true); |
1139 | sdma_v4_0_enable(adev, true); | ||
1140 | } else { | ||
1141 | r = sdma_v4_0_rlc_resume(adev); | ||
959 | if (r) | 1142 | if (r) |
960 | return r; | 1143 | return r; |
961 | } | 1144 | } |
962 | 1145 | ||
963 | /* unhalt the MEs */ | 1146 | for (i = 0; i < adev->sdma.num_instances; i++) { |
964 | sdma_v4_0_enable(adev, true); | 1147 | ring = &adev->sdma.instance[i].ring; |
965 | /* enable sdma ring preemption */ | ||
966 | sdma_v4_0_ctx_switch_enable(adev, true); | ||
967 | 1148 | ||
968 | /* start the gfx rings and rlc compute queues */ | 1149 | r = amdgpu_ring_test_helper(ring); |
969 | r = sdma_v4_0_gfx_resume(adev); | 1150 | if (r) |
970 | if (r) | 1151 | return r; |
971 | return r; | 1152 | |
972 | r = sdma_v4_0_rlc_resume(adev); | 1153 | if (adev->sdma.has_page_queue) { |
1154 | struct amdgpu_ring *page = &adev->sdma.instance[i].page; | ||
1155 | |||
1156 | r = amdgpu_ring_test_helper(page); | ||
1157 | if (r) | ||
1158 | return r; | ||
1159 | |||
1160 | if (adev->mman.buffer_funcs_ring == page) | ||
1161 | amdgpu_ttm_set_buffer_funcs_status(adev, true); | ||
1162 | } | ||
1163 | |||
1164 | if (adev->mman.buffer_funcs_ring == ring) | ||
1165 | amdgpu_ttm_set_buffer_funcs_status(adev, true); | ||
1166 | } | ||
973 | 1167 | ||
974 | return r; | 1168 | return r; |
975 | } | 1169 | } |
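sdma_v4_0_start() now finishes by running amdgpu_ring_test_helper() on every gfx and page ring, replacing the single amdgpu_ring_test_ring() call plus manual ready flagging. The helper's body is not in this excerpt; a hedged sketch of what such a wrapper is expected to do, marking the ring's scheduler ready only on success:

/* hypothetical sketch -- the real helper lives in the common ring code */
static int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	int r = amdgpu_ring_test_ring(ring);

	if (r)
		DRM_DEV_ERROR(ring->adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);
	ring->sched.ready = !r;
	return r;
}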
@@ -993,21 +1187,16 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) | |||
993 | u64 gpu_addr; | 1187 | u64 gpu_addr; |
994 | 1188 | ||
995 | r = amdgpu_device_wb_get(adev, &index); | 1189 | r = amdgpu_device_wb_get(adev, &index); |
996 | if (r) { | 1190 | if (r) |
997 | dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); | ||
998 | return r; | 1191 | return r; |
999 | } | ||
1000 | 1192 | ||
1001 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 1193 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
1002 | tmp = 0xCAFEDEAD; | 1194 | tmp = 0xCAFEDEAD; |
1003 | adev->wb.wb[index] = cpu_to_le32(tmp); | 1195 | adev->wb.wb[index] = cpu_to_le32(tmp); |
1004 | 1196 | ||
1005 | r = amdgpu_ring_alloc(ring, 5); | 1197 | r = amdgpu_ring_alloc(ring, 5); |
1006 | if (r) { | 1198 | if (r) |
1007 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); | 1199 | goto error_free_wb; |
1008 | amdgpu_device_wb_free(adev, index); | ||
1009 | return r; | ||
1010 | } | ||
1011 | 1200 | ||
1012 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | | 1201 | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | |
1013 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); | 1202 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); |
@@ -1024,15 +1213,11 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) | |||
1024 | DRM_UDELAY(1); | 1213 | DRM_UDELAY(1); |
1025 | } | 1214 | } |
1026 | 1215 | ||
1027 | if (i < adev->usec_timeout) { | 1216 | if (i >= adev->usec_timeout) |
1028 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); | 1217 | r = -ETIMEDOUT; |
1029 | } else { | ||
1030 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
1031 | ring->idx, tmp); | ||
1032 | r = -EINVAL; | ||
1033 | } | ||
1034 | amdgpu_device_wb_free(adev, index); | ||
1035 | 1218 | ||
1219 | error_free_wb: | ||
1220 | amdgpu_device_wb_free(adev, index); | ||
1036 | return r; | 1221 | return r; |
1037 | } | 1222 | } |
1038 | 1223 | ||
@@ -1055,20 +1240,16 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
1055 | u64 gpu_addr; | 1240 | u64 gpu_addr; |
1056 | 1241 | ||
1057 | r = amdgpu_device_wb_get(adev, &index); | 1242 | r = amdgpu_device_wb_get(adev, &index); |
1058 | if (r) { | 1243 | if (r) |
1059 | dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); | ||
1060 | return r; | 1244 | return r; |
1061 | } | ||
1062 | 1245 | ||
1063 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 1246 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
1064 | tmp = 0xCAFEDEAD; | 1247 | tmp = 0xCAFEDEAD; |
1065 | adev->wb.wb[index] = cpu_to_le32(tmp); | 1248 | adev->wb.wb[index] = cpu_to_le32(tmp); |
1066 | memset(&ib, 0, sizeof(ib)); | 1249 | memset(&ib, 0, sizeof(ib)); |
1067 | r = amdgpu_ib_get(adev, NULL, 256, &ib); | 1250 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
1068 | if (r) { | 1251 | if (r) |
1069 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
1070 | goto err0; | 1252 | goto err0; |
1071 | } | ||
1072 | 1253 | ||
1073 | ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | | 1254 | ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | |
1074 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); | 1255 | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); |
@@ -1087,21 +1268,17 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
1087 | 1268 | ||
1088 | r = dma_fence_wait_timeout(f, false, timeout); | 1269 | r = dma_fence_wait_timeout(f, false, timeout); |
1089 | if (r == 0) { | 1270 | if (r == 0) { |
1090 | DRM_ERROR("amdgpu: IB test timed out\n"); | ||
1091 | r = -ETIMEDOUT; | 1271 | r = -ETIMEDOUT; |
1092 | goto err1; | 1272 | goto err1; |
1093 | } else if (r < 0) { | 1273 | } else if (r < 0) { |
1094 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
1095 | goto err1; | 1274 | goto err1; |
1096 | } | 1275 | } |
1097 | tmp = le32_to_cpu(adev->wb.wb[index]); | 1276 | tmp = le32_to_cpu(adev->wb.wb[index]); |
1098 | if (tmp == 0xDEADBEEF) { | 1277 | if (tmp == 0xDEADBEEF) |
1099 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
1100 | r = 0; | 1278 | r = 0; |
1101 | } else { | 1279 | else |
1102 | DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); | ||
1103 | r = -EINVAL; | 1280 | r = -EINVAL; |
1104 | } | 1281 | |
1105 | err1: | 1282 | err1: |
1106 | amdgpu_ib_free(adev, &ib, NULL); | 1283 | amdgpu_ib_free(adev, &ib, NULL); |
1107 | dma_fence_put(f); | 1284 | dma_fence_put(f); |
@@ -1206,7 +1383,7 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib, | |||
1206 | */ | 1383 | */ |
1207 | static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) | 1384 | static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) |
1208 | { | 1385 | { |
1209 | struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); | 1386 | struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); |
1210 | u32 pad_count; | 1387 | u32 pad_count; |
1211 | int i; | 1388 | int i; |
1212 | 1389 | ||
@@ -1276,10 +1453,18 @@ static int sdma_v4_0_early_init(void *handle) | |||
1276 | { | 1453 | { |
1277 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1454 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
1278 | 1455 | ||
1279 | if (adev->asic_type == CHIP_RAVEN) | 1456 | if (adev->asic_type == CHIP_RAVEN) { |
1280 | adev->sdma.num_instances = 1; | 1457 | adev->sdma.num_instances = 1; |
1281 | else | 1458 | adev->sdma.has_page_queue = false; |
1459 | } else { | ||
1282 | adev->sdma.num_instances = 2; | 1460 | adev->sdma.num_instances = 2; |
1461 | /* TODO: Page queue breaks driver reload under SRIOV */ | ||
1462 | if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev))) | ||
1463 | adev->sdma.has_page_queue = false; | ||
1464 | else if (adev->asic_type != CHIP_VEGA20 && | ||
1465 | adev->asic_type != CHIP_VEGA12) | ||
1466 | adev->sdma.has_page_queue = true; | ||
1467 | } | ||
1283 | 1468 | ||
1284 | sdma_v4_0_set_ring_funcs(adev); | 1469 | sdma_v4_0_set_ring_funcs(adev); |
1285 | sdma_v4_0_set_buffer_funcs(adev); | 1470 | sdma_v4_0_set_buffer_funcs(adev); |
@@ -1340,6 +1525,21 @@ static int sdma_v4_0_sw_init(void *handle) | |||
1340 | AMDGPU_SDMA_IRQ_TRAP1); | 1525 | AMDGPU_SDMA_IRQ_TRAP1); |
1341 | if (r) | 1526 | if (r) |
1342 | return r; | 1527 | return r; |
1528 | |||
1529 | if (adev->sdma.has_page_queue) { | ||
1530 | ring = &adev->sdma.instance[i].page; | ||
1531 | ring->ring_obj = NULL; | ||
1532 | ring->use_doorbell = false; | ||
1533 | |||
1534 | sprintf(ring->name, "page%d", i); | ||
1535 | r = amdgpu_ring_init(adev, ring, 1024, | ||
1536 | &adev->sdma.trap_irq, | ||
1537 | (i == 0) ? | ||
1538 | AMDGPU_SDMA_IRQ_TRAP0 : | ||
1539 | AMDGPU_SDMA_IRQ_TRAP1); | ||
1540 | if (r) | ||
1541 | return r; | ||
1542 | } | ||
1343 | } | 1543 | } |
1344 | 1544 | ||
1345 | return r; | 1545 | return r; |
@@ -1350,8 +1550,11 @@ static int sdma_v4_0_sw_fini(void *handle) | |||
1350 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1550 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
1351 | int i; | 1551 | int i; |
1352 | 1552 | ||
1353 | for (i = 0; i < adev->sdma.num_instances; i++) | 1553 | for (i = 0; i < adev->sdma.num_instances; i++) { |
1354 | amdgpu_ring_fini(&adev->sdma.instance[i].ring); | 1554 | amdgpu_ring_fini(&adev->sdma.instance[i].ring); |
1555 | if (adev->sdma.has_page_queue) | ||
1556 | amdgpu_ring_fini(&adev->sdma.instance[i].page); | ||
1557 | } | ||
1355 | 1558 | ||
1356 | for (i = 0; i < adev->sdma.num_instances; i++) { | 1559 | for (i = 0; i < adev->sdma.num_instances; i++) { |
1357 | release_firmware(adev->sdma.instance[i].fw); | 1560 | release_firmware(adev->sdma.instance[i].fw); |
@@ -1414,7 +1617,7 @@ static bool sdma_v4_0_is_idle(void *handle) | |||
1414 | u32 i; | 1617 | u32 i; |
1415 | 1618 | ||
1416 | for (i = 0; i < adev->sdma.num_instances; i++) { | 1619 | for (i = 0; i < adev->sdma.num_instances; i++) { |
1417 | u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); | 1620 | u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG); |
1418 | 1621 | ||
1419 | if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) | 1622 | if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) |
1420 | return false; | 1623 | return false; |
@@ -1430,8 +1633,8 @@ static int sdma_v4_0_wait_for_idle(void *handle) | |||
1430 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 1633 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
1431 | 1634 | ||
1432 | for (i = 0; i < adev->usec_timeout; i++) { | 1635 | for (i = 0; i < adev->usec_timeout; i++) { |
1433 | sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); | 1636 | sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); |
1434 | sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); | 1637 | sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); |
1435 | 1638 | ||
1436 | if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) | 1639 | if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) |
1437 | return 0; | 1640 | return 0; |
@@ -1452,16 +1655,13 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, | |||
1452 | unsigned type, | 1655 | unsigned type, |
1453 | enum amdgpu_interrupt_state state) | 1656 | enum amdgpu_interrupt_state state) |
1454 | { | 1657 | { |
1658 | unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1; | ||
1455 | u32 sdma_cntl; | 1659 | u32 sdma_cntl; |
1456 | 1660 | ||
1457 | u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? | 1661 | sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL); |
1458 | sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : | ||
1459 | sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); | ||
1460 | |||
1461 | sdma_cntl = RREG32(reg_offset); | ||
1462 | sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, | 1662 | sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, |
1463 | state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); | 1663 | state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); |
1464 | WREG32(reg_offset, sdma_cntl); | 1664 | WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl); |
1465 | 1665 | ||
1466 | return 0; | 1666 | return 0; |
1467 | } | 1667 | } |
@@ -1470,39 +1670,32 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, | |||
1470 | struct amdgpu_irq_src *source, | 1670 | struct amdgpu_irq_src *source, |
1471 | struct amdgpu_iv_entry *entry) | 1671 | struct amdgpu_iv_entry *entry) |
1472 | { | 1672 | { |
1673 | uint32_t instance; | ||
1674 | |||
1473 | DRM_DEBUG("IH: SDMA trap\n"); | 1675 | DRM_DEBUG("IH: SDMA trap\n"); |
1474 | switch (entry->client_id) { | 1676 | switch (entry->client_id) { |
1475 | case SOC15_IH_CLIENTID_SDMA0: | 1677 | case SOC15_IH_CLIENTID_SDMA0: |
1476 | switch (entry->ring_id) { | 1678 | instance = 0; |
1477 | case 0: | ||
1478 | amdgpu_fence_process(&adev->sdma.instance[0].ring); | ||
1479 | break; | ||
1480 | case 1: | ||
1481 | /* XXX compute */ | ||
1482 | break; | ||
1483 | case 2: | ||
1484 | /* XXX compute */ | ||
1485 | break; | ||
1486 | case 3: | ||
1487 | /* XXX page queue*/ | ||
1488 | break; | ||
1489 | } | ||
1490 | break; | 1679 | break; |
1491 | case SOC15_IH_CLIENTID_SDMA1: | 1680 | case SOC15_IH_CLIENTID_SDMA1: |
1492 | switch (entry->ring_id) { | 1681 | instance = 1; |
1493 | case 0: | 1682 | break; |
1494 | amdgpu_fence_process(&adev->sdma.instance[1].ring); | 1683 | default: |
1495 | break; | 1684 | return 0; |
1496 | case 1: | 1685 | } |
1497 | /* XXX compute */ | 1686 | |
1498 | break; | 1687 | switch (entry->ring_id) { |
1499 | case 2: | 1688 | case 0: |
1500 | /* XXX compute */ | 1689 | amdgpu_fence_process(&adev->sdma.instance[instance].ring); |
1501 | break; | 1690 | break; |
1502 | case 3: | 1691 | case 1: |
1503 | /* XXX page queue*/ | 1692 | /* XXX compute */ |
1504 | break; | 1693 | break; |
1505 | } | 1694 | case 2: |
1695 | /* XXX compute */ | ||
1696 | break; | ||
1697 | case 3: | ||
1698 | amdgpu_fence_process(&adev->sdma.instance[instance].page); | ||
1506 | break; | 1699 | break; |
1507 | } | 1700 | } |
1508 | return 0; | 1701 | return 0; |
@@ -1512,12 +1705,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, | |||
1512 | struct amdgpu_irq_src *source, | 1705 | struct amdgpu_irq_src *source, |
1513 | struct amdgpu_iv_entry *entry) | 1706 | struct amdgpu_iv_entry *entry) |
1514 | { | 1707 | { |
1708 | int instance; | ||
1709 | |||
1515 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); | 1710 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); |
1516 | schedule_work(&adev->reset_work); | 1711 | |
1712 | switch (entry->client_id) { | ||
1713 | case SOC15_IH_CLIENTID_SDMA0: | ||
1714 | instance = 0; | ||
1715 | break; | ||
1716 | case SOC15_IH_CLIENTID_SDMA1: | ||
1717 | instance = 1; | ||
1718 | break; | ||
1719 | default: | ||
1720 | return 0; | ||
1721 | } | ||
1722 | |||
1723 | switch (entry->ring_id) { | ||
1724 | case 0: | ||
1725 | drm_sched_fault(&adev->sdma.instance[instance].ring.sched); | ||
1726 | break; | ||
1727 | } | ||
1517 | return 0; | 1728 | return 0; |
1518 | } | 1729 | } |
1519 | 1730 | ||
1520 | |||
1521 | static void sdma_v4_0_update_medium_grain_clock_gating( | 1731 | static void sdma_v4_0_update_medium_grain_clock_gating( |
1522 | struct amdgpu_device *adev, | 1732 | struct amdgpu_device *adev, |
1523 | bool enable) | 1733 | bool enable) |
@@ -1730,6 +1940,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { | |||
1730 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, | 1940 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, |
1731 | }; | 1941 | }; |
1732 | 1942 | ||
1943 | static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { | ||
1944 | .type = AMDGPU_RING_TYPE_SDMA, | ||
1945 | .align_mask = 0xf, | ||
1946 | .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), | ||
1947 | .support_64bit_ptrs = true, | ||
1948 | .vmhub = AMDGPU_MMHUB, | ||
1949 | .get_rptr = sdma_v4_0_ring_get_rptr, | ||
1950 | .get_wptr = sdma_v4_0_page_ring_get_wptr, | ||
1951 | .set_wptr = sdma_v4_0_page_ring_set_wptr, | ||
1952 | .emit_frame_size = | ||
1953 | 6 + /* sdma_v4_0_ring_emit_hdp_flush */ | ||
1954 | 3 + /* hdp invalidate */ | ||
1955 | 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ | ||
1956 | /* sdma_v4_0_ring_emit_vm_flush */ | ||
1957 | SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + | ||
1958 | SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + | ||
1959 | 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ | ||
1960 | .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ | ||
1961 | .emit_ib = sdma_v4_0_ring_emit_ib, | ||
1962 | .emit_fence = sdma_v4_0_ring_emit_fence, | ||
1963 | .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, | ||
1964 | .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, | ||
1965 | .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, | ||
1966 | .test_ring = sdma_v4_0_ring_test_ring, | ||
1967 | .test_ib = sdma_v4_0_ring_test_ib, | ||
1968 | .insert_nop = sdma_v4_0_ring_insert_nop, | ||
1969 | .pad_ib = sdma_v4_0_ring_pad_ib, | ||
1970 | .emit_wreg = sdma_v4_0_ring_emit_wreg, | ||
1971 | .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, | ||
1972 | .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, | ||
1973 | }; | ||
1974 | |||
1733 | static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) | 1975 | static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) |
1734 | { | 1976 | { |
1735 | int i; | 1977 | int i; |
@@ -1737,6 +1979,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) | |||
1737 | for (i = 0; i < adev->sdma.num_instances; i++) { | 1979 | for (i = 0; i < adev->sdma.num_instances; i++) { |
1738 | adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; | 1980 | adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; |
1739 | adev->sdma.instance[i].ring.me = i; | 1981 | adev->sdma.instance[i].ring.me = i; |
1982 | if (adev->sdma.has_page_queue) { | ||
1983 | adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; | ||
1984 | adev->sdma.instance[i].page.me = i; | ||
1985 | } | ||
1740 | } | 1986 | } |
1741 | } | 1987 | } |
1742 | 1988 | ||
@@ -1818,7 +2064,10 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { | |||
1818 | static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) | 2064 | static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) |
1819 | { | 2065 | { |
1820 | adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; | 2066 | adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; |
1821 | adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; | 2067 | if (adev->sdma.has_page_queue) |
2068 | adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; | ||
2069 | else | ||
2070 | adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; | ||
1822 | } | 2071 | } |
1823 | 2072 | ||
1824 | static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { | 2073 | static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { |
@@ -1836,7 +2085,10 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) | |||
1836 | 2085 | ||
1837 | adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; | 2086 | adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; |
1838 | for (i = 0; i < adev->sdma.num_instances; i++) { | 2087 | for (i = 0; i < adev->sdma.num_instances; i++) { |
1839 | sched = &adev->sdma.instance[i].ring.sched; | 2088 | if (adev->sdma.has_page_queue) |
2089 | sched = &adev->sdma.instance[i].page.sched; | ||
2090 | else | ||
2091 | sched = &adev->sdma.instance[i].ring.sched; | ||
1840 | adev->vm_manager.vm_pte_rqs[i] = | 2092 | adev->vm_manager.vm_pte_rqs[i] = |
1841 | &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; | 2093 | &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; |
1842 | } | 2094 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index adbaea6da0d7..b6e473134e19 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c | |||
@@ -61,9 +61,11 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) | |||
61 | } | 61 | } |
62 | 62 | ||
63 | static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, | 63 | static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, |
64 | struct amdgpu_job *job, | ||
64 | struct amdgpu_ib *ib, | 65 | struct amdgpu_ib *ib, |
65 | unsigned vmid, bool ctx_switch) | 66 | bool ctx_switch) |
66 | { | 67 | { |
68 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
67 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. | 69 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. |
68 | * Pad as necessary with NOPs. | 70 | * Pad as necessary with NOPs. |
69 | */ | 71 | */ |
@@ -122,7 +124,7 @@ static void si_dma_stop(struct amdgpu_device *adev) | |||
122 | 124 | ||
123 | if (adev->mman.buffer_funcs_ring == ring) | 125 | if (adev->mman.buffer_funcs_ring == ring) |
124 | amdgpu_ttm_set_buffer_funcs_status(adev, false); | 126 | amdgpu_ttm_set_buffer_funcs_status(adev, false); |
125 | ring->ready = false; | 127 | ring->sched.ready = false; |
126 | } | 128 | } |
127 | } | 129 | } |
128 | 130 | ||
@@ -175,13 +177,11 @@ static int si_dma_start(struct amdgpu_device *adev) | |||
175 | WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); | 177 | WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); |
176 | WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); | 178 | WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); |
177 | 179 | ||
178 | ring->ready = true; | 180 | ring->sched.ready = true; |
179 | 181 | ||
180 | r = amdgpu_ring_test_ring(ring); | 182 | r = amdgpu_ring_test_helper(ring); |
181 | if (r) { | 183 | if (r) |
182 | ring->ready = false; | ||
183 | return r; | 184 | return r; |
184 | } | ||
185 | 185 | ||
186 | if (adev->mman.buffer_funcs_ring == ring) | 186 | if (adev->mman.buffer_funcs_ring == ring) |
187 | amdgpu_ttm_set_buffer_funcs_status(adev, true); | 187 | amdgpu_ttm_set_buffer_funcs_status(adev, true); |
@@ -209,21 +209,16 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) | |||
209 | u64 gpu_addr; | 209 | u64 gpu_addr; |
210 | 210 | ||
211 | r = amdgpu_device_wb_get(adev, &index); | 211 | r = amdgpu_device_wb_get(adev, &index); |
212 | if (r) { | 212 | if (r) |
213 | dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); | ||
214 | return r; | 213 | return r; |
215 | } | ||
216 | 214 | ||
217 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 215 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
218 | tmp = 0xCAFEDEAD; | 216 | tmp = 0xCAFEDEAD; |
219 | adev->wb.wb[index] = cpu_to_le32(tmp); | 217 | adev->wb.wb[index] = cpu_to_le32(tmp); |
220 | 218 | ||
221 | r = amdgpu_ring_alloc(ring, 4); | 219 | r = amdgpu_ring_alloc(ring, 4); |
222 | if (r) { | 220 | if (r) |
223 | DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); | 221 | goto error_free_wb; |
224 | amdgpu_device_wb_free(adev, index); | ||
225 | return r; | ||
226 | } | ||
227 | 222 | ||
228 | amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); | 223 | amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); |
229 | amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); | 224 | amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); |
@@ -238,15 +233,11 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) | |||
238 | DRM_UDELAY(1); | 233 | DRM_UDELAY(1); |
239 | } | 234 | } |
240 | 235 | ||
241 | if (i < adev->usec_timeout) { | 236 | if (i >= adev->usec_timeout) |
242 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); | 237 | r = -ETIMEDOUT; |
243 | } else { | ||
244 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
245 | ring->idx, tmp); | ||
246 | r = -EINVAL; | ||
247 | } | ||
248 | amdgpu_device_wb_free(adev, index); | ||
249 | 238 | ||
239 | error_free_wb: | ||
240 | amdgpu_device_wb_free(adev, index); | ||
250 | return r; | 241 | return r; |
251 | } | 242 | } |
252 | 243 | ||
@@ -269,20 +260,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
269 | long r; | 260 | long r; |
270 | 261 | ||
271 | r = amdgpu_device_wb_get(adev, &index); | 262 | r = amdgpu_device_wb_get(adev, &index); |
272 | if (r) { | 263 | if (r) |
273 | dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); | ||
274 | return r; | 264 | return r; |
275 | } | ||
276 | 265 | ||
277 | gpu_addr = adev->wb.gpu_addr + (index * 4); | 266 | gpu_addr = adev->wb.gpu_addr + (index * 4); |
278 | tmp = 0xCAFEDEAD; | 267 | tmp = 0xCAFEDEAD; |
279 | adev->wb.wb[index] = cpu_to_le32(tmp); | 268 | adev->wb.wb[index] = cpu_to_le32(tmp); |
280 | memset(&ib, 0, sizeof(ib)); | 269 | memset(&ib, 0, sizeof(ib)); |
281 | r = amdgpu_ib_get(adev, NULL, 256, &ib); | 270 | r = amdgpu_ib_get(adev, NULL, 256, &ib); |
282 | if (r) { | 271 | if (r) |
283 | DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); | ||
284 | goto err0; | 272 | goto err0; |
285 | } | ||
286 | 273 | ||
287 | ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); | 274 | ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); |
288 | ib.ptr[1] = lower_32_bits(gpu_addr); | 275 | ib.ptr[1] = lower_32_bits(gpu_addr); |
@@ -295,21 +282,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
295 | 282 | ||
296 | r = dma_fence_wait_timeout(f, false, timeout); | 283 | r = dma_fence_wait_timeout(f, false, timeout); |
297 | if (r == 0) { | 284 | if (r == 0) { |
298 | DRM_ERROR("amdgpu: IB test timed out\n"); | ||
299 | r = -ETIMEDOUT; | 285 | r = -ETIMEDOUT; |
300 | goto err1; | 286 | goto err1; |
301 | } else if (r < 0) { | 287 | } else if (r < 0) { |
302 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
303 | goto err1; | 288 | goto err1; |
304 | } | 289 | } |
305 | tmp = le32_to_cpu(adev->wb.wb[index]); | 290 | tmp = le32_to_cpu(adev->wb.wb[index]); |
306 | if (tmp == 0xDEADBEEF) { | 291 | if (tmp == 0xDEADBEEF) |
307 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
308 | r = 0; | 292 | r = 0; |
309 | } else { | 293 | else |
310 | DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); | ||
311 | r = -EINVAL; | 294 | r = -EINVAL; |
312 | } | ||
313 | 295 | ||
314 | err1: | 296 | err1: |
315 | amdgpu_ib_free(adev, &ib, NULL); | 297 | amdgpu_ib_free(adev, &ib, NULL); |
@@ -658,15 +640,6 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev, | |||
658 | return 0; | 640 | return 0; |
659 | } | 641 | } |
660 | 642 | ||
661 | static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev, | ||
662 | struct amdgpu_irq_src *source, | ||
663 | struct amdgpu_iv_entry *entry) | ||
664 | { | ||
665 | DRM_ERROR("Illegal instruction in SDMA command stream\n"); | ||
666 | schedule_work(&adev->reset_work); | ||
667 | return 0; | ||
668 | } | ||
669 | |||
670 | static int si_dma_set_clockgating_state(void *handle, | 643 | static int si_dma_set_clockgating_state(void *handle, |
671 | enum amd_clockgating_state state) | 644 | enum amd_clockgating_state state) |
672 | { | 645 | { |
@@ -781,15 +754,10 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = { | |||
781 | .process = si_dma_process_trap_irq, | 754 | .process = si_dma_process_trap_irq, |
782 | }; | 755 | }; |
783 | 756 | ||
784 | static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = { | ||
785 | .process = si_dma_process_illegal_inst_irq, | ||
786 | }; | ||
787 | |||
788 | static void si_dma_set_irq_funcs(struct amdgpu_device *adev) | 757 | static void si_dma_set_irq_funcs(struct amdgpu_device *adev) |
789 | { | 758 | { |
790 | adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; | 759 | adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; |
791 | adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; | 760 | adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; |
792 | adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs; | ||
793 | } | 761 | } |
794 | 762 | ||
795 | /** | 763 | /** |
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h new file mode 100644 index 000000000000..ac2c27b7630c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h | |||
@@ -0,0 +1,130 @@ | |||
1 | /* | ||
2 | * Copyright 2018 Advanced Micro Devices, Inc. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | ||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
20 | * OTHER DEALINGS IN THE SOFTWARE. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #ifndef _TA_XGMI_IF_H | ||
25 | #define _TA_XGMI_IF_H | ||
26 | |||
27 | /* Responses have bit 31 set */ | ||
28 | #define RSP_ID_MASK (1U << 31) | ||
29 | #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) | ||
30 | |||
31 | enum ta_command_xgmi { | ||
32 | TA_COMMAND_XGMI__INITIALIZE = 0x00, | ||
33 | TA_COMMAND_XGMI__GET_NODE_ID = 0x01, | ||
34 | TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, | ||
35 | TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, | ||
36 | TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04 | ||
37 | }; | ||
38 | |||
39 | /* XGMI related enumerations */ | ||
40 | /**********************************************************/ | ||
41 | enum ta_xgmi_connected_nodes { | ||
42 | TA_XGMI__MAX_CONNECTED_NODES = 64 | ||
43 | }; | ||
44 | |||
45 | enum ta_xgmi_status { | ||
46 | TA_XGMI_STATUS__SUCCESS = 0x00, | ||
47 | TA_XGMI_STATUS__GENERIC_FAILURE = 0x01, | ||
48 | TA_XGMI_STATUS__NULL_POINTER = 0x02, | ||
49 | TA_XGMI_STATUS__INVALID_PARAMETER = 0x03, | ||
50 | TA_XGMI_STATUS__NOT_INITIALIZED = 0x04, | ||
51 | TA_XGMI_STATUS__INVALID_NODE_NUM = 0x05, | ||
52 | TA_XGMI_STATUS__INVALID_NODE_ID = 0x06, | ||
53 | TA_XGMI_STATUS__INVALID_TOPOLOGY = 0x07, | ||
54 | TA_XGMI_STATUS__FAILED_ID_GEN = 0x08, | ||
55 | TA_XGMI_STATUS__FAILED_TOPOLOGY_INIT = 0x09, | ||
56 | TA_XGMI_STATUS__SET_SHARING_ERROR = 0x0A | ||
57 | }; | ||
58 | |||
59 | enum ta_xgmi_assigned_sdma_engine { | ||
60 | TA_XGMI_ASSIGNED_SDMA_ENGINE__NOT_ASSIGNED = -1, | ||
61 | TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA0 = 0, | ||
62 | TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA1 = 1, | ||
63 | TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA2 = 2, | ||
64 | TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA3 = 3, | ||
65 | TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA4 = 4, | ||
66 | TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA5 = 5 | ||
67 | }; | ||
68 | |||
69 | /* input/output structures for XGMI commands */ | ||
70 | /**********************************************************/ | ||
71 | struct ta_xgmi_node_info { | ||
72 | uint64_t node_id; | ||
73 | uint8_t num_hops; | ||
74 | uint8_t is_sharing_enabled; | ||
75 | enum ta_xgmi_assigned_sdma_engine sdma_engine; | ||
76 | }; | ||
77 | |||
78 | struct ta_xgmi_cmd_initialize_output { | ||
79 | uint32_t status; | ||
80 | }; | ||
81 | |||
82 | struct ta_xgmi_cmd_get_node_id_output { | ||
83 | uint64_t node_id; | ||
84 | }; | ||
85 | |||
86 | struct ta_xgmi_cmd_get_hive_id_output { | ||
87 | uint64_t hive_id; | ||
88 | }; | ||
89 | |||
90 | struct ta_xgmi_cmd_get_topology_info_input { | ||
91 | uint32_t num_nodes; | ||
92 | struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; | ||
93 | }; | ||
94 | |||
95 | struct ta_xgmi_cmd_get_topology_info_output { | ||
96 | uint32_t num_nodes; | ||
97 | struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; | ||
98 | }; | ||
99 | |||
100 | struct ta_xgmi_cmd_set_topology_info_input { | ||
101 | uint32_t num_nodes; | ||
102 | struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; | ||
103 | }; | ||
104 | |||
105 | /**********************************************************/ | ||
106 | /* Common input structure for XGMI callbacks */ | ||
107 | union ta_xgmi_cmd_input { | ||
108 | struct ta_xgmi_cmd_get_topology_info_input get_topology_info; | ||
109 | struct ta_xgmi_cmd_set_topology_info_input set_topology_info; | ||
110 | }; | ||
111 | |||
112 | /* Common output structure for XGMI callbacks */ | ||
113 | union ta_xgmi_cmd_output { | ||
114 | struct ta_xgmi_cmd_initialize_output initialize; | ||
115 | struct ta_xgmi_cmd_get_node_id_output get_node_id; | ||
116 | struct ta_xgmi_cmd_get_hive_id_output get_hive_id; | ||
117 | struct ta_xgmi_cmd_get_topology_info_output get_topology_info; | ||
118 | }; | ||
119 | /**********************************************************/ | ||
120 | |||
121 | struct ta_xgmi_shared_memory { | ||
122 | uint32_t cmd_id; | ||
123 | uint32_t resp_id; | ||
124 | enum ta_xgmi_status xgmi_status; | ||
125 | uint32_t reserved; | ||
126 | union ta_xgmi_cmd_input xgmi_in_message; | ||
127 | union ta_xgmi_cmd_output xgmi_out_message; | ||
128 | }; | ||
129 | |||
130 | #endif //_TA_XGMI_IF_H | ||
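ta_xgmi_if.h only describes the command/response layout shared with the XGMI trusted application; the PSP code that actually submits these buffers is not part of this diff. A hedged usage sketch for a GET_HIVE_ID request, with xgmi_ta_submit() standing in as a hypothetical name for whatever submission helper the driver really uses:

/* hypothetical usage sketch; xgmi_ta_submit() is a placeholder, not a real API */
static int example_get_hive_id(struct ta_xgmi_shared_memory *cmd,
			       uint64_t *hive_id)
{
	int ret;

	memset(cmd, 0, sizeof(*cmd));
	cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;

	ret = xgmi_ta_submit(cmd);	/* placeholder submission call */
	if (ret || cmd->xgmi_status != TA_XGMI_STATUS__SUCCESS)
		return -EINVAL;

	*hive_id = cmd->xgmi_out_message.get_hive_id.hive_id;
	return 0;
}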
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 1fc17bf39fed..90bbcee00f28 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | |||
@@ -162,12 +162,9 @@ static int uvd_v4_2_hw_init(void *handle) | |||
162 | uvd_v4_2_enable_mgcg(adev, true); | 162 | uvd_v4_2_enable_mgcg(adev, true); |
163 | amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); | 163 | amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); |
164 | 164 | ||
165 | ring->ready = true; | 165 | r = amdgpu_ring_test_helper(ring); |
166 | r = amdgpu_ring_test_ring(ring); | 166 | if (r) |
167 | if (r) { | ||
168 | ring->ready = false; | ||
169 | goto done; | 167 | goto done; |
170 | } | ||
171 | 168 | ||
172 | r = amdgpu_ring_alloc(ring, 10); | 169 | r = amdgpu_ring_alloc(ring, 10); |
173 | if (r) { | 170 | if (r) { |
@@ -218,7 +215,7 @@ static int uvd_v4_2_hw_fini(void *handle) | |||
218 | if (RREG32(mmUVD_STATUS) != 0) | 215 | if (RREG32(mmUVD_STATUS) != 0) |
219 | uvd_v4_2_stop(adev); | 216 | uvd_v4_2_stop(adev); |
220 | 217 | ||
221 | ring->ready = false; | 218 | ring->sched.ready = false; |
222 | 219 | ||
223 | return 0; | 220 | return 0; |
224 | } | 221 | } |
@@ -484,11 +481,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) | |||
484 | 481 | ||
485 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); | 482 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
486 | r = amdgpu_ring_alloc(ring, 3); | 483 | r = amdgpu_ring_alloc(ring, 3); |
487 | if (r) { | 484 | if (r) |
488 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | ||
489 | ring->idx, r); | ||
490 | return r; | 485 | return r; |
491 | } | 486 | |
492 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); | 487 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); |
493 | amdgpu_ring_write(ring, 0xDEADBEEF); | 488 | amdgpu_ring_write(ring, 0xDEADBEEF); |
494 | amdgpu_ring_commit(ring); | 489 | amdgpu_ring_commit(ring); |
@@ -499,14 +494,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) | |||
499 | DRM_UDELAY(1); | 494 | DRM_UDELAY(1); |
500 | } | 495 | } |
501 | 496 | ||
502 | if (i < adev->usec_timeout) { | 497 | if (i >= adev->usec_timeout) |
503 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 498 | r = -ETIMEDOUT; |
504 | ring->idx, i); | 499 | |
505 | } else { | ||
506 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
507 | ring->idx, tmp); | ||
508 | r = -EINVAL; | ||
509 | } | ||
510 | return r; | 500 | return r; |
511 | } | 501 | } |
512 | 502 | ||
@@ -519,8 +509,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) | |||
519 | * Write ring commands to execute the indirect buffer | 509 | * Write ring commands to execute the indirect buffer |
520 | */ | 510 | */ |
521 | static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, | 511 | static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, |
512 | struct amdgpu_job *job, | ||
522 | struct amdgpu_ib *ib, | 513 | struct amdgpu_ib *ib, |
523 | unsigned vmid, bool ctx_switch) | 514 | bool ctx_switch) |
524 | { | 515 | { |
525 | amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); | 516 | amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); |
526 | amdgpu_ring_write(ring, ib->gpu_addr); | 517 | amdgpu_ring_write(ring, ib->gpu_addr); |
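The recurring edit in this and the following UVD/VCE/VCN files is the same: instead of every hw_init path setting ring->ready by hand and clearing it on a failed test, call sites now rely on amdgpu_ring_test_helper(), and the ready flag moves into the scheduler as ring->sched.ready. The helper's body is not part of these hunks; the following is only a plausible sketch, inferred from how the call sites use it, not the definitive implementation added elsewhere in the series.

	int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
	{
		int r = amdgpu_ring_test_ring(ring);	/* existing per-IP ring test */

		/* mark the ring usable for the scheduler only if the test passed */
		ring->sched.ready = !r;
		return r;
	}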
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index fde6ad5ac9ab..1c5e12703103 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | |||
@@ -158,12 +158,9 @@ static int uvd_v5_0_hw_init(void *handle) | |||
158 | uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); | 158 | uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); |
159 | uvd_v5_0_enable_mgcg(adev, true); | 159 | uvd_v5_0_enable_mgcg(adev, true); |
160 | 160 | ||
161 | ring->ready = true; | 161 | r = amdgpu_ring_test_helper(ring); |
162 | r = amdgpu_ring_test_ring(ring); | 162 | if (r) |
163 | if (r) { | ||
164 | ring->ready = false; | ||
165 | goto done; | 163 | goto done; |
166 | } | ||
167 | 164 | ||
168 | r = amdgpu_ring_alloc(ring, 10); | 165 | r = amdgpu_ring_alloc(ring, 10); |
169 | if (r) { | 166 | if (r) { |
@@ -215,7 +212,7 @@ static int uvd_v5_0_hw_fini(void *handle) | |||
215 | if (RREG32(mmUVD_STATUS) != 0) | 212 | if (RREG32(mmUVD_STATUS) != 0) |
216 | uvd_v5_0_stop(adev); | 213 | uvd_v5_0_stop(adev); |
217 | 214 | ||
218 | ring->ready = false; | 215 | ring->sched.ready = false; |
219 | 216 | ||
220 | return 0; | 217 | return 0; |
221 | } | 218 | } |
@@ -500,11 +497,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) | |||
500 | 497 | ||
501 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); | 498 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
502 | r = amdgpu_ring_alloc(ring, 3); | 499 | r = amdgpu_ring_alloc(ring, 3); |
503 | if (r) { | 500 | if (r) |
504 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | ||
505 | ring->idx, r); | ||
506 | return r; | 501 | return r; |
507 | } | ||
508 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); | 502 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); |
509 | amdgpu_ring_write(ring, 0xDEADBEEF); | 503 | amdgpu_ring_write(ring, 0xDEADBEEF); |
510 | amdgpu_ring_commit(ring); | 504 | amdgpu_ring_commit(ring); |
@@ -515,14 +509,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) | |||
515 | DRM_UDELAY(1); | 509 | DRM_UDELAY(1); |
516 | } | 510 | } |
517 | 511 | ||
518 | if (i < adev->usec_timeout) { | 512 | if (i >= adev->usec_timeout) |
519 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 513 | r = -ETIMEDOUT; |
520 | ring->idx, i); | 514 | |
521 | } else { | ||
522 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
523 | ring->idx, tmp); | ||
524 | r = -EINVAL; | ||
525 | } | ||
526 | return r; | 515 | return r; |
527 | } | 516 | } |
528 | 517 | ||
@@ -535,8 +524,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) | |||
535 | * Write ring commands to execute the indirect buffer | 524 | * Write ring commands to execute the indirect buffer |
536 | */ | 525 | */ |
537 | static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, | 526 | static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, |
527 | struct amdgpu_job *job, | ||
538 | struct amdgpu_ib *ib, | 528 | struct amdgpu_ib *ib, |
539 | unsigned vmid, bool ctx_switch) | 529 | bool ctx_switch) |
540 | { | 530 | { |
541 | amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); | 531 | amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); |
542 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | 532 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 7a5b40275e8e..f184842ef2a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | |||
@@ -175,11 +175,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) | |||
175 | int r; | 175 | int r; |
176 | 176 | ||
177 | r = amdgpu_ring_alloc(ring, 16); | 177 | r = amdgpu_ring_alloc(ring, 16); |
178 | if (r) { | 178 | if (r) |
179 | DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", | ||
180 | ring->idx, r); | ||
181 | return r; | 179 | return r; |
182 | } | ||
183 | amdgpu_ring_write(ring, HEVC_ENC_CMD_END); | 180 | amdgpu_ring_write(ring, HEVC_ENC_CMD_END); |
184 | amdgpu_ring_commit(ring); | 181 | amdgpu_ring_commit(ring); |
185 | 182 | ||
@@ -189,14 +186,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) | |||
189 | DRM_UDELAY(1); | 186 | DRM_UDELAY(1); |
190 | } | 187 | } |
191 | 188 | ||
192 | if (i < adev->usec_timeout) { | 189 | if (i >= adev->usec_timeout) |
193 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | ||
194 | ring->idx, i); | ||
195 | } else { | ||
196 | DRM_ERROR("amdgpu: ring %d test failed\n", | ||
197 | ring->idx); | ||
198 | r = -ETIMEDOUT; | 190 | r = -ETIMEDOUT; |
199 | } | ||
200 | 191 | ||
201 | return r; | 192 | return r; |
202 | } | 193 | } |
@@ -336,31 +327,24 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
336 | long r; | 327 | long r; |
337 | 328 | ||
338 | r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL); | 329 | r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL); |
339 | if (r) { | 330 | if (r) |
340 | DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); | ||
341 | goto error; | 331 | goto error; |
342 | } | ||
343 | 332 | ||
344 | r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence); | 333 | r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence); |
345 | if (r) { | 334 | if (r) |
346 | DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); | ||
347 | goto error; | 335 | goto error; |
348 | } | ||
349 | 336 | ||
350 | r = dma_fence_wait_timeout(fence, false, timeout); | 337 | r = dma_fence_wait_timeout(fence, false, timeout); |
351 | if (r == 0) { | 338 | if (r == 0) |
352 | DRM_ERROR("amdgpu: IB test timed out.\n"); | ||
353 | r = -ETIMEDOUT; | 339 | r = -ETIMEDOUT; |
354 | } else if (r < 0) { | 340 | else if (r > 0) |
355 | DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); | ||
356 | } else { | ||
357 | DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); | ||
358 | r = 0; | 341 | r = 0; |
359 | } | 342 | |
360 | error: | 343 | error: |
361 | dma_fence_put(fence); | 344 | dma_fence_put(fence); |
362 | return r; | 345 | return r; |
363 | } | 346 | } |
347 | |||
364 | static int uvd_v6_0_early_init(void *handle) | 348 | static int uvd_v6_0_early_init(void *handle) |
365 | { | 349 | { |
366 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; | 350 | struct amdgpu_device *adev = (struct amdgpu_device *)handle; |
@@ -476,12 +460,9 @@ static int uvd_v6_0_hw_init(void *handle) | |||
476 | uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); | 460 | uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); |
477 | uvd_v6_0_enable_mgcg(adev, true); | 461 | uvd_v6_0_enable_mgcg(adev, true); |
478 | 462 | ||
479 | ring->ready = true; | 463 | r = amdgpu_ring_test_helper(ring); |
480 | r = amdgpu_ring_test_ring(ring); | 464 | if (r) |
481 | if (r) { | ||
482 | ring->ready = false; | ||
483 | goto done; | 465 | goto done; |
484 | } | ||
485 | 466 | ||
486 | r = amdgpu_ring_alloc(ring, 10); | 467 | r = amdgpu_ring_alloc(ring, 10); |
487 | if (r) { | 468 | if (r) { |
@@ -513,12 +494,9 @@ static int uvd_v6_0_hw_init(void *handle) | |||
513 | if (uvd_v6_0_enc_support(adev)) { | 494 | if (uvd_v6_0_enc_support(adev)) { |
514 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | 495 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { |
515 | ring = &adev->uvd.inst->ring_enc[i]; | 496 | ring = &adev->uvd.inst->ring_enc[i]; |
516 | ring->ready = true; | 497 | r = amdgpu_ring_test_helper(ring); |
517 | r = amdgpu_ring_test_ring(ring); | 498 | if (r) |
518 | if (r) { | ||
519 | ring->ready = false; | ||
520 | goto done; | 499 | goto done; |
521 | } | ||
522 | } | 500 | } |
523 | } | 501 | } |
524 | 502 | ||
@@ -548,7 +526,7 @@ static int uvd_v6_0_hw_fini(void *handle) | |||
548 | if (RREG32(mmUVD_STATUS) != 0) | 526 | if (RREG32(mmUVD_STATUS) != 0) |
549 | uvd_v6_0_stop(adev); | 527 | uvd_v6_0_stop(adev); |
550 | 528 | ||
551 | ring->ready = false; | 529 | ring->sched.ready = false; |
552 | 530 | ||
553 | return 0; | 531 | return 0; |
554 | } | 532 | } |
@@ -969,11 +947,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) | |||
969 | 947 | ||
970 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); | 948 | WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
971 | r = amdgpu_ring_alloc(ring, 3); | 949 | r = amdgpu_ring_alloc(ring, 3); |
972 | if (r) { | 950 | if (r) |
973 | DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", | ||
974 | ring->idx, r); | ||
975 | return r; | 951 | return r; |
976 | } | 952 | |
977 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); | 953 | amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); |
978 | amdgpu_ring_write(ring, 0xDEADBEEF); | 954 | amdgpu_ring_write(ring, 0xDEADBEEF); |
979 | amdgpu_ring_commit(ring); | 955 | amdgpu_ring_commit(ring); |
@@ -984,14 +960,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) | |||
984 | DRM_UDELAY(1); | 960 | DRM_UDELAY(1); |
985 | } | 961 | } |
986 | 962 | ||
987 | if (i < adev->usec_timeout) { | 963 | if (i >= adev->usec_timeout) |
988 | DRM_DEBUG("ring test on %d succeeded in %d usecs\n", | 964 | r = -ETIMEDOUT; |
989 | ring->idx, i); | 965 | |
990 | } else { | ||
991 | DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", | ||
992 | ring->idx, tmp); | ||
993 | r = -EINVAL; | ||
994 | } | ||
995 | return r; | 966 | return r; |
996 | } | 967 | } |
997 | 968 | ||
@@ -1004,9 +975,12 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) | |||
1004 | * Write ring commands to execute the indirect buffer | 975 | * Write ring commands to execute the indirect buffer |
1005 | */ | 976 | */ |
1006 | static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, | 977 | static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, |
978 | struct amdgpu_job *job, | ||
1007 | struct amdgpu_ib *ib, | 979 | struct amdgpu_ib *ib, |
1008 | unsigned vmid, bool ctx_switch) | 980 | bool ctx_switch) |
1009 | { | 981 | { |
982 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
983 | |||
1010 | amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0)); | 984 | amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0)); |
1011 | amdgpu_ring_write(ring, vmid); | 985 | amdgpu_ring_write(ring, vmid); |
1012 | 986 | ||
@@ -1027,8 +1001,12 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, | |||
1027 | * Write enc ring commands to execute the indirect buffer | 1001 | * Write enc ring commands to execute the indirect buffer |
1028 | */ | 1002 | */ |
1029 | static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring, | 1003 | static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring, |
1030 | struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) | 1004 | struct amdgpu_job *job, |
1005 | struct amdgpu_ib *ib, | ||
1006 | bool ctx_switch) | ||
1031 | { | 1007 | { |
1008 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
1009 | |||
1032 | amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); | 1010 | amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); |
1033 | amdgpu_ring_write(ring, vmid); | 1011 | amdgpu_ring_write(ring, vmid); |
1034 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | 1012 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
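The other recurring change is in the ->emit_ib() callbacks: they now receive the amdgpu_job pointer and derive the VMID through AMDGPU_JOB_GET_VMID() instead of taking vmid as a parameter. The macro itself is not shown in these hunks; presumably it must tolerate a NULL job, since direct IB submissions such as the IB tests carry no job. A likely shape, assumed from how the callbacks use it:

	#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)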
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 58b39afcfb86..8a4595968d98 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | |||
@@ -183,11 +183,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) | |||
183 | return 0; | 183 | return 0; |
184 | 184 | ||
185 | r = amdgpu_ring_alloc(ring, 16); | 185 | r = amdgpu_ring_alloc(ring, 16); |
186 | if (r) { | 186 | if (r) |
187 | DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n", | ||
188 | ring->me, ring->idx, r); | ||
189 | return r; | 187 | return r; |
190 | } | ||
191 | amdgpu_ring_write(ring, HEVC_ENC_CMD_END); | 188 | amdgpu_ring_write(ring, HEVC_ENC_CMD_END); |
192 | amdgpu_ring_commit(ring); | 189 | amdgpu_ring_commit(ring); |
193 | 190 | ||
@@ -197,14 +194,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) | |||
197 | DRM_UDELAY(1); | 194 | DRM_UDELAY(1); |
198 | } | 195 | } |
199 | 196 | ||
200 | if (i < adev->usec_timeout) { | 197 | if (i >= adev->usec_timeout) |
201 | DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", | ||
202 | ring->me, ring->idx, i); | ||
203 | } else { | ||
204 | DRM_ERROR("amdgpu: (%d)ring %d test failed\n", | ||
205 | ring->me, ring->idx); | ||
206 | r = -ETIMEDOUT; | 198 | r = -ETIMEDOUT; |
207 | } | ||
208 | 199 | ||
209 | return r; | 200 | return r; |
210 | } | 201 | } |
@@ -343,27 +334,19 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) | |||
343 | long r; | 334 | long r; |
344 | 335 | ||
345 | r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); | 336 | r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); |
346 | if (r) { | 337 | if (r) |
347 | DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r); | ||
348 | goto error; | 338 | goto error; |
349 | } | ||
350 | 339 | ||
351 | r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence); | 340 | r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence); |
352 | if (r) { | 341 | if (r) |
353 | DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r); | ||
354 | goto error; | 342 | goto error; |
355 | } | ||
356 | 343 | ||
357 | r = dma_fence_wait_timeout(fence, false, timeout); | 344 | r = dma_fence_wait_timeout(fence, false, timeout); |
358 | if (r == 0) { | 345 | if (r == 0) |
359 | DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me); | ||
360 | r = -ETIMEDOUT; | 346 | r = -ETIMEDOUT; |
361 | } else if (r < 0) { | 347 | else if (r > 0) |
362 | DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r); | ||
363 | } else { | ||
364 | DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx); | ||
365 | r = 0; | 348 | r = 0; |
366 | } | 349 | |
367 | error: | 350 | error: |
368 | dma_fence_put(fence); | 351 | dma_fence_put(fence); |
369 | return r; | 352 | return r; |
@@ -540,12 +523,9 @@ static int uvd_v7_0_hw_init(void *handle) | |||
540 | ring = &adev->uvd.inst[j].ring; | 523 | ring = &adev->uvd.inst[j].ring; |
541 | 524 | ||
542 | if (!amdgpu_sriov_vf(adev)) { | 525 | if (!amdgpu_sriov_vf(adev)) { |
543 | ring->ready = true; | 526 | r = amdgpu_ring_test_helper(ring); |
544 | r = amdgpu_ring_test_ring(ring); | 527 | if (r) |
545 | if (r) { | ||
546 | ring->ready = false; | ||
547 | goto done; | 528 | goto done; |
548 | } | ||
549 | 529 | ||
550 | r = amdgpu_ring_alloc(ring, 10); | 530 | r = amdgpu_ring_alloc(ring, 10); |
551 | if (r) { | 531 | if (r) { |
@@ -582,12 +562,9 @@ static int uvd_v7_0_hw_init(void *handle) | |||
582 | 562 | ||
583 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { | 563 | for (i = 0; i < adev->uvd.num_enc_rings; ++i) { |
584 | ring = &adev->uvd.inst[j].ring_enc[i]; | 564 | ring = &adev->uvd.inst[j].ring_enc[i]; |
585 | ring->ready = true; | 565 | r = amdgpu_ring_test_helper(ring); |
586 | r = amdgpu_ring_test_ring(ring); | 566 | if (r) |
587 | if (r) { | ||
588 | ring->ready = false; | ||
589 | goto done; | 567 | goto done; |
590 | } | ||
591 | } | 568 | } |
592 | } | 569 | } |
593 | done: | 570 | done: |
@@ -619,7 +596,7 @@ static int uvd_v7_0_hw_fini(void *handle) | |||
619 | for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { | 596 | for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { |
620 | if (adev->uvd.harvest_config & (1 << i)) | 597 | if (adev->uvd.harvest_config & (1 << i)) |
621 | continue; | 598 | continue; |
622 | adev->uvd.inst[i].ring.ready = false; | 599 | adev->uvd.inst[i].ring.sched.ready = false; |
623 | } | 600 | } |
624 | 601 | ||
625 | return 0; | 602 | return 0; |
@@ -1235,11 +1212,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
1235 | 1212 | ||
1236 | WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD); | 1213 | WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD); |
1237 | r = amdgpu_ring_alloc(ring, 3); | 1214 | r = amdgpu_ring_alloc(ring, 3); |
1238 | if (r) { | 1215 | if (r) |
1239 | DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n", | ||
1240 | ring->me, ring->idx, r); | ||
1241 | return r; | 1216 | return r; |
1242 | } | 1217 | |
1243 | amdgpu_ring_write(ring, | 1218 | amdgpu_ring_write(ring, |
1244 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); | 1219 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); |
1245 | amdgpu_ring_write(ring, 0xDEADBEEF); | 1220 | amdgpu_ring_write(ring, 0xDEADBEEF); |
@@ -1251,14 +1226,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) | |||
1251 | DRM_UDELAY(1); | 1226 | DRM_UDELAY(1); |
1252 | } | 1227 | } |
1253 | 1228 | ||
1254 | if (i < adev->usec_timeout) { | 1229 | if (i >= adev->usec_timeout) |
1255 | DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", | 1230 | r = -ETIMEDOUT; |
1256 | ring->me, ring->idx, i); | 1231 | |
1257 | } else { | ||
1258 | DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n", | ||
1259 | ring->me, ring->idx, tmp); | ||
1260 | r = -EINVAL; | ||
1261 | } | ||
1262 | return r; | 1232 | return r; |
1263 | } | 1233 | } |
1264 | 1234 | ||
@@ -1300,10 +1270,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, | |||
1300 | * Write ring commands to execute the indirect buffer | 1270 | * Write ring commands to execute the indirect buffer |
1301 | */ | 1271 | */ |
1302 | static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, | 1272 | static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, |
1273 | struct amdgpu_job *job, | ||
1303 | struct amdgpu_ib *ib, | 1274 | struct amdgpu_ib *ib, |
1304 | unsigned vmid, bool ctx_switch) | 1275 | bool ctx_switch) |
1305 | { | 1276 | { |
1306 | struct amdgpu_device *adev = ring->adev; | 1277 | struct amdgpu_device *adev = ring->adev; |
1278 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
1307 | 1279 | ||
1308 | amdgpu_ring_write(ring, | 1280 | amdgpu_ring_write(ring, |
1309 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0)); | 1281 | PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0)); |
@@ -1329,8 +1301,12 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, | |||
1329 | * Write enc ring commands to execute the indirect buffer | 1301 | * Write enc ring commands to execute the indirect buffer |
1330 | */ | 1302 | */ |
1331 | static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, | 1303 | static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, |
1332 | struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) | 1304 | struct amdgpu_job *job, |
1305 | struct amdgpu_ib *ib, | ||
1306 | bool ctx_switch) | ||
1333 | { | 1307 | { |
1308 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
1309 | |||
1334 | amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); | 1310 | amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); |
1335 | amdgpu_ring_write(ring, vmid); | 1311 | amdgpu_ring_write(ring, vmid); |
1336 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | 1312 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
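The enc IB tests in uvd_v6_0.c and uvd_v7_0.c also drop their error prints and collapse the dma_fence_wait_timeout() handling. The new branch order follows that function's return convention: 0 means the wait timed out, a negative value is an error from the wait itself, and a positive value is the timeout remaining when the fence signalled. Reduced to a sketch:

	long r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)		/* fence never signalled */
		r = -ETIMEDOUT;
	else if (r > 0)		/* signalled in time: report success */
		r = 0;
	/* r < 0: keep the errno returned by the wait */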
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index ea28828360d3..bed78a778e3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | |||
@@ -463,15 +463,11 @@ static int vce_v2_0_hw_init(void *handle) | |||
463 | 463 | ||
464 | amdgpu_asic_set_vce_clocks(adev, 10000, 10000); | 464 | amdgpu_asic_set_vce_clocks(adev, 10000, 10000); |
465 | vce_v2_0_enable_mgcg(adev, true, false); | 465 | vce_v2_0_enable_mgcg(adev, true, false); |
466 | for (i = 0; i < adev->vce.num_rings; i++) | ||
467 | adev->vce.ring[i].ready = false; | ||
468 | 466 | ||
469 | for (i = 0; i < adev->vce.num_rings; i++) { | 467 | for (i = 0; i < adev->vce.num_rings; i++) { |
470 | r = amdgpu_ring_test_ring(&adev->vce.ring[i]); | 468 | r = amdgpu_ring_test_helper(&adev->vce.ring[i]); |
471 | if (r) | 469 | if (r) |
472 | return r; | 470 | return r; |
473 | else | ||
474 | adev->vce.ring[i].ready = true; | ||
475 | } | 471 | } |
476 | 472 | ||
477 | DRM_INFO("VCE initialized successfully.\n"); | 473 | DRM_INFO("VCE initialized successfully.\n"); |
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6dbd39730070..3e84840859a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | |||
@@ -474,15 +474,10 @@ static int vce_v3_0_hw_init(void *handle) | |||
474 | 474 | ||
475 | amdgpu_asic_set_vce_clocks(adev, 10000, 10000); | 475 | amdgpu_asic_set_vce_clocks(adev, 10000, 10000); |
476 | 476 | ||
477 | for (i = 0; i < adev->vce.num_rings; i++) | ||
478 | adev->vce.ring[i].ready = false; | ||
479 | |||
480 | for (i = 0; i < adev->vce.num_rings; i++) { | 477 | for (i = 0; i < adev->vce.num_rings; i++) { |
481 | r = amdgpu_ring_test_ring(&adev->vce.ring[i]); | 478 | r = amdgpu_ring_test_helper(&adev->vce.ring[i]); |
482 | if (r) | 479 | if (r) |
483 | return r; | 480 | return r; |
484 | else | ||
485 | adev->vce.ring[i].ready = true; | ||
486 | } | 481 | } |
487 | 482 | ||
488 | DRM_INFO("VCE initialized successfully.\n"); | 483 | DRM_INFO("VCE initialized successfully.\n"); |
@@ -838,8 +833,12 @@ out: | |||
838 | } | 833 | } |
839 | 834 | ||
840 | static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring, | 835 | static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring, |
841 | struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) | 836 | struct amdgpu_job *job, |
837 | struct amdgpu_ib *ib, | ||
838 | bool ctx_switch) | ||
842 | { | 839 | { |
840 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
841 | |||
843 | amdgpu_ring_write(ring, VCE_CMD_IB_VM); | 842 | amdgpu_ring_write(ring, VCE_CMD_IB_VM); |
844 | amdgpu_ring_write(ring, vmid); | 843 | amdgpu_ring_write(ring, vmid); |
845 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | 844 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 1c9471890bf7..0054ba1b9a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | |||
@@ -519,15 +519,10 @@ static int vce_v4_0_hw_init(void *handle) | |||
519 | if (r) | 519 | if (r) |
520 | return r; | 520 | return r; |
521 | 521 | ||
522 | for (i = 0; i < adev->vce.num_rings; i++) | ||
523 | adev->vce.ring[i].ready = false; | ||
524 | |||
525 | for (i = 0; i < adev->vce.num_rings; i++) { | 522 | for (i = 0; i < adev->vce.num_rings; i++) { |
526 | r = amdgpu_ring_test_ring(&adev->vce.ring[i]); | 523 | r = amdgpu_ring_test_helper(&adev->vce.ring[i]); |
527 | if (r) | 524 | if (r) |
528 | return r; | 525 | return r; |
529 | else | ||
530 | adev->vce.ring[i].ready = true; | ||
531 | } | 526 | } |
532 | 527 | ||
533 | DRM_INFO("VCE initialized successfully.\n"); | 528 | DRM_INFO("VCE initialized successfully.\n"); |
@@ -549,7 +544,7 @@ static int vce_v4_0_hw_fini(void *handle) | |||
549 | } | 544 | } |
550 | 545 | ||
551 | for (i = 0; i < adev->vce.num_rings; i++) | 546 | for (i = 0; i < adev->vce.num_rings; i++) |
552 | adev->vce.ring[i].ready = false; | 547 | adev->vce.ring[i].sched.ready = false; |
553 | 548 | ||
554 | return 0; | 549 | return 0; |
555 | } | 550 | } |
@@ -951,9 +946,11 @@ static int vce_v4_0_set_powergating_state(void *handle, | |||
951 | } | 946 | } |
952 | #endif | 947 | #endif |
953 | 948 | ||
954 | static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, | 949 | static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, |
955 | struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) | 950 | struct amdgpu_ib *ib, bool ctx_switch) |
956 | { | 951 | { |
952 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
953 | |||
957 | amdgpu_ring_write(ring, VCE_CMD_IB_VM); | 954 | amdgpu_ring_write(ring, VCE_CMD_IB_VM); |
958 | amdgpu_ring_write(ring, vmid); | 955 | amdgpu_ring_write(ring, vmid); |
959 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | 956 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index eae90922fdbe..c1a03505f956 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | |||
@@ -176,30 +176,22 @@ static int vcn_v1_0_hw_init(void *handle) | |||
176 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; | 176 | struct amdgpu_ring *ring = &adev->vcn.ring_dec; |
177 | int i, r; | 177 | int i, r; |
178 | 178 | ||
179 | ring->ready = true; | 179 | r = amdgpu_ring_test_helper(ring); |
180 | r = amdgpu_ring_test_ring(ring); | 180 | if (r) |
181 | if (r) { | ||
182 | ring->ready = false; | ||
183 | goto done; | 181 | goto done; |
184 | } | ||
185 | 182 | ||
186 | for (i = 0; i < adev->vcn.num_enc_rings; ++i) { | 183 | for (i = 0; i < adev->vcn.num_enc_rings; ++i) { |
187 | ring = &adev->vcn.ring_enc[i]; | 184 | ring = &adev->vcn.ring_enc[i]; |
188 | ring->ready = true; | 185 | ring->sched.ready = true; |
189 | r = amdgpu_ring_test_ring(ring); | 186 | r = amdgpu_ring_test_helper(ring); |
190 | if (r) { | 187 | if (r) |
191 | ring->ready = false; | ||
192 | goto done; | 188 | goto done; |
193 | } | ||
194 | } | 189 | } |
195 | 190 | ||
196 | ring = &adev->vcn.ring_jpeg; | 191 | ring = &adev->vcn.ring_jpeg; |
197 | ring->ready = true; | 192 | r = amdgpu_ring_test_helper(ring); |
198 | r = amdgpu_ring_test_ring(ring); | 193 | if (r) |
199 | if (r) { | ||
200 | ring->ready = false; | ||
201 | goto done; | 194 | goto done; |
202 | } | ||
203 | 195 | ||
204 | done: | 196 | done: |
205 | if (!r) | 197 | if (!r) |
@@ -224,7 +216,7 @@ static int vcn_v1_0_hw_fini(void *handle) | |||
224 | if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) | 216 | if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) |
225 | vcn_v1_0_stop(adev); | 217 | vcn_v1_0_stop(adev); |
226 | 218 | ||
227 | ring->ready = false; | 219 | ring->sched.ready = false; |
228 | 220 | ||
229 | return 0; | 221 | return 0; |
230 | } | 222 | } |
@@ -1366,10 +1358,12 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 | |||
1366 | * Write ring commands to execute the indirect buffer | 1358 | * Write ring commands to execute the indirect buffer |
1367 | */ | 1359 | */ |
1368 | static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, | 1360 | static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, |
1369 | struct amdgpu_ib *ib, | 1361 | struct amdgpu_job *job, |
1370 | unsigned vmid, bool ctx_switch) | 1362 | struct amdgpu_ib *ib, |
1363 | bool ctx_switch) | ||
1371 | { | 1364 | { |
1372 | struct amdgpu_device *adev = ring->adev; | 1365 | struct amdgpu_device *adev = ring->adev; |
1366 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
1373 | 1367 | ||
1374 | amdgpu_ring_write(ring, | 1368 | amdgpu_ring_write(ring, |
1375 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); | 1369 | PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); |
@@ -1524,8 +1518,12 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) | |||
1524 | * Write enc ring commands to execute the indirect buffer | 1518 | * Write enc ring commands to execute the indirect buffer |
1525 | */ | 1519 | */ |
1526 | static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, | 1520 | static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, |
1527 | struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) | 1521 | struct amdgpu_job *job, |
1522 | struct amdgpu_ib *ib, | ||
1523 | bool ctx_switch) | ||
1528 | { | 1524 | { |
1525 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
1526 | |||
1529 | amdgpu_ring_write(ring, VCN_ENC_CMD_IB); | 1527 | amdgpu_ring_write(ring, VCN_ENC_CMD_IB); |
1530 | amdgpu_ring_write(ring, vmid); | 1528 | amdgpu_ring_write(ring, vmid); |
1531 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | 1529 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); |
@@ -1725,10 +1723,12 @@ static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6 | |||
1725 | * Write ring commands to execute the indirect buffer. | 1723 | * Write ring commands to execute the indirect buffer. |
1726 | */ | 1724 | */ |
1727 | static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, | 1725 | static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, |
1728 | struct amdgpu_ib *ib, | 1726 | struct amdgpu_job *job, |
1729 | unsigned vmid, bool ctx_switch) | 1727 | struct amdgpu_ib *ib, |
1728 | bool ctx_switch) | ||
1730 | { | 1729 | { |
1731 | struct amdgpu_device *adev = ring->adev; | 1730 | struct amdgpu_device *adev = ring->adev; |
1731 | unsigned vmid = AMDGPU_JOB_GET_VMID(job); | ||
1732 | 1732 | ||
1733 | amdgpu_ring_write(ring, | 1733 | amdgpu_ring_write(ring, |
1734 | PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); | 1734 | PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); |
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index a99f71797aa3..a0fda6f9252a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c | |||
@@ -129,7 +129,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) | |||
129 | else | 129 | else |
130 | wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); | 130 | wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); |
131 | WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); | 131 | WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); |
132 | WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); | 132 | WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFFFF); |
133 | 133 | ||
134 | /* set rptr, wptr to 0 */ | 134 | /* set rptr, wptr to 0 */ |
135 | WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); | 135 | WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); |
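The vega10_ih.c hunk is a plain bug fix: the IH write-back address is split across a LO/HI register pair, and masking the upper half with 0xFF keeps only address bits 39:32, so any write-back location above the 1 TiB mark is silently truncated; widening the mask to 0xFFFF presumably matches the real field width and preserves bits 47:32. A small worked example with a hypothetical address:

	u64 wptr_off = 0x0000012345678000ULL;		/* bit 40 set: above 1 TiB */
	u32 hi8  = upper_32_bits(wptr_off) & 0xFF;	/* 0x23: bits 47:40 lost   */
	u32 hi16 = upper_32_bits(wptr_off) & 0xFFFF;	/* 0x0123: address intact  */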
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 2d4473557b0d..d13fc4fcb517 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | |||
@@ -49,6 +49,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev) | |||
49 | adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); | 49 | adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); |
50 | adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); | 50 | adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); |
51 | adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); | 51 | adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); |
52 | adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); | ||
52 | } | 53 | } |
53 | return 0; | 54 | return 0; |
54 | } | 55 | } |
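The vega20_reg_init.c hunk registers a per-instance base for the CLK hardware IP so that SOC15 register accessors can reach clock registers. In the usual SOC15 convention these bases are consumed roughly as sketched below; the macro name and the BASE_IDX indexing are assumptions here, the real accessors live in the SOC15 common headers.

	/* Sketch: absolute offset = per-IP, per-instance segment base
	 * registered above + the register's relative offset. */
	#define SKETCH_SOC15_REG_OFFSET(adev, ip, inst, reg) \
		((adev)->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg))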