path: root/drivers/gpu/drm/amd/amdgpu
author		Daniel Vetter <daniel.vetter@ffwll.ch>	2017-10-03 05:09:16 -0400
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2017-10-03 05:09:16 -0400
commit		0d3c24e936feefeca854073ccb40613cd6eba9a9 (patch)
tree		1f675397b924846740b0931b066ddce6f3d7eb3d /drivers/gpu/drm/amd/amdgpu
parent		1af0838de60e723cb02253ecc9b555c30f8f6a6f (diff)
parent		ebec44a2456fbe5fe18aae88f6010f6878f0cb4a (diff)
Merge airlied/drm-next into drm-misc-next
Just catching up with upstream.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 87
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 191
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 228
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 130
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 215
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 104
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 165
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 29
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 127
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 62
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 82
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 32
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 147
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 52
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 180
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 26
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 246
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 37
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 56
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 349
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 70
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 825
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 85
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 77
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atom.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atom.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 108
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_dpm.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_ih.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 189
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 165
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 368
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 34
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 131
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 61
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.h | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 49
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 31
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dma.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.c | 77
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_ih.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c | 27
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15d.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 94
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 3
87 files changed, 3549 insertions, 1904 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 658bac0cdc5e..25a95c95df14 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -133,5 +133,3 @@ include $(FULL_AMD_PATH)/powerplay/Makefile
 amdgpu-y += $(AMD_POWERPLAY_FILES)
 
 obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
-
-CFLAGS_amdgpu_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a5427cf4b19d..ebfc267467ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -65,6 +65,7 @@
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 #include "amdgpu_vcn.h"
+#include "amdgpu_mn.h"
 
 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
@@ -76,7 +77,7 @@
 extern int amdgpu_modeset;
 extern int amdgpu_vram_limit;
 extern int amdgpu_vis_vram_limit;
-extern unsigned amdgpu_gart_size;
+extern int amdgpu_gart_size;
 extern int amdgpu_gtt_size;
 extern int amdgpu_moverate;
 extern int amdgpu_benchmarking;
@@ -91,11 +92,12 @@ extern int amdgpu_dpm;
 extern int amdgpu_fw_load_type;
 extern int amdgpu_aspm;
 extern int amdgpu_runtime_pm;
-extern unsigned amdgpu_ip_block_mask;
+extern uint amdgpu_ip_block_mask;
 extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
 extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fragment_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
 extern int amdgpu_vm_update_mode;
@@ -103,14 +105,14 @@ extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_no_evict;
 extern int amdgpu_direct_gma_size;
-extern unsigned amdgpu_pcie_gen_cap;
-extern unsigned amdgpu_pcie_lane_cap;
-extern unsigned amdgpu_cg_mask;
-extern unsigned amdgpu_pg_mask;
-extern unsigned amdgpu_sdma_phase_quantum;
+extern uint amdgpu_pcie_gen_cap;
+extern uint amdgpu_pcie_lane_cap;
+extern uint amdgpu_cg_mask;
+extern uint amdgpu_pg_mask;
+extern uint amdgpu_sdma_phase_quantum;
 extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
-extern unsigned amdgpu_pp_feature_mask;
+extern uint amdgpu_pp_feature_mask;
 extern int amdgpu_vram_page_split;
 extern int amdgpu_ngg;
 extern int amdgpu_prim_buf_per_se;
@@ -177,6 +179,7 @@ struct amdgpu_cs_parser;
 struct amdgpu_job;
 struct amdgpu_irq_src;
 struct amdgpu_fpriv;
+struct amdgpu_bo_va_mapping;
 
 enum amdgpu_cp_irq {
 	AMDGPU_CP_IRQ_GFX_EOP = 0,
@@ -291,14 +294,25 @@ struct amdgpu_buffer_funcs {
 
 /* provided by hw blocks that can write ptes, e.g., sdma */
 struct amdgpu_vm_pte_funcs {
+	/* number of dw to reserve per operation */
+	unsigned copy_pte_num_dw;
+
 	/* copy pte entries from GART */
 	void (*copy_pte)(struct amdgpu_ib *ib,
 			 uint64_t pe, uint64_t src,
 			 unsigned count);
+
 	/* write pte one entry at a time with addr mapping */
 	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 			  uint64_t value, unsigned count,
 			  uint32_t incr);
+
+	/* maximum nums of PTEs/PDEs in a single operation */
+	uint32_t set_max_nums_pte_pde;
+
+	/* number of dw to reserve per operation */
+	unsigned set_pte_pde_num_dw;
+
 	/* for linear pte/pde updates without addr mapping */
 	void (*set_pte_pde)(struct amdgpu_ib *ib,
 			    uint64_t pe,
@@ -331,6 +345,7 @@ struct amdgpu_gart_funcs {
 struct amdgpu_ih_funcs {
 	/* ring read/write ptr handling, called from interrupt context */
 	u32 (*get_wptr)(struct amdgpu_device *adev);
+	bool (*prescreen_iv)(struct amdgpu_device *adev);
 	void (*decode_iv)(struct amdgpu_device *adev,
 			  struct amdgpu_iv_entry *entry);
 	void (*set_rptr)(struct amdgpu_device *adev);
@@ -398,6 +413,7 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
 void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
 
 /* sub-allocation manager, it has to be protected by another lock.
@@ -454,9 +470,10 @@ struct amdgpu_sa_bo {
  */
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 			     int alignment, u32 initial_domain,
 			     u64 flags, bool kernel,
-			     struct drm_gem_object **obj);
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj);
 
 int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
@@ -730,8 +747,8 @@ struct amdgpu_ctx_mgr {
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
-uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct dma_fence *fence);
+int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+			 struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				       struct amdgpu_ring *ring, uint64_t seq);
 
@@ -748,6 +765,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 struct amdgpu_fpriv {
 	struct amdgpu_vm	vm;
 	struct amdgpu_bo_va	*prt_va;
+	struct amdgpu_bo_va	*csa_va;
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
@@ -1012,7 +1030,6 @@ struct amdgpu_gfx {
 	/* reset mask */
 	uint32_t		grbm_soft_reset;
 	uint32_t		srbm_soft_reset;
-	bool			in_reset;
 	/* s3/s4 mask */
 	bool			in_suspend;
 	/* NGG */
@@ -1054,6 +1071,7 @@ struct amdgpu_cs_parser {
 	/* buffer objects */
 	struct ww_acquire_ctx		ticket;
 	struct amdgpu_bo_list		*bo_list;
+	struct amdgpu_mn		*mn;
 	struct amdgpu_bo_list_entry	vm_pd;
 	struct list_head		validated;
 	struct dma_fence		*fence;
@@ -1181,6 +1199,9 @@ struct amdgpu_firmware {
 
 	/* gpu info firmware data pointer */
 	const struct firmware *gpu_info_fw;
+
+	void *fw_buf_ptr;
+	uint64_t fw_buf_mc;
 };
 
 /*
@@ -1195,20 +1216,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
 void amdgpu_test_moves(struct amdgpu_device *adev);
 
 /*
- * MMU Notifier
- */
-#if defined(CONFIG_MMU_NOTIFIER)
-int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
-void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-#else
-static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
-{
-	return -ENODEV;
-}
-static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
-#endif
-
-/*
  * Debugfs
  */
 struct amdgpu_debugfs {
@@ -1482,9 +1489,6 @@ struct amdgpu_device {
 	struct amdgpu_mman		mman;
 	struct amdgpu_vram_scratch	vram_scratch;
 	struct amdgpu_wb		wb;
-	atomic64_t			vram_usage;
-	atomic64_t			vram_vis_usage;
-	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
 	atomic64_t			num_evictions;
 	atomic64_t			num_vram_cpu_page_faults;
@@ -1593,6 +1597,7 @@ struct amdgpu_device {
 
 	/* record last mm index being written through WREG32*/
 	unsigned long last_mm_index;
+	bool			in_sriov_reset;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1760,6 +1765,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
+#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
 #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
@@ -1792,18 +1798,6 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
-int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
-int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
-			      uint32_t flags);
-bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
-struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
-bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
-				  unsigned long end);
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated);
-bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
-uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
-				 struct ttm_mem_reg *mem);
 void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
 void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
@@ -1886,10 +1880,9 @@ static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
 static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
 #endif
 
-struct amdgpu_bo_va_mapping *
-amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
-		       uint64_t addr, struct amdgpu_bo **bo);
-int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+			   uint64_t addr, struct amdgpu_bo **bo,
+			   struct amdgpu_bo_va_mapping **mapping);
 
 #include "amdgpu_object.h"
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c7bcf5207d79..5432af39a674 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -28,14 +28,14 @@
 #include <linux/module.h>
 
 const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 
 int amdgpu_amdkfd_init(void)
 {
 	int ret;
 
 #if defined(CONFIG_HSA_AMD_MODULE)
-	int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+	int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
 
 	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b8802a561cbd..8d689ab7e429 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,6 +26,7 @@
 #define AMDGPU_AMDKFD_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>
 
 struct amdgpu_device;
@@ -60,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
 
+#define read_user_wptr(mmptr, wptr, dst)			\
+	({							\
+		bool valid = false;				\
+		if ((mmptr) && (wptr)) {			\
+			if ((mmptr) == current->mm) {		\
+				valid = !get_user((dst), (wptr));	\
+			} else if (current->mm == NULL) {	\
+				use_mm(mmptr);			\
+				valid = !get_user((dst), (wptr));	\
+				unuse_mm(mmptr);		\
+			}					\
+		}						\
+		valid;						\
+	})
+
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 5254562fd0f9..dc7e25cce741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -39,6 +39,12 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
+enum hqd_dequeue_request_type {
+	NO_ACTION = 0,
+	DRAIN_PIPE,
+	RESET_WAVES
+};
+
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
@@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr);
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+			enum kfd_preempt_type reset_type,
 			unsigned int utimeout, uint32_t pipe_id,
 			uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -126,6 +135,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFBANK);
+	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFRANKS);
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}
 
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -133,6 +169,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_vm_alloc_pasid,
+	.free_pasid = amdgpu_vm_free_pasid,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
@@ -150,7 +188,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
-	.get_fw_version = get_fw_version
+	.get_fw_version = get_fw_version,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = get_tile_config,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -186,7 +226,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
-	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -290,20 +330,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr)
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	uint32_t wptr_shadow, is_wptr_shadow_valid;
 	struct cik_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg, wptr_val, data;
 
 	m = get_mqd(mqd);
 
-	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-	if (is_wptr_shadow_valid)
-		m->cp_hqd_pq_wptr = wptr_shadow;
-
 	acquire_queue(kgd, pipe_id, queue_id);
-	gfx_v7_0_mqd_commit(adev, m);
+
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Copy userspace write pointer value to register.
+	 * Activate doorbell logic to monitor subsequent changes.
+	 */
+	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+	if (read_user_wptr(mm, wptr, wptr_val))
+		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+	WREG32(mmCP_HQD_ACTIVE, data);
+
 	release_queue(kgd);
 
 	return 0;
@@ -382,30 +440,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 	return false;
 }
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+			enum kfd_preempt_type reset_type,
 			unsigned int utimeout, uint32_t pipe_id,
 			uint32_t queue_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t temp;
-	int timeout = utimeout;
+	enum hqd_dequeue_request_type type;
+	unsigned long flags, end_jiffies;
+	int retry;
 
 	acquire_queue(kgd, pipe_id, queue_id);
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
 
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}
+
+	/* Workaround: If IQ timer is active and the wait time is close to or
+	 * equal to 0, dequeueing is not safe. Wait until either the wait time
+	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+	 * cleared before continuing. Also, ensure wait times are set to at
+	 * least 0x3.
+	 */
+	local_irq_save(flags);
+	preempt_disable();
+	retry = 5000; /* wait for 500 usecs at maximum */
+	while (true) {
+		temp = RREG32(mmCP_HQD_IQ_TIMER);
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+			pr_debug("HW is processing IQ\n");
+			goto loop;
+		}
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+					== 3) /* SEM-rearm is safe */
+				break;
+			/* Wait time 3 is safe for CP, but our MMIO read/write
+			 * time is close to 1 microsecond, so check for 10 to
+			 * leave more buffer room
+			 */
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+					>= 10)
+				break;
+			pr_debug("IQ timer is active\n");
+		} else
+			break;
+loop:
+		if (!retry) {
+			pr_err("CP HQD IQ timer status time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	retry = 1000;
+	while (true) {
+		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+			break;
+		pr_debug("Dequeue request is pending\n");
 
+		if (!retry) {
+			pr_err("CP HQD dequeue request time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	local_irq_restore(flags);
+	preempt_enable();
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+	end_jiffies = (utimeout * HZ / 1000) + jiffies;
 	while (true) {
 		temp = RREG32(mmCP_HQD_ACTIVE);
-		if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 			break;
-		if (timeout <= 0) {
-			pr_err("kfd: cp queue preemption time out.\n");
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue preemption time out\n");
 			release_queue(kgd);
 			return -ETIME;
 		}
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	release_queue(kgd);
@@ -556,6 +683,16 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }
 
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+	unlock_srbm(kgd);
+}
+
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -566,42 +703,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.pfp_fw->data;
 		break;
 
 	case KGD_ENGINE_ME:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.me_fw->data;
 		break;
 
 	case KGD_ENGINE_CE:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.ce_fw->data;
 		break;
 
 	case KGD_ENGINE_MEC1:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.mec_fw->data;
 		break;
 
 	case KGD_ENGINE_MEC2:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.mec2_fw->data;
 		break;
 
 	case KGD_ENGINE_RLC:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.rlc_fw->data;
 		break;
 
 	case KGD_ENGINE_SDMA1:
 		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[0].fw->data;
 		break;
 
 	case KGD_ENGINE_SDMA2:
 		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[1].fw->data;
 		break;
 
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 133d06671e46..c678c69936a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,6 +39,12 @@
 #include "vi_structs.h"
 #include "vid.h"
 
+enum hqd_dequeue_request_type {
+	NO_ACTION = 0,
+	DRAIN_PIPE,
+	RESET_WAVES
+};
+
 struct cik_sdma_rlc_registers;
 
 /*
@@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr);
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+			enum kfd_preempt_type reset_type,
 			unsigned int utimeout, uint32_t pipe_id,
 			uint32_t queue_id);
 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
@@ -85,6 +94,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 					uint8_t vmid);
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFBANK);
+	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFRANKS);
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}
 
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -92,6 +128,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_vm_alloc_pasid,
+	.free_pasid = amdgpu_vm_free_pasid,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
@@ -111,7 +149,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
-	.get_fw_version = get_fw_version
+	.get_fw_version = get_fw_version,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = get_tile_config,
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -147,7 +187,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
-	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -216,7 +256,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
@@ -244,20 +284,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr)
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
 {
-	struct vi_mqd *m;
-	uint32_t shadow_wptr, valid_wptr;
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct vi_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg, wptr_val, data;
 
 	m = get_mqd(mqd);
 
-	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	if (valid_wptr == 0)
-		m->cp_hqd_pq_wptr = shadow_wptr;
-
 	acquire_queue(kgd, pipe_id, queue_id);
-	gfx_v8_0_mqd_commit(adev, mqd);
+
+	/* HIQ is set during driver init period with vmid set to 0*/
+	if (m->cp_hqd_vmid == 0) {
+		uint32_t value, mec, pipe;
+
+		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+			mec, pipe, queue_id);
+		value = RREG32(mmRLC_CP_SCHEDULERS);
+		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+			((mec << 5) | (pipe << 3) | queue_id | 0x80));
+		WREG32(mmRLC_CP_SCHEDULERS, value);
+	}
+
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+	 * This is safe since EOP RPTR==WPTR for any inactive HQD
+	 * on ASICs that do not support context-save.
+	 * EOP writes/reads can start anywhere in the ring.
+	 */
+	if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+		WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+		WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+		WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+	}
+
+	for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Copy userspace write pointer value to register.
+	 * Activate doorbell logic to monitor subsequent changes.
+	 */
+	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+	if (read_user_wptr(mm, wptr, wptr_val))
+		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+	WREG32(mmCP_HQD_ACTIVE, data);
+
 	release_queue(kgd);
 
 	return 0;
@@ -308,29 +395,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 	return false;
 }
 
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+			enum kfd_preempt_type reset_type,
 			unsigned int utimeout, uint32_t pipe_id,
 			uint32_t queue_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t temp;
-	int timeout = utimeout;
+	enum hqd_dequeue_request_type type;
+	unsigned long flags, end_jiffies;
+	int retry;
+	struct vi_mqd *m = get_mqd(mqd);
 
 	acquire_queue(kgd, pipe_id, queue_id);
 
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+	if (m->cp_hqd_vmid == 0)
+		WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
 
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}
+
+	/* Workaround: If IQ timer is active and the wait time is close to or
+	 * equal to 0, dequeueing is not safe. Wait until either the wait time
+	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+	 * cleared before continuing. Also, ensure wait times are set to at
+	 * least 0x3.
+	 */
+	local_irq_save(flags);
+	preempt_disable();
+	retry = 5000; /* wait for 500 usecs at maximum */
+	while (true) {
+		temp = RREG32(mmCP_HQD_IQ_TIMER);
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+			pr_debug("HW is processing IQ\n");
+			goto loop;
+		}
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+					== 3) /* SEM-rearm is safe */
+				break;
+			/* Wait time 3 is safe for CP, but our MMIO read/write
+			 * time is close to 1 microsecond, so check for 10 to
+			 * leave more buffer room
+			 */
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+					>= 10)
+				break;
+			pr_debug("IQ timer is active\n");
+		} else
+			break;
+loop:
+		if (!retry) {
+			pr_err("CP HQD IQ timer status time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	retry = 1000;
+	while (true) {
+		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+			break;
+		pr_debug("Dequeue request is pending\n");
+
+		if (!retry) {
+			pr_err("CP HQD dequeue request time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	local_irq_restore(flags);
+	preempt_enable();
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+	end_jiffies = (utimeout * HZ / 1000) + jiffies;
 	while (true) {
 		temp = RREG32(mmCP_HQD_ACTIVE);
-		if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 			break;
-		if (timeout <= 0) {
-			pr_err("kfd: cp queue preemption time out.\n");
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue preemption time out.\n");
 			release_queue(kgd);
 			return -ETIME;
 		}
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	release_queue(kgd);
@@ -444,6 +604,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 	return 0;
 }
 
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+	unlock_srbm(kgd);
+}
+
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -454,42 +624,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.pfp_fw->data;
 		break;
 
 	case KGD_ENGINE_ME:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.me_fw->data;
 		break;
 
 	case KGD_ENGINE_CE:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.ce_fw->data;
 		break;
 
 	case KGD_ENGINE_MEC1:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.mec_fw->data;
 		break;
 
 	case KGD_ENGINE_MEC2:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.mec2_fw->data;
 		break;
 
 	case KGD_ENGINE_RLC:
 		hdr = (const union amdgpu_firmware_header *)
						adev->gfx.rlc_fw->data;
 		break;
 
 	case KGD_ENGINE_SDMA1:
 		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[0].fw->data;
 		break;
 
 	case KGD_ENGINE_SDMA2:
 		hdr = (const union amdgpu_firmware_header *)
						adev->sdma.instance[1].fw->data;
 		break;
 
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index fd435a96481c..383204e911a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -45,7 +45,6 @@ struct amdgpu_cgs_device {
 static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 				    enum cgs_gpu_mem_type type,
 				    uint64_t size, uint64_t align,
-				    uint64_t min_offset, uint64_t max_offset,
 				    cgs_handle_t *handle)
 {
 	CGS_FUNC_ADEV;
@@ -53,13 +52,6 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 	int ret = 0;
 	uint32_t domain = 0;
 	struct amdgpu_bo *obj;
-	struct ttm_placement placement;
-	struct ttm_place place;
-
-	if (min_offset > max_offset) {
-		BUG_ON(1);
-		return -EINVAL;
-	}
 
 	/* fail if the alignment is not a power of 2 */
 	if (((align != 1) && (align & (align - 1)))
@@ -73,41 +65,19 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 		flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 		domain = AMDGPU_GEM_DOMAIN_VRAM;
-		if (max_offset > adev->mc.real_vram_size)
-			return -EINVAL;
-		place.fpfn = min_offset >> PAGE_SHIFT;
-		place.lpfn = max_offset >> PAGE_SHIFT;
-		place.flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-			TTM_PL_FLAG_VRAM;
 		break;
 	case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
 	case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
 		flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 		domain = AMDGPU_GEM_DOMAIN_VRAM;
-		if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
-			place.fpfn =
-				max(min_offset, adev->mc.visible_vram_size) >> PAGE_SHIFT;
-			place.lpfn =
-				min(max_offset, adev->mc.real_vram_size) >> PAGE_SHIFT;
-			place.flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-				TTM_PL_FLAG_VRAM;
-		}
-
 		break;
 	case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
 		domain = AMDGPU_GEM_DOMAIN_GTT;
-		place.fpfn = min_offset >> PAGE_SHIFT;
-		place.lpfn = max_offset >> PAGE_SHIFT;
-		place.flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
 		break;
 	case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
 		flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 		domain = AMDGPU_GEM_DOMAIN_GTT;
-		place.fpfn = min_offset >> PAGE_SHIFT;
-		place.lpfn = max_offset >> PAGE_SHIFT;
-		place.flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT |
-			TTM_PL_FLAG_UNCACHED;
 		break;
 	default:
 		return -EINVAL;
@@ -116,15 +86,8 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 
 	*handle = 0;
 
-	placement.placement = &place;
-	placement.num_placement = 1;
-	placement.busy_placement = &place;
-	placement.num_busy_placement = 1;
-
-	ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE,
-					  true, domain, flags,
-					  NULL, &placement, NULL,
-					  0, &obj);
+	ret = amdgpu_bo_create(adev, size, align, true, domain, flags,
+			       NULL, NULL, 0, &obj);
 	if (ret) {
 		DRM_ERROR("(%d) bo create failed\n", ret);
 		return ret;
@@ -155,19 +118,14 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h
 				   uint64_t *mcaddr)
 {
 	int r;
-	u64 min_offset, max_offset;
 	struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
 
 	WARN_ON_ONCE(obj->placement.num_placement > 1);
 
-	min_offset = obj->placements[0].fpfn << PAGE_SHIFT;
-	max_offset = obj->placements[0].lpfn << PAGE_SHIFT;
-
 	r = amdgpu_bo_reserve(obj, true);
 	if (unlikely(r != 0))
 		return r;
-	r = amdgpu_bo_pin_restricted(obj, obj->preferred_domains,
-				     min_offset, max_offset, mcaddr);
+	r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr);
 	amdgpu_bo_unreserve(obj);
 	return r;
 }
@@ -675,6 +633,85 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
675 633
676 if (!adev->pm.fw) { 634 if (!adev->pm.fw) {
677 switch (adev->asic_type) { 635 switch (adev->asic_type) {
636 case CHIP_TAHITI:
637 strcpy(fw_name, "radeon/tahiti_smc.bin");
638 break;
639 case CHIP_PITCAIRN:
640 if ((adev->pdev->revision == 0x81) &&
641 ((adev->pdev->device == 0x6810) ||
642 (adev->pdev->device == 0x6811))) {
643 info->is_kicker = true;
644 strcpy(fw_name, "radeon/pitcairn_k_smc.bin");
645 } else {
646 strcpy(fw_name, "radeon/pitcairn_smc.bin");
647 }
648 break;
649 case CHIP_VERDE:
650 if (((adev->pdev->device == 0x6820) &&
651 ((adev->pdev->revision == 0x81) ||
652 (adev->pdev->revision == 0x83))) ||
653 ((adev->pdev->device == 0x6821) &&
654 ((adev->pdev->revision == 0x83) ||
655 (adev->pdev->revision == 0x87))) ||
656 ((adev->pdev->revision == 0x87) &&
657 ((adev->pdev->device == 0x6823) ||
658 (adev->pdev->device == 0x682b)))) {
659 info->is_kicker = true;
660 strcpy(fw_name, "radeon/verde_k_smc.bin");
661 } else {
662 strcpy(fw_name, "radeon/verde_smc.bin");
663 }
664 break;
665 case CHIP_OLAND:
666 if (((adev->pdev->revision == 0x81) &&
667 ((adev->pdev->device == 0x6600) ||
668 (adev->pdev->device == 0x6604) ||
669 (adev->pdev->device == 0x6605) ||
670 (adev->pdev->device == 0x6610))) ||
671 ((adev->pdev->revision == 0x83) &&
672 (adev->pdev->device == 0x6610))) {
673 info->is_kicker = true;
674 strcpy(fw_name, "radeon/oland_k_smc.bin");
675 } else {
676 strcpy(fw_name, "radeon/oland_smc.bin");
677 }
678 break;
679 case CHIP_HAINAN:
680 if (((adev->pdev->revision == 0x81) &&
681 (adev->pdev->device == 0x6660)) ||
682 ((adev->pdev->revision == 0x83) &&
683 ((adev->pdev->device == 0x6660) ||
684 (adev->pdev->device == 0x6663) ||
685 (adev->pdev->device == 0x6665) ||
686 (adev->pdev->device == 0x6667)))) {
687 info->is_kicker = true;
688 strcpy(fw_name, "radeon/hainan_k_smc.bin");
689 } else if ((adev->pdev->revision == 0xc3) &&
690 (adev->pdev->device == 0x6665)) {
691 info->is_kicker = true;
692 strcpy(fw_name, "radeon/banks_k_2_smc.bin");
693 } else {
694 strcpy(fw_name, "radeon/hainan_smc.bin");
695 }
696 break;
697 case CHIP_BONAIRE:
698 if ((adev->pdev->revision == 0x80) ||
699 (adev->pdev->revision == 0x81) ||
700 (adev->pdev->device == 0x665f)) {
701 info->is_kicker = true;
702 strcpy(fw_name, "radeon/bonaire_k_smc.bin");
703 } else {
704 strcpy(fw_name, "radeon/bonaire_smc.bin");
705 }
706 break;
707 case CHIP_HAWAII:
708 if (adev->pdev->revision == 0x80) {
709 info->is_kicker = true;
710 strcpy(fw_name, "radeon/hawaii_k_smc.bin");
711 } else {
712 strcpy(fw_name, "radeon/hawaii_smc.bin");
713 }
714 break;
678 case CHIP_TOPAZ: 715 case CHIP_TOPAZ:
679 if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) || 716 if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) ||
680 ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) || 717 ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) ||
@@ -838,6 +875,9 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
838 case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID: 875 case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID:
839 sys_info->value = adev->pdev->subsystem_vendor; 876 sys_info->value = adev->pdev->subsystem_vendor;
840 break; 877 break;
878 case CGS_SYSTEM_INFO_PCIE_BUS_DEVFN:
879 sys_info->value = adev->pdev->devfn;
880 break;
841 default: 881 default:
842 return -ENODEV; 882 return -ENODEV;
843 } 883 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 8d1cf2d3e663..f51b41f094ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -346,10 +346,8 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector)
346{ 346{
347 struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); 347 struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
348 348
349 if (amdgpu_connector->edid) { 349 kfree(amdgpu_connector->edid);
350 kfree(amdgpu_connector->edid); 350 amdgpu_connector->edid = NULL;
351 amdgpu_connector->edid = NULL;
352 }
353} 351}
354 352
355static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) 353static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c05479ec825a..c6a214f1e991 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -246,7 +246,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
246 } 246 }
247 247
248 total_vram = adev->mc.real_vram_size - adev->vram_pin_size; 248 total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
249 used_vram = atomic64_read(&adev->vram_usage); 249 used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
250 free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; 250 free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
251 251
252 spin_lock(&adev->mm_stats.lock); 252 spin_lock(&adev->mm_stats.lock);
@@ -292,7 +292,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
292 /* Do the same for visible VRAM if half of it is free */ 292 /* Do the same for visible VRAM if half of it is free */
293 if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { 293 if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
294 u64 total_vis_vram = adev->mc.visible_vram_size; 294 u64 total_vis_vram = adev->mc.visible_vram_size;
295 u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage); 295 u64 used_vis_vram =
296 amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
296 297
297 if (used_vis_vram < total_vis_vram) { 298 if (used_vis_vram < total_vis_vram) {
298 u64 free_vis_vram = total_vis_vram - used_vis_vram; 299 u64 free_vis_vram = total_vis_vram - used_vis_vram;
@@ -472,11 +473,16 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
472 return -EPERM; 473 return -EPERM;
473 474
474 /* Check if we have user pages and nobody bound the BO already */ 475 /* Check if we have user pages and nobody bound the BO already */
475 if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) { 476 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
476 size_t size = sizeof(struct page *); 477 lobj->user_pages) {
477 478 amdgpu_ttm_placement_from_domain(bo,
478 size *= bo->tbo.ttm->num_pages; 479 AMDGPU_GEM_DOMAIN_CPU);
479 memcpy(bo->tbo.ttm->pages, lobj->user_pages, size); 480 r = ttm_bo_validate(&bo->tbo, &bo->placement, true,
481 false);
482 if (r)
483 return r;
484 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
485 lobj->user_pages);
480 binding_userptr = true; 486 binding_userptr = true;
481 } 487 }
482 488
@@ -501,7 +507,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
501 struct amdgpu_fpriv *fpriv = p->filp->driver_priv; 507 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
502 struct amdgpu_bo_list_entry *e; 508 struct amdgpu_bo_list_entry *e;
503 struct list_head duplicates; 509 struct list_head duplicates;
504 bool need_mmap_lock = false;
505 unsigned i, tries = 10; 510 unsigned i, tries = 10;
506 int r; 511 int r;
507 512
@@ -509,9 +514,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
509 514
510 p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); 515 p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
511 if (p->bo_list) { 516 if (p->bo_list) {
512 need_mmap_lock = p->bo_list->first_userptr !=
513 p->bo_list->num_entries;
514 amdgpu_bo_list_get_list(p->bo_list, &p->validated); 517 amdgpu_bo_list_get_list(p->bo_list, &p->validated);
518 if (p->bo_list->first_userptr != p->bo_list->num_entries)
519 p->mn = amdgpu_mn_get(p->adev);
515 } 520 }
516 521
517 INIT_LIST_HEAD(&duplicates); 522 INIT_LIST_HEAD(&duplicates);
@@ -520,9 +525,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
520 if (p->uf_entry.robj) 525 if (p->uf_entry.robj)
521 list_add(&p->uf_entry.tv.head, &p->validated); 526 list_add(&p->uf_entry.tv.head, &p->validated);
522 527
523 if (need_mmap_lock)
524 down_read(&current->mm->mmap_sem);
525
526 while (1) { 528 while (1) {
527 struct list_head need_pages; 529 struct list_head need_pages;
528 unsigned i; 530 unsigned i;
@@ -542,23 +544,25 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
542 INIT_LIST_HEAD(&need_pages); 544 INIT_LIST_HEAD(&need_pages);
543 for (i = p->bo_list->first_userptr; 545 for (i = p->bo_list->first_userptr;
544 i < p->bo_list->num_entries; ++i) { 546 i < p->bo_list->num_entries; ++i) {
547 struct amdgpu_bo *bo;
545 548
546 e = &p->bo_list->array[i]; 549 e = &p->bo_list->array[i];
550 bo = e->robj;
547 551
548 if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm, 552 if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
549 &e->user_invalidated) && e->user_pages) { 553 &e->user_invalidated) && e->user_pages) {
550 554
551 /* We acquired a page array, but somebody 555 /* We acquired a page array, but somebody
552 * invalidated it. Free it and try again 556 * invalidated it. Free it and try again
553 */ 557 */
554 release_pages(e->user_pages, 558 release_pages(e->user_pages,
555 e->robj->tbo.ttm->num_pages, 559 bo->tbo.ttm->num_pages,
556 false); 560 false);
557 kvfree(e->user_pages); 561 kvfree(e->user_pages);
558 e->user_pages = NULL; 562 e->user_pages = NULL;
559 } 563 }
560 564
561 if (e->robj->tbo.ttm->state != tt_bound && 565 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
562 !e->user_pages) { 566 !e->user_pages) {
563 list_del(&e->tv.head); 567 list_del(&e->tv.head);
564 list_add(&e->tv.head, &need_pages); 568 list_add(&e->tv.head, &need_pages);
@@ -635,9 +639,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
635 639
636 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, 640 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
637 p->bytes_moved_vis); 641 p->bytes_moved_vis);
638 fpriv->vm.last_eviction_counter =
639 atomic64_read(&p->adev->num_evictions);
640
641 if (p->bo_list) { 642 if (p->bo_list) {
642 struct amdgpu_bo *gds = p->bo_list->gds_obj; 643 struct amdgpu_bo *gds = p->bo_list->gds_obj;
643 struct amdgpu_bo *gws = p->bo_list->gws_obj; 644 struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -673,16 +674,11 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
673 } 674 }
674 675
675error_validate: 676error_validate:
676 if (r) { 677 if (r)
677 amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
678 ttm_eu_backoff_reservation(&p->ticket, &p->validated); 678 ttm_eu_backoff_reservation(&p->ticket, &p->validated);
679 }
680 679
681error_free_pages: 680error_free_pages:
682 681
683 if (need_mmap_lock)
684 up_read(&current->mm->mmap_sem);
685
686 if (p->bo_list) { 682 if (p->bo_list) {
687 for (i = p->bo_list->first_userptr; 683 for (i = p->bo_list->first_userptr;
688 i < p->bo_list->num_entries; ++i) { 684 i < p->bo_list->num_entries; ++i) {
@@ -724,21 +720,14 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 724 * If error is set then back off the buffer reservations, otherwise just free memory 720 * If error is set then back off the buffer reservations, otherwise just free memory
725 * used by parsing context. 721 * used by parsing context.
726 **/ 722 **/
727static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) 723static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
724 bool backoff)
728{ 725{
729 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
730 unsigned i; 726 unsigned i;
731 727
732 if (!error) { 728 if (error && backoff)
733 amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
734
735 ttm_eu_fence_buffer_objects(&parser->ticket,
736 &parser->validated,
737 parser->fence);
738 } else if (backoff) {
739 ttm_eu_backoff_reservation(&parser->ticket, 729 ttm_eu_backoff_reservation(&parser->ticket,
740 &parser->validated); 730 &parser->validated);
741 }
742 731
743 for (i = 0; i < parser->num_post_dep_syncobjs; i++) 732 for (i = 0; i < parser->num_post_dep_syncobjs; i++)
744 drm_syncobj_put(parser->post_dep_syncobjs[i]); 733 drm_syncobj_put(parser->post_dep_syncobjs[i]);
@@ -772,10 +761,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
772 if (r) 761 if (r)
773 return r; 762 return r;
774 763
775 r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update);
776 if (r)
777 return r;
778
779 r = amdgpu_vm_clear_freed(adev, vm, NULL); 764 r = amdgpu_vm_clear_freed(adev, vm, NULL);
780 if (r) 765 if (r)
781 return r; 766 return r;
@@ -791,7 +776,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
791 776
792 if (amdgpu_sriov_vf(adev)) { 777 if (amdgpu_sriov_vf(adev)) {
793 struct dma_fence *f; 778 struct dma_fence *f;
794 bo_va = vm->csa_bo_va; 779
780 bo_va = fpriv->csa_va;
795 BUG_ON(!bo_va); 781 BUG_ON(!bo_va);
796 r = amdgpu_vm_bo_update(adev, bo_va, false); 782 r = amdgpu_vm_bo_update(adev, bo_va, false);
797 if (r) 783 if (r)
@@ -828,7 +814,13 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
828 814
829 } 815 }
830 816
831 r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync); 817 r = amdgpu_vm_handle_moved(adev, vm);
818 if (r)
819 return r;
820
821 r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
822 if (r)
823 return r;
832 824
833 if (amdgpu_vm_debug && p->bo_list) { 825 if (amdgpu_vm_debug && p->bo_list) {
834 /* Invalidate all BOs to test for userspace bugs */ 826 /* Invalidate all BOs to test for userspace bugs */
@@ -838,7 +830,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
838 if (!bo) 830 if (!bo)
839 continue; 831 continue;
840 832
841 amdgpu_vm_bo_invalidate(adev, bo); 833 amdgpu_vm_bo_invalidate(adev, bo, false);
842 } 834 }
843 } 835 }
844 836
@@ -863,7 +855,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
863 } 855 }
864 856
865 if (p->job->vm) { 857 if (p->job->vm) {
866 p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo); 858 p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
867 859
868 r = amdgpu_bo_vm_update_pte(p); 860 r = amdgpu_bo_vm_update_pte(p);
869 if (r) 861 if (r)
@@ -931,11 +923,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
931 uint64_t offset; 923 uint64_t offset;
932 uint8_t *kptr; 924 uint8_t *kptr;
933 925
934 m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, 926 r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
935 &aobj); 927 &aobj, &m);
936 if (!aobj) { 928 if (r) {
937 DRM_ERROR("IB va_start is invalid\n"); 929 DRM_ERROR("IB va_start is invalid\n");
938 return -EINVAL; 930 return r;
939 } 931 }
940 932
941 if ((chunk_ib->va_start + chunk_ib->ib_bytes) > 933 if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
@@ -1038,7 +1030,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
1038{ 1030{
1039 int r; 1031 int r;
1040 struct dma_fence *fence; 1032 struct dma_fence *fence;
1041 r = drm_syncobj_fence_get(p->filp, handle, &fence); 1033 r = drm_syncobj_find_fence(p->filp, handle, &fence);
1042 if (r) 1034 if (r)
1043 return r; 1035 return r;
1044 1036
@@ -1082,6 +1074,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
1082 GFP_KERNEL); 1074 GFP_KERNEL);
1083 p->num_post_dep_syncobjs = 0; 1075 p->num_post_dep_syncobjs = 0;
1084 1076
1077 if (!p->post_dep_syncobjs)
1078 return -ENOMEM;
1079
1085 for (i = 0; i < num_deps; ++i) { 1080 for (i = 0; i < num_deps; ++i) {
1086 p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); 1081 p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
1087 if (!p->post_dep_syncobjs[i]) 1082 if (!p->post_dep_syncobjs[i])
@@ -1133,14 +1128,31 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1133 struct amdgpu_ring *ring = p->job->ring; 1128 struct amdgpu_ring *ring = p->job->ring;
1134 struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; 1129 struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
1135 struct amdgpu_job *job; 1130 struct amdgpu_job *job;
1131 unsigned i;
1132 uint64_t seq;
1133
1136 int r; 1134 int r;
1137 1135
1136 amdgpu_mn_lock(p->mn);
1137 if (p->bo_list) {
1138 for (i = p->bo_list->first_userptr;
1139 i < p->bo_list->num_entries; ++i) {
1140 struct amdgpu_bo *bo = p->bo_list->array[i].robj;
1141
1142 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
1143 amdgpu_mn_unlock(p->mn);
1144 return -ERESTARTSYS;
1145 }
1146 }
1147 }
1148
1138 job = p->job; 1149 job = p->job;
1139 p->job = NULL; 1150 p->job = NULL;
1140 1151
1141 r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp); 1152 r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
1142 if (r) { 1153 if (r) {
1143 amdgpu_job_free(job); 1154 amdgpu_job_free(job);
1155 amdgpu_mn_unlock(p->mn);
1144 return r; 1156 return r;
1145 } 1157 }
1146 1158
@@ -1148,15 +1160,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1148 job->fence_ctx = entity->fence_context; 1160 job->fence_ctx = entity->fence_context;
1149 p->fence = dma_fence_get(&job->base.s_fence->finished); 1161 p->fence = dma_fence_get(&job->base.s_fence->finished);
1150 1162
1163 r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
1164 if (r) {
1165 dma_fence_put(p->fence);
1166 dma_fence_put(&job->base.s_fence->finished);
1167 amdgpu_job_free(job);
1168 amdgpu_mn_unlock(p->mn);
1169 return r;
1170 }
1171
1151 amdgpu_cs_post_dependencies(p); 1172 amdgpu_cs_post_dependencies(p);
1152 1173
1153 cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); 1174 cs->out.handle = seq;
1154 job->uf_sequence = cs->out.handle; 1175 job->uf_sequence = seq;
1176
1155 amdgpu_job_free_resources(job); 1177 amdgpu_job_free_resources(job);
1156 amdgpu_cs_parser_fini(p, 0, true);
1157 1178
1158 trace_amdgpu_cs_ioctl(job); 1179 trace_amdgpu_cs_ioctl(job);
1159 amd_sched_entity_push_job(&job->base); 1180 amd_sched_entity_push_job(&job->base);
1181
1182 ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1183 amdgpu_mn_unlock(p->mn);
1184
1160 return 0; 1185 return 0;
1161} 1186}
1162 1187
@@ -1211,10 +1236,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1211 goto out; 1236 goto out;
1212 1237
1213 r = amdgpu_cs_submit(&parser, cs); 1238 r = amdgpu_cs_submit(&parser, cs);
1214 if (r)
1215 goto out;
1216 1239
1217 return 0;
1218out: 1240out:
1219 amdgpu_cs_parser_fini(&parser, r, reserved_buffers); 1241 amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1220 return r; 1242 return r;
@@ -1387,6 +1409,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1387 array[i] = fence; 1409 array[i] = fence;
1388 } else { /* NULL, the fence has been already signaled */ 1410 } else { /* NULL, the fence has been already signaled */
1389 r = 1; 1411 r = 1;
1412 first = i;
1390 goto out; 1413 goto out;
1391 } 1414 }
1392 } 1415 }
@@ -1466,78 +1489,36 @@ err_free_fences:
1466 * virtual memory address. Returns allocation structure when found, NULL 1489 * virtual memory address. Returns allocation structure when found, NULL
1467 * otherwise. 1490 * otherwise.
1468 */ 1491 */
1469struct amdgpu_bo_va_mapping * 1492int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1470amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, 1493 uint64_t addr, struct amdgpu_bo **bo,
1471 uint64_t addr, struct amdgpu_bo **bo) 1494 struct amdgpu_bo_va_mapping **map)
1472{ 1495{
1496 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1497 struct amdgpu_vm *vm = &fpriv->vm;
1473 struct amdgpu_bo_va_mapping *mapping; 1498 struct amdgpu_bo_va_mapping *mapping;
1474 unsigned i; 1499 int r;
1475
1476 if (!parser->bo_list)
1477 return NULL;
1478 1500
1479 addr /= AMDGPU_GPU_PAGE_SIZE; 1501 addr /= AMDGPU_GPU_PAGE_SIZE;
1480 1502
1481 for (i = 0; i < parser->bo_list->num_entries; i++) { 1503 mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1482 struct amdgpu_bo_list_entry *lobj; 1504 if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1483 1505 return -EINVAL;
1484 lobj = &parser->bo_list->array[i];
1485 if (!lobj->bo_va)
1486 continue;
1487
1488 list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
1489 if (mapping->start > addr ||
1490 addr > mapping->last)
1491 continue;
1492
1493 *bo = lobj->bo_va->bo;
1494 return mapping;
1495 }
1496
1497 list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
1498 if (mapping->start > addr ||
1499 addr > mapping->last)
1500 continue;
1501 1506
1502 *bo = lobj->bo_va->bo; 1507 *bo = mapping->bo_va->base.bo;
1503 return mapping; 1508 *map = mapping;
1504 }
1505 }
1506 1509
1507 return NULL; 1510 /* Double check that the BO is reserved by this CS */
1508} 1511 if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
1512 return -EINVAL;
1509 1513
1510/** 1514 r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
1511 * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM 1515 if (unlikely(r))
1512 * 1516 return r;
1513 * @parser: command submission parser context
1514 *
1515 * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM.
1516 */
1517int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
1518{
1519 unsigned i;
1520 int r;
1521 1517
1522 if (!parser->bo_list) 1518 if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
1523 return 0; 1519 return 0;
1524 1520
1525 for (i = 0; i < parser->bo_list->num_entries; i++) { 1521 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1526 struct amdgpu_bo *bo = parser->bo_list->array[i].robj; 1522 amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
1527 1523 return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false);
1528 r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
1529 if (unlikely(r))
1530 return r;
1531
1532 if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
1533 continue;
1534
1535 bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1536 amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains);
1537 r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
1538 if (unlikely(r))
1539 return r;
1540 }
1541
1542 return 0;
1543} 1524}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a11e44340b23..75c933b1a432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -246,8 +246,8 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
246 return 0; 246 return 0;
247} 247}
248 248
249uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, 249int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
250 struct dma_fence *fence) 250 struct dma_fence *fence, uint64_t* handler)
251{ 251{
252 struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; 252 struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
253 uint64_t seq = cring->sequence; 253 uint64_t seq = cring->sequence;
@@ -258,9 +258,9 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
258 other = cring->fences[idx]; 258 other = cring->fences[idx];
259 if (other) { 259 if (other) {
260 signed long r; 260 signed long r;
261 r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); 261 r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
262 if (r < 0) 262 if (r < 0)
263 DRM_ERROR("Error (%ld) waiting for fence!\n", r); 263 return r;
264 } 264 }
265 265
266 dma_fence_get(fence); 266 dma_fence_get(fence);
@@ -271,8 +271,10 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
271 spin_unlock(&ctx->ring_lock); 271 spin_unlock(&ctx->ring_lock);
272 272
273 dma_fence_put(other); 273 dma_fence_put(other);
274 if (handler)
275 *handler = seq;
274 276
275 return seq; 277 return 0;
276} 278}
277 279
278struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, 280struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a6f6cb0f2e02..3e84ddf9e3b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -65,6 +65,7 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
65static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); 65static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
66static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); 66static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
67static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); 67static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev);
68static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev);
68 69
69static const char *amdgpu_asic_name[] = { 70static const char *amdgpu_asic_name[] = {
70 "TAHITI", 71 "TAHITI",
@@ -402,6 +403,15 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev)
402 */ 403 */
403static int amdgpu_doorbell_init(struct amdgpu_device *adev) 404static int amdgpu_doorbell_init(struct amdgpu_device *adev)
404{ 405{
406 /* No doorbell on SI hardware generation */
407 if (adev->asic_type < CHIP_BONAIRE) {
408 adev->doorbell.base = 0;
409 adev->doorbell.size = 0;
410 adev->doorbell.num_doorbells = 0;
411 adev->doorbell.ptr = NULL;
412 return 0;
413 }
414
405 /* doorbell bar mapping */ 415 /* doorbell bar mapping */
406 adev->doorbell.base = pci_resource_start(adev->pdev, 2); 416 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
407 adev->doorbell.size = pci_resource_len(adev->pdev, 2); 417 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
@@ -887,6 +897,20 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
887 return r; 897 return r;
888} 898}
889 899
900static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
901 struct device_attribute *attr,
902 char *buf)
903{
904 struct drm_device *ddev = dev_get_drvdata(dev);
905 struct amdgpu_device *adev = ddev->dev_private;
906 struct atom_context *ctx = adev->mode_info.atom_context;
907
908 return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version);
909}
910
911static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
912 NULL);
913
890/** 914/**
891 * amdgpu_atombios_fini - free the driver info and callbacks for atombios 915 * amdgpu_atombios_fini - free the driver info and callbacks for atombios
892 * 916 *
@@ -906,6 +930,7 @@ static void amdgpu_atombios_fini(struct amdgpu_device *adev)
906 adev->mode_info.atom_context = NULL; 930 adev->mode_info.atom_context = NULL;
907 kfree(adev->mode_info.atom_card_info); 931 kfree(adev->mode_info.atom_card_info);
908 adev->mode_info.atom_card_info = NULL; 932 adev->mode_info.atom_card_info = NULL;
933 device_remove_file(adev->dev, &dev_attr_vbios_version);
909} 934}
910 935
911/** 936/**
@@ -922,6 +947,7 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev)
922{ 947{
923 struct card_info *atom_card_info = 948 struct card_info *atom_card_info =
924 kzalloc(sizeof(struct card_info), GFP_KERNEL); 949 kzalloc(sizeof(struct card_info), GFP_KERNEL);
950 int ret;
925 951
926 if (!atom_card_info) 952 if (!atom_card_info)
927 return -ENOMEM; 953 return -ENOMEM;
@@ -958,6 +984,13 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev)
958 amdgpu_atombios_scratch_regs_init(adev); 984 amdgpu_atombios_scratch_regs_init(adev);
959 amdgpu_atombios_allocate_fb_scratch(adev); 985 amdgpu_atombios_allocate_fb_scratch(adev);
960 } 986 }
987
988 ret = device_create_file(adev->dev, &dev_attr_vbios_version);
989 if (ret) {
990 DRM_ERROR("Failed to create device file for VBIOS version\n");
991 return ret;
992 }
993
961 return 0; 994 return 0;
962} 995}
963 996
@@ -1062,11 +1095,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
1062 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); 1095 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1063 } 1096 }
1064 1097
1065 if (amdgpu_gart_size < 32) { 1098 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1066 /* gart size must be greater or equal to 32M */ 1099 /* gart size must be greater or equal to 32M */
1067 dev_warn(adev->dev, "gart size (%d) too small\n", 1100 dev_warn(adev->dev, "gart size (%d) too small\n",
1068 amdgpu_gart_size); 1101 amdgpu_gart_size);
1069 amdgpu_gart_size = 32; 1102 amdgpu_gart_size = -1;
1070 } 1103 }
1071 1104
1072 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { 1105 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
@@ -1076,6 +1109,13 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
1076 amdgpu_gtt_size = -1; 1109 amdgpu_gtt_size = -1;
1077 } 1110 }
1078 1111
1112 /* valid range is between 4 and 9 inclusive */
1113 if (amdgpu_vm_fragment_size != -1 &&
1114 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1115 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1116 amdgpu_vm_fragment_size = -1;
1117 }
1118
1079 amdgpu_check_vm_size(adev); 1119 amdgpu_check_vm_size(adev);
1080 1120
1081 amdgpu_check_block_size(adev); 1121 amdgpu_check_block_size(adev);
@@ -1750,10 +1790,8 @@ static int amdgpu_fini(struct amdgpu_device *adev)
1750 adev->ip_blocks[i].status.late_initialized = false; 1790 adev->ip_blocks[i].status.late_initialized = false;
1751 } 1791 }
1752 1792
1753 if (amdgpu_sriov_vf(adev)) { 1793 if (amdgpu_sriov_vf(adev))
1754 amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);
1755 amdgpu_virt_release_full_gpu(adev, false); 1794 amdgpu_virt_release_full_gpu(adev, false);
1756 }
1757 1795
1758 return 0; 1796 return 0;
1759} 1797}
@@ -2044,9 +2082,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2044 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 2082 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2045 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); 2083 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2046 2084
2047 if (adev->asic_type >= CHIP_BONAIRE) 2085 /* doorbell bar mapping */
2048 /* doorbell bar mapping */ 2086 amdgpu_doorbell_init(adev);
2049 amdgpu_doorbell_init(adev);
2050 2087
2051 /* io port mapping */ 2088 /* io port mapping */
2052 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 2089 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
@@ -2194,6 +2231,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2194 if (r) 2231 if (r)
2195 DRM_ERROR("registering firmware debugfs failed (%d).\n", r); 2232 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
2196 2233
2234 r = amdgpu_debugfs_vbios_dump_init(adev);
2235 if (r)
2236 DRM_ERROR("Creating vbios dump debugfs failed (%d).\n", r);
2237
2197 if ((amdgpu_testing & 1)) { 2238 if ((amdgpu_testing & 1)) {
2198 if (adev->accel_working) 2239 if (adev->accel_working)
2199 amdgpu_test_moves(adev); 2240 amdgpu_test_moves(adev);
@@ -2269,8 +2310,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
2269 adev->rio_mem = NULL; 2310 adev->rio_mem = NULL;
2270 iounmap(adev->rmmio); 2311 iounmap(adev->rmmio);
2271 adev->rmmio = NULL; 2312 adev->rmmio = NULL;
2272 if (adev->asic_type >= CHIP_BONAIRE) 2313 amdgpu_doorbell_fini(adev);
2273 amdgpu_doorbell_fini(adev);
2274 amdgpu_debugfs_regs_cleanup(adev); 2314 amdgpu_debugfs_regs_cleanup(adev);
2275} 2315}
2276 2316
@@ -2539,7 +2579,8 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev)
2539 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 2579 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
2540 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 2580 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
2541 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 2581 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
2542 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)) { 2582 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
2583 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2543 if (adev->ip_blocks[i].status.hang) { 2584 if (adev->ip_blocks[i].status.hang) {
2544 DRM_INFO("Some block need full reset!\n"); 2585 DRM_INFO("Some block need full reset!\n");
2545 return true; 2586 return true;
@@ -2615,12 +2656,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
2615 goto err; 2656 goto err;
2616 } 2657 }
2617 2658
2618 r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
2619 if (r) {
2620 DRM_ERROR("%p bind failed\n", bo->shadow);
2621 goto err;
2622 }
2623
2624 r = amdgpu_bo_restore_from_shadow(adev, ring, bo, 2659 r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
2625 NULL, fence, true); 2660 NULL, fence, true);
2626 if (r) { 2661 if (r) {
@@ -2653,7 +2688,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
2653 2688
2654 mutex_lock(&adev->virt.lock_reset); 2689 mutex_lock(&adev->virt.lock_reset);
2655 atomic_inc(&adev->gpu_reset_counter); 2690 atomic_inc(&adev->gpu_reset_counter);
2656 adev->gfx.in_reset = true; 2691 adev->in_sriov_reset = true;
2657 2692
2658 /* block TTM */ 2693 /* block TTM */
2659 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); 2694 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
@@ -2764,7 +2799,7 @@ give_up_reset:
2764 dev_info(adev->dev, "GPU reset succeeded!\n"); 2799 dev_info(adev->dev, "GPU reset succeeded!\n");
2765 } 2800 }
2766 2801
2767 adev->gfx.in_reset = false; 2802 adev->in_sriov_reset = false;
2768 mutex_unlock(&adev->virt.lock_reset); 2803 mutex_unlock(&adev->virt.lock_reset);
2769 return r; 2804 return r;
2770} 2805}
@@ -3462,10 +3497,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
3462 3497
3463 valuesize = sizeof(values); 3498 valuesize = sizeof(values);
3464 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) 3499 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor)
3465 r = adev->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, idx, &values[0], &valuesize); 3500 r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
3466 else if (adev->pm.funcs && adev->pm.funcs->read_sensor)
3467 r = adev->pm.funcs->read_sensor(adev, idx, &values[0],
3468 &valuesize);
3469 else 3501 else
3470 return -EINVAL; 3502 return -EINVAL;
3471 3503
@@ -3753,6 +3785,28 @@ int amdgpu_debugfs_init(struct drm_minor *minor)
3753{ 3785{
3754 return 0; 3786 return 0;
3755} 3787}
3788
3789static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data)
3790{
3791 struct drm_info_node *node = (struct drm_info_node *) m->private;
3792 struct drm_device *dev = node->minor->dev;
3793 struct amdgpu_device *adev = dev->dev_private;
3794
3795 seq_write(m, adev->bios, adev->bios_size);
3796 return 0;
3797}
3798
3799static const struct drm_info_list amdgpu_vbios_dump_list[] = {
3800 {"amdgpu_vbios",
3801 amdgpu_debugfs_get_vbios_dump,
3802 0, NULL},
3803};
3804
3805static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev)
3806{
3807 return amdgpu_debugfs_add_files(adev,
3808 amdgpu_vbios_dump_list, 1);
3809}
3756#else 3810#else
3757static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) 3811static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev)
3758{ 3812{
@@ -3762,5 +3816,9 @@ static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
3762{ 3816{
3763 return 0; 3817 return 0;
3764} 3818}
3819static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev)
3820{
3821 return 0;
3822}
3765static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } 3823static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
3766#endif 3824#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 1cb52fd19060..e997ebbe43ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -960,8 +960,10 @@ u8 amdgpu_encode_pci_lane_width(u32 lanes)
960} 960}
961 961
962struct amd_vce_state* 962struct amd_vce_state*
963amdgpu_get_vce_clock_state(struct amdgpu_device *adev, unsigned idx) 963amdgpu_get_vce_clock_state(void *handle, u32 idx)
964{ 964{
965 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
966
965 if (idx < adev->pm.dpm.num_of_vce_states) 967 if (idx < adev->pm.dpm.num_of_vce_states)
966 return &adev->pm.dpm.vce_states[idx]; 968 return &adev->pm.dpm.vce_states[idx];
967 969
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 8c96a4caa715..f79f9ea58b17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -241,177 +241,119 @@ enum amdgpu_pcie_gen {
241 AMDGPU_PCIE_GEN_INVALID = 0xffff 241 AMDGPU_PCIE_GEN_INVALID = 0xffff
242}; 242};
243 243
244struct amdgpu_dpm_funcs { 244#define amdgpu_dpm_pre_set_power_state(adev) \
245 int (*get_temperature)(struct amdgpu_device *adev); 245 ((adev)->powerplay.pp_funcs->pre_set_power_state((adev)->powerplay.pp_handle))
246 int (*pre_set_power_state)(struct amdgpu_device *adev); 246
247 int (*set_power_state)(struct amdgpu_device *adev); 247#define amdgpu_dpm_set_power_state(adev) \
248 void (*post_set_power_state)(struct amdgpu_device *adev); 248 ((adev)->powerplay.pp_funcs->set_power_state((adev)->powerplay.pp_handle))
249 void (*display_configuration_changed)(struct amdgpu_device *adev); 249
250 u32 (*get_sclk)(struct amdgpu_device *adev, bool low); 250#define amdgpu_dpm_post_set_power_state(adev) \
251 u32 (*get_mclk)(struct amdgpu_device *adev, bool low); 251 ((adev)->powerplay.pp_funcs->post_set_power_state((adev)->powerplay.pp_handle))
252 void (*print_power_state)(struct amdgpu_device *adev, struct amdgpu_ps *ps); 252
253 void (*debugfs_print_current_performance_level)(struct amdgpu_device *adev, struct seq_file *m); 253#define amdgpu_dpm_display_configuration_changed(adev) \
254 int (*force_performance_level)(struct amdgpu_device *adev, enum amd_dpm_forced_level level); 254 ((adev)->powerplay.pp_funcs->display_configuration_changed((adev)->powerplay.pp_handle))
255 bool (*vblank_too_short)(struct amdgpu_device *adev);
256 void (*powergate_uvd)(struct amdgpu_device *adev, bool gate);
257 void (*powergate_vce)(struct amdgpu_device *adev, bool gate);
258 void (*enable_bapm)(struct amdgpu_device *adev, bool enable);
259 void (*set_fan_control_mode)(struct amdgpu_device *adev, u32 mode);
260 u32 (*get_fan_control_mode)(struct amdgpu_device *adev);
261 int (*set_fan_speed_percent)(struct amdgpu_device *adev, u32 speed);
262 int (*get_fan_speed_percent)(struct amdgpu_device *adev, u32 *speed);
263 int (*force_clock_level)(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t mask);
264 int (*print_clock_levels)(struct amdgpu_device *adev, enum pp_clock_type type, char *buf);
265 int (*get_sclk_od)(struct amdgpu_device *adev);
266 int (*set_sclk_od)(struct amdgpu_device *adev, uint32_t value);
267 int (*get_mclk_od)(struct amdgpu_device *adev);
268 int (*set_mclk_od)(struct amdgpu_device *adev, uint32_t value);
269 int (*check_state_equal)(struct amdgpu_device *adev,
270 struct amdgpu_ps *cps,
271 struct amdgpu_ps *rps,
272 bool *equal);
273 int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value,
274 int *size);
275
276 struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device *adev, unsigned idx);
277 int (*reset_power_profile_state)(struct amdgpu_device *adev,
278 struct amd_pp_profile *request);
279 int (*get_power_profile_state)(struct amdgpu_device *adev,
280 struct amd_pp_profile *query);
281 int (*set_power_profile_state)(struct amdgpu_device *adev,
282 struct amd_pp_profile *request);
283 int (*switch_power_profile)(struct amdgpu_device *adev,
284 enum amd_pp_profile_type type);
285};
286 255
287#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) 256#define amdgpu_dpm_print_power_state(adev, ps) \
288#define amdgpu_dpm_set_power_state(adev) (adev)->pm.funcs->set_power_state((adev)) 257 ((adev)->powerplay.pp_funcs->print_power_state((adev)->powerplay.pp_handle, (ps)))
289#define amdgpu_dpm_post_set_power_state(adev) (adev)->pm.funcs->post_set_power_state((adev)) 258
290#define amdgpu_dpm_display_configuration_changed(adev) (adev)->pm.funcs->display_configuration_changed((adev)) 259#define amdgpu_dpm_vblank_too_short(adev) \
291#define amdgpu_dpm_print_power_state(adev, ps) (adev)->pm.funcs->print_power_state((adev), (ps)) 260 ((adev)->powerplay.pp_funcs->vblank_too_short((adev)->powerplay.pp_handle))
292#define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev)) 261
293#define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e)) 262#define amdgpu_dpm_enable_bapm(adev, e) \
263 ((adev)->powerplay.pp_funcs->enable_bapm((adev)->powerplay.pp_handle, (e)))
294 264
295#define amdgpu_dpm_read_sensor(adev, idx, value, size) \ 265#define amdgpu_dpm_read_sensor(adev, idx, value, size) \
296 ((adev)->pp_enabled ? \ 266 ((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size)))
297 (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value), (size)) : \
298 (adev)->pm.funcs->read_sensor((adev), (idx), (value), (size)))
299 267
300#define amdgpu_dpm_get_temperature(adev) \ 268#define amdgpu_dpm_get_temperature(adev) \
301 ((adev)->pp_enabled ? \ 269 ((adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle))
302 (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
303 (adev)->pm.funcs->get_temperature((adev)))
304 270
305#define amdgpu_dpm_set_fan_control_mode(adev, m) \ 271#define amdgpu_dpm_set_fan_control_mode(adev, m) \
306 ((adev)->pp_enabled ? \ 272 ((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)))
307 (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
308 (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
309 273
310#define amdgpu_dpm_get_fan_control_mode(adev) \ 274#define amdgpu_dpm_get_fan_control_mode(adev) \
311 ((adev)->pp_enabled ? \ 275 ((adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle))
312 (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
313 (adev)->pm.funcs->get_fan_control_mode((adev)))
314 276
315#define amdgpu_dpm_set_fan_speed_percent(adev, s) \ 277#define amdgpu_dpm_set_fan_speed_percent(adev, s) \
316 ((adev)->pp_enabled ? \ 278 ((adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)))
317 (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
318 (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
319 279
320#define amdgpu_dpm_get_fan_speed_percent(adev, s) \ 280#define amdgpu_dpm_get_fan_speed_percent(adev, s) \
321 ((adev)->pp_enabled ? \ 281 ((adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)))
322 (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
323 (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
324 282
325#define amdgpu_dpm_get_fan_speed_rpm(adev, s) \ 283#define amdgpu_dpm_get_fan_speed_rpm(adev, s) \
326 ((adev)->pp_enabled ? \ 284 ((adev)->powerplay.pp_funcs->get_fan_speed_rpm)((adev)->powerplay.pp_handle, (s))
327 (adev)->powerplay.pp_funcs->get_fan_speed_rpm((adev)->powerplay.pp_handle, (s)) : \
328 -EINVAL)
329 285
330#define amdgpu_dpm_get_sclk(adev, l) \ 286#define amdgpu_dpm_get_sclk(adev, l) \
331 ((adev)->pp_enabled ? \ 287 ((adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)))
332 (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
333 (adev)->pm.funcs->get_sclk((adev), (l)))
334 288
335#define amdgpu_dpm_get_mclk(adev, l) \ 289#define amdgpu_dpm_get_mclk(adev, l) \
336 ((adev)->pp_enabled ? \ 290 ((adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)))
337 (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
338 (adev)->pm.funcs->get_mclk((adev), (l)))
339
340 291
341#define amdgpu_dpm_force_performance_level(adev, l) \ 292#define amdgpu_dpm_force_performance_level(adev, l) \
342 ((adev)->pp_enabled ? \ 293 ((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)))
343 (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
344 (adev)->pm.funcs->force_performance_level((adev), (l)))
345 294
346#define amdgpu_dpm_powergate_uvd(adev, g) \ 295#define amdgpu_dpm_powergate_uvd(adev, g) \
347 ((adev)->pp_enabled ? \ 296 ((adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)))
348 (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
349 (adev)->pm.funcs->powergate_uvd((adev), (g)))
350 297
351#define amdgpu_dpm_powergate_vce(adev, g) \ 298#define amdgpu_dpm_powergate_vce(adev, g) \
352 ((adev)->pp_enabled ? \ 299 ((adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)))
353 (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
354 (adev)->pm.funcs->powergate_vce((adev), (g)))
355 300
356#define amdgpu_dpm_get_current_power_state(adev) \ 301#define amdgpu_dpm_get_current_power_state(adev) \
357 (adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle) 302 ((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle))
358 303
359#define amdgpu_dpm_get_pp_num_states(adev, data) \ 304#define amdgpu_dpm_get_pp_num_states(adev, data) \
360 (adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data) 305 ((adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data))
361 306
362#define amdgpu_dpm_get_pp_table(adev, table) \ 307#define amdgpu_dpm_get_pp_table(adev, table) \
363 (adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table) 308 ((adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table))
364 309
365#define amdgpu_dpm_set_pp_table(adev, buf, size) \ 310#define amdgpu_dpm_set_pp_table(adev, buf, size) \
366 (adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size) 311 ((adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size))
367 312
368#define amdgpu_dpm_print_clock_levels(adev, type, buf) \ 313#define amdgpu_dpm_print_clock_levels(adev, type, buf) \
369 (adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf) 314 ((adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf))
370 315
371#define amdgpu_dpm_force_clock_level(adev, type, level) \ 316#define amdgpu_dpm_force_clock_level(adev, type, level) \
372 (adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level) 317 ((adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level))
373 318
374#define amdgpu_dpm_get_sclk_od(adev) \ 319#define amdgpu_dpm_get_sclk_od(adev) \
375 (adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle) 320 ((adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle))
376 321
377#define amdgpu_dpm_set_sclk_od(adev, value) \ 322#define amdgpu_dpm_set_sclk_od(adev, value) \
378 (adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value) 323 ((adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value))
379 324
380#define amdgpu_dpm_get_mclk_od(adev) \ 325#define amdgpu_dpm_get_mclk_od(adev) \
381 ((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle)) 326 ((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle))
382 327
383#define amdgpu_dpm_set_mclk_od(adev, value) \ 328#define amdgpu_dpm_set_mclk_od(adev, value) \
384 ((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value)) 329 ((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
385 330
386#define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \ 331#define amdgpu_dpm_dispatch_task(adev, task_id, input, output) \
387 (adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output)) 332 ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (input), (output))
388 333
389#define amgdpu_dpm_check_state_equal(adev, cps, rps, equal) (adev)->pm.funcs->check_state_equal((adev), (cps),(rps),(equal)) 334#define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \
335 ((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal)))
390 336
391#define amdgpu_dpm_get_vce_clock_state(adev, i) \ 337#define amdgpu_dpm_get_vce_clock_state(adev, i) \
392 ((adev)->pp_enabled ? \ 338 ((adev)->powerplay.pp_funcs->get_vce_clock_state((adev)->powerplay.pp_handle, (i)))
393 (adev)->powerplay.pp_funcs->get_vce_clock_state((adev)->powerplay.pp_handle, (i)) : \
394 (adev)->pm.funcs->get_vce_clock_state((adev), (i)))
395 339
396#define amdgpu_dpm_get_performance_level(adev) \ 340#define amdgpu_dpm_get_performance_level(adev) \
397 ((adev)->pp_enabled ? \ 341 ((adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle))
398 (adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle) : \
399 (adev)->pm.dpm.forced_level)
400 342
401#define amdgpu_dpm_reset_power_profile_state(adev, request) \ 343#define amdgpu_dpm_reset_power_profile_state(adev, request) \
402 ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ 344 ((adev)->powerplay.pp_funcs->reset_power_profile_state(\
403 (adev)->powerplay.pp_handle, request)) 345 (adev)->powerplay.pp_handle, request))
404 346
405#define amdgpu_dpm_get_power_profile_state(adev, query) \ 347#define amdgpu_dpm_get_power_profile_state(adev, query) \
406 ((adev)->powerplay.pp_funcs->get_power_profile_state(\ 348 ((adev)->powerplay.pp_funcs->get_power_profile_state(\
407 (adev)->powerplay.pp_handle, query)) 349 (adev)->powerplay.pp_handle, query))
408 350
409#define amdgpu_dpm_set_power_profile_state(adev, request) \ 351#define amdgpu_dpm_set_power_profile_state(adev, request) \
410 ((adev)->powerplay.pp_funcs->set_power_profile_state(\ 352 ((adev)->powerplay.pp_funcs->set_power_profile_state(\
411 (adev)->powerplay.pp_handle, request)) 353 (adev)->powerplay.pp_handle, request))
412 354
413#define amdgpu_dpm_switch_power_profile(adev, type) \ 355#define amdgpu_dpm_switch_power_profile(adev, type) \
414 ((adev)->powerplay.pp_funcs->switch_power_profile(\ 356 ((adev)->powerplay.pp_funcs->switch_power_profile(\
415 (adev)->powerplay.pp_handle, type)) 357 (adev)->powerplay.pp_handle, type))
416 358
417struct amdgpu_dpm { 359struct amdgpu_dpm {
@@ -485,7 +427,6 @@ struct amdgpu_pm {
485 struct amdgpu_dpm dpm; 427 struct amdgpu_dpm dpm;
486 const struct firmware *fw; /* SMC firmware */ 428 const struct firmware *fw; /* SMC firmware */
487 uint32_t fw_version; 429 uint32_t fw_version;
488 const struct amdgpu_dpm_funcs *funcs;
489 uint32_t pcie_gen_mask; 430 uint32_t pcie_gen_mask;
490 uint32_t pcie_mlw_mask; 431 uint32_t pcie_mlw_mask;
491 struct amd_pp_display_configuration pm_display_cfg;/* set by DAL */ 432 struct amd_pp_display_configuration pm_display_cfg;/* set by DAL */
@@ -551,6 +492,6 @@ u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev,
551u8 amdgpu_encode_pci_lane_width(u32 lanes); 492u8 amdgpu_encode_pci_lane_width(u32 lanes);
552 493
553struct amd_vce_state* 494struct amd_vce_state*
554amdgpu_get_vce_clock_state(struct amdgpu_device *adev, unsigned idx); 495amdgpu_get_vce_clock_state(void *handle, u32 idx);
555 496
556#endif 497#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5e9ce8a29669..4f98960e47f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -68,14 +68,16 @@
68 * - 3.16.0 - Add reserved vmid support 68 * - 3.16.0 - Add reserved vmid support
69 * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. 69 * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
70 * - 3.18.0 - Export gpu always on cu bitmap 70 * - 3.18.0 - Export gpu always on cu bitmap
71 * - 3.19.0 - Add support for UVD MJPEG decode
72 * - 3.20.0 - Add support for local BOs
71 */ 73 */
72#define KMS_DRIVER_MAJOR 3 74#define KMS_DRIVER_MAJOR 3
73#define KMS_DRIVER_MINOR 18 75#define KMS_DRIVER_MINOR 20
74#define KMS_DRIVER_PATCHLEVEL 0 76#define KMS_DRIVER_PATCHLEVEL 0
75 77
76int amdgpu_vram_limit = 0; 78int amdgpu_vram_limit = 0;
77int amdgpu_vis_vram_limit = 0; 79int amdgpu_vis_vram_limit = 0;
78unsigned amdgpu_gart_size = 256; 80int amdgpu_gart_size = -1; /* auto */
79int amdgpu_gtt_size = -1; /* auto */ 81int amdgpu_gtt_size = -1; /* auto */
80int amdgpu_moverate = -1; /* auto */ 82int amdgpu_moverate = -1; /* auto */
81int amdgpu_benchmarking = 0; 83int amdgpu_benchmarking = 0;
@@ -90,10 +92,11 @@ int amdgpu_dpm = -1;
90int amdgpu_fw_load_type = -1; 92int amdgpu_fw_load_type = -1;
91int amdgpu_aspm = -1; 93int amdgpu_aspm = -1;
92int amdgpu_runtime_pm = -1; 94int amdgpu_runtime_pm = -1;
93unsigned amdgpu_ip_block_mask = 0xffffffff; 95uint amdgpu_ip_block_mask = 0xffffffff;
94int amdgpu_bapm = -1; 96int amdgpu_bapm = -1;
95int amdgpu_deep_color = 0; 97int amdgpu_deep_color = 0;
96int amdgpu_vm_size = -1; 98int amdgpu_vm_size = -1;
99int amdgpu_vm_fragment_size = -1;
97int amdgpu_vm_block_size = -1; 100int amdgpu_vm_block_size = -1;
98int amdgpu_vm_fault_stop = 0; 101int amdgpu_vm_fault_stop = 0;
99int amdgpu_vm_debug = 0; 102int amdgpu_vm_debug = 0;
@@ -104,14 +107,14 @@ int amdgpu_sched_jobs = 32;
104int amdgpu_sched_hw_submission = 2; 107int amdgpu_sched_hw_submission = 2;
105int amdgpu_no_evict = 0; 108int amdgpu_no_evict = 0;
106int amdgpu_direct_gma_size = 0; 109int amdgpu_direct_gma_size = 0;
107unsigned amdgpu_pcie_gen_cap = 0; 110uint amdgpu_pcie_gen_cap = 0;
108unsigned amdgpu_pcie_lane_cap = 0; 111uint amdgpu_pcie_lane_cap = 0;
109unsigned amdgpu_cg_mask = 0xffffffff; 112uint amdgpu_cg_mask = 0xffffffff;
110unsigned amdgpu_pg_mask = 0xffffffff; 113uint amdgpu_pg_mask = 0xffffffff;
111unsigned amdgpu_sdma_phase_quantum = 32; 114uint amdgpu_sdma_phase_quantum = 32;
112char *amdgpu_disable_cu = NULL; 115char *amdgpu_disable_cu = NULL;
113char *amdgpu_virtual_display = NULL; 116char *amdgpu_virtual_display = NULL;
114unsigned amdgpu_pp_feature_mask = 0xffffffff; 117uint amdgpu_pp_feature_mask = 0xffffffff;
115int amdgpu_ngg = 0; 118int amdgpu_ngg = 0;
116int amdgpu_prim_buf_per_se = 0; 119int amdgpu_prim_buf_per_se = 0;
117int amdgpu_pos_buf_per_se = 0; 120int amdgpu_pos_buf_per_se = 0;
@@ -126,7 +129,7 @@ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
126MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes"); 129MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
127module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); 130module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
128 131
129MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc.)"); 132MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
130module_param_named(gartsize, amdgpu_gart_size, uint, 0600); 133module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
131 134
132MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); 135MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
@@ -183,6 +186,9 @@ module_param_named(deep_color, amdgpu_deep_color, int, 0444);
183MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)"); 186MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
184module_param_named(vm_size, amdgpu_vm_size, int, 0444); 187module_param_named(vm_size, amdgpu_vm_size, int, 0444);
185 188
189MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
190module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
191
186MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)"); 192MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
187module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); 193module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
188 194
@@ -603,6 +609,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
603 609
604 drm_dev_unregister(dev); 610 drm_dev_unregister(dev);
605 drm_dev_unref(dev); 611 drm_dev_unref(dev);
612 pci_disable_device(pdev);
613 pci_set_drvdata(pdev, NULL);
606} 614}
607 615
608static void 616static void
@@ -847,6 +855,7 @@ static struct drm_driver kms_driver = {
847 .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, 855 .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
848 .gem_prime_vmap = amdgpu_gem_prime_vmap, 856 .gem_prime_vmap = amdgpu_gem_prime_vmap,
849 .gem_prime_vunmap = amdgpu_gem_prime_vunmap, 857 .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
858 .gem_prime_mmap = amdgpu_gem_prime_mmap,
850 859
851 .name = DRIVER_NAME, 860 .name = DRIVER_NAME,
852 .desc = DRIVER_DESC, 861 .desc = DRIVER_DESC,
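
The read-only capability masks above switch from "unsigned" to the kernel's uint typedef, so the variable declarations spell the same type name their module_param_named(..., uint, ...) calls use, and the driver also gains the gem_prime_mmap hook. A minimal out-of-tree sketch of the same parameter pattern follows; "demo_mask" is a hypothetical parameter, not an amdgpu one.

/* Minimal out-of-tree module sketch of the MODULE_PARM_DESC() +
 * module_param_named() pattern used above; demo_mask is hypothetical.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>

static uint demo_mask = 0xffffffff;
MODULE_PARM_DESC(demo_mask, "Demo feature mask (default 0xffffffff)");
module_param_named(demo_mask, demo_mask, uint, 0444);

static int __init demo_init(void)
{
	pr_info("demo_mask = 0x%x\n", demo_mask);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
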
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 9afa9c097e1f..562930b17a6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -149,7 +149,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
149 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 149 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
150 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 150 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
151 AMDGPU_GEM_CREATE_VRAM_CLEARED, 151 AMDGPU_GEM_CREATE_VRAM_CLEARED,
152 true, &gobj); 152 true, NULL, &gobj);
153 if (ret) { 153 if (ret) {
154 pr_err("failed to allocate framebuffer (%d)\n", aligned_size); 154 pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
155 return -ENOMEM; 155 return -ENOMEM;
@@ -303,10 +303,10 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb
303 if (rfb->obj) { 303 if (rfb->obj) {
304 amdgpufb_destroy_pinned_object(rfb->obj); 304 amdgpufb_destroy_pinned_object(rfb->obj);
305 rfb->obj = NULL; 305 rfb->obj = NULL;
306 drm_framebuffer_unregister_private(&rfb->base);
307 drm_framebuffer_cleanup(&rfb->base);
306 } 308 }
307 drm_fb_helper_fini(&rfbdev->helper); 309 drm_fb_helper_fini(&rfbdev->helper);
308 drm_framebuffer_unregister_private(&rfb->base);
309 drm_framebuffer_cleanup(&rfb->base);
310 310
311 return 0; 311 return 0;
312} 312}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 94c1e2e8e34c..f4370081f6e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -57,18 +57,6 @@
57 */ 57 */
58 58
59/** 59/**
60 * amdgpu_gart_set_defaults - set the default gart_size
61 *
62 * @adev: amdgpu_device pointer
63 *
64 * Set the default gart_size based on parameters and available VRAM.
65 */
66void amdgpu_gart_set_defaults(struct amdgpu_device *adev)
67{
68 adev->mc.gart_size = (uint64_t)amdgpu_gart_size << 20;
69}
70
71/**
72 * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table 60 * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
73 * 61 *
74 * @adev: amdgpu_device pointer 62 * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index d4cce6936200..afbe803b1a13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -56,7 +56,6 @@ struct amdgpu_gart {
56 const struct amdgpu_gart_funcs *gart_funcs; 56 const struct amdgpu_gart_funcs *gart_funcs;
57}; 57};
58 58
59void amdgpu_gart_set_defaults(struct amdgpu_device *adev);
60int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); 59int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
61void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); 60void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
62int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); 61int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
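
With amdgpu_gart_set_defaults() removed, the megabyte-sized gartsize parameter (now documented as -1 = auto) is converted to bytes where the per-ASIC memory-controller setup picks its default instead. A standalone sketch of that conversion, mirroring the << 20 shift of the deleted helper; the 256 MiB fallback is an arbitrary illustration, not the driver's per-ASIC default.

/* Sketch of the gartsize handling: megabytes -> bytes, -1 = "pick a default". */
#include <stdio.h>
#include <stdint.h>

static uint64_t gart_size_bytes(int gartsize_mb)
{
	if (gartsize_mb == -1)
		return 256ULL << 20;		/* placeholder default, for illustration */

	return (uint64_t)gartsize_mb << 20;	/* MB -> bytes, as in the removed helper */
}

int main(void)
{
	printf("gartsize=64 -> %llu bytes\n",
	       (unsigned long long)gart_size_bytes(64));
	printf("gartsize=-1 -> %llu bytes\n",
	       (unsigned long long)gart_size_bytes(-1));
	return 0;
}
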
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 81127ffcefb2..b0d45c8e6bb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -44,11 +44,12 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
44} 44}
45 45
46int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, 46int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
47 int alignment, u32 initial_domain, 47 int alignment, u32 initial_domain,
48 u64 flags, bool kernel, 48 u64 flags, bool kernel,
49 struct drm_gem_object **obj) 49 struct reservation_object *resv,
50 struct drm_gem_object **obj)
50{ 51{
51 struct amdgpu_bo *robj; 52 struct amdgpu_bo *bo;
52 int r; 53 int r;
53 54
54 *obj = NULL; 55 *obj = NULL;
@@ -59,7 +60,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
59 60
60retry: 61retry:
61 r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, 62 r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
62 flags, NULL, NULL, 0, &robj); 63 flags, NULL, resv, 0, &bo);
63 if (r) { 64 if (r) {
64 if (r != -ERESTARTSYS) { 65 if (r != -ERESTARTSYS) {
65 if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { 66 if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
@@ -71,7 +72,7 @@ retry:
71 } 72 }
72 return r; 73 return r;
73 } 74 }
74 *obj = &robj->gem_base; 75 *obj = &bo->gem_base;
75 76
76 return 0; 77 return 0;
77} 78}
@@ -112,7 +113,17 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
112 struct amdgpu_fpriv *fpriv = file_priv->driver_priv; 113 struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
113 struct amdgpu_vm *vm = &fpriv->vm; 114 struct amdgpu_vm *vm = &fpriv->vm;
114 struct amdgpu_bo_va *bo_va; 115 struct amdgpu_bo_va *bo_va;
116 struct mm_struct *mm;
115 int r; 117 int r;
118
119 mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
120 if (mm && mm != current->mm)
121 return -EPERM;
122
123 if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
124 abo->tbo.resv != vm->root.base.bo->tbo.resv)
125 return -EPERM;
126
116 r = amdgpu_bo_reserve(abo, false); 127 r = amdgpu_bo_reserve(abo, false);
117 if (r) 128 if (r)
118 return r; 129 return r;
@@ -127,35 +138,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
127 return 0; 138 return 0;
128} 139}
129 140
130static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo)
131{
132 /* if anything is swapped out don't swap it in here,
133 just abort and wait for the next CS */
134 if (!amdgpu_bo_gpu_accessible(bo))
135 return -ERESTARTSYS;
136
137 if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
138 return -ERESTARTSYS;
139
140 return 0;
141}
142
143static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev,
144 struct amdgpu_vm *vm,
145 struct list_head *list)
146{
147 struct ttm_validate_buffer *entry;
148
149 list_for_each_entry(entry, list, head) {
150 struct amdgpu_bo *bo =
151 container_of(entry->bo, struct amdgpu_bo, tbo);
152 if (amdgpu_gem_vm_check(NULL, bo))
153 return false;
154 }
155
156 return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL);
157}
158
159void amdgpu_gem_object_close(struct drm_gem_object *obj, 141void amdgpu_gem_object_close(struct drm_gem_object *obj,
160 struct drm_file *file_priv) 142 struct drm_file *file_priv)
161{ 143{
@@ -165,13 +147,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
165 struct amdgpu_vm *vm = &fpriv->vm; 147 struct amdgpu_vm *vm = &fpriv->vm;
166 148
167 struct amdgpu_bo_list_entry vm_pd; 149 struct amdgpu_bo_list_entry vm_pd;
168 struct list_head list; 150 struct list_head list, duplicates;
169 struct ttm_validate_buffer tv; 151 struct ttm_validate_buffer tv;
170 struct ww_acquire_ctx ticket; 152 struct ww_acquire_ctx ticket;
171 struct amdgpu_bo_va *bo_va; 153 struct amdgpu_bo_va *bo_va;
172 int r; 154 int r;
173 155
174 INIT_LIST_HEAD(&list); 156 INIT_LIST_HEAD(&list);
157 INIT_LIST_HEAD(&duplicates);
175 158
176 tv.bo = &bo->tbo; 159 tv.bo = &bo->tbo;
177 tv.shared = true; 160 tv.shared = true;
@@ -179,7 +162,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
179 162
180 amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); 163 amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
181 164
182 r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); 165 r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
183 if (r) { 166 if (r) {
184 dev_err(adev->dev, "leaking bo va because " 167 dev_err(adev->dev, "leaking bo va because "
185 "we fail to reserve bo (%d)\n", r); 168 "we fail to reserve bo (%d)\n", r);
@@ -189,7 +172,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
189 if (bo_va && --bo_va->ref_count == 0) { 172 if (bo_va && --bo_va->ref_count == 0) {
190 amdgpu_vm_bo_rmv(adev, bo_va); 173 amdgpu_vm_bo_rmv(adev, bo_va);
191 174
192 if (amdgpu_gem_vm_ready(adev, vm, &list)) { 175 if (amdgpu_vm_ready(vm)) {
193 struct dma_fence *fence = NULL; 176 struct dma_fence *fence = NULL;
194 177
195 r = amdgpu_vm_clear_freed(adev, vm, &fence); 178 r = amdgpu_vm_clear_freed(adev, vm, &fence);
@@ -214,20 +197,22 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
214 struct drm_file *filp) 197 struct drm_file *filp)
215{ 198{
216 struct amdgpu_device *adev = dev->dev_private; 199 struct amdgpu_device *adev = dev->dev_private;
200 struct amdgpu_fpriv *fpriv = filp->driver_priv;
201 struct amdgpu_vm *vm = &fpriv->vm;
217 union drm_amdgpu_gem_create *args = data; 202 union drm_amdgpu_gem_create *args = data;
203 uint64_t flags = args->in.domain_flags;
218 uint64_t size = args->in.bo_size; 204 uint64_t size = args->in.bo_size;
205 struct reservation_object *resv = NULL;
219 struct drm_gem_object *gobj; 206 struct drm_gem_object *gobj;
220 uint32_t handle; 207 uint32_t handle;
221 bool kernel = false;
222 int r; 208 int r;
223 209
224 /* reject invalid gem flags */ 210 /* reject invalid gem flags */
225 if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 211 if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
226 AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 212 AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
227 AMDGPU_GEM_CREATE_CPU_GTT_USWC | 213 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
228 AMDGPU_GEM_CREATE_VRAM_CLEARED| 214 AMDGPU_GEM_CREATE_VRAM_CLEARED |
229 AMDGPU_GEM_CREATE_SHADOW | 215 AMDGPU_GEM_CREATE_VM_ALWAYS_VALID))
230 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
231 return -EINVAL; 216 return -EINVAL;
232 217
233 /* reject invalid gem domains */ 218 /* reject invalid gem domains */
@@ -242,7 +227,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
242 /* create a gem object to contain this object in */ 227 /* create a gem object to contain this object in */
243 if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | 228 if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
244 AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { 229 AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
245 kernel = true; 230 flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
246 if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) 231 if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
247 size = size << AMDGPU_GDS_SHIFT; 232 size = size << AMDGPU_GDS_SHIFT;
248 else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) 233 else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
@@ -254,10 +239,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
254 } 239 }
255 size = roundup(size, PAGE_SIZE); 240 size = roundup(size, PAGE_SIZE);
256 241
242 if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
243 r = amdgpu_bo_reserve(vm->root.base.bo, false);
244 if (r)
245 return r;
246
247 resv = vm->root.base.bo->tbo.resv;
248 }
249
257 r = amdgpu_gem_object_create(adev, size, args->in.alignment, 250 r = amdgpu_gem_object_create(adev, size, args->in.alignment,
258 (u32)(0xffffffff & args->in.domains), 251 (u32)(0xffffffff & args->in.domains),
259 args->in.domain_flags, 252 flags, false, resv, &gobj);
260 kernel, &gobj); 253 if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
254 if (!r) {
255 struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
256
257 abo->parent = amdgpu_bo_ref(vm->root.base.bo);
258 }
259 amdgpu_bo_unreserve(vm->root.base.bo);
260 }
261 if (r) 261 if (r)
262 return r; 262 return r;
263 263
@@ -299,9 +299,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
299 } 299 }
300 300
301 /* create a gem object to contain this object in */ 301 /* create a gem object to contain this object in */
302 r = amdgpu_gem_object_create(adev, args->size, 0, 302 r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
303 AMDGPU_GEM_DOMAIN_CPU, 0, 303 0, 0, NULL, &gobj);
304 0, &gobj);
305 if (r) 304 if (r)
306 return r; 305 return r;
307 306
@@ -319,8 +318,6 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
319 } 318 }
320 319
321 if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { 320 if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
322 down_read(&current->mm->mmap_sem);
323
324 r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, 321 r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
325 bo->tbo.ttm->pages); 322 bo->tbo.ttm->pages);
326 if (r) 323 if (r)
@@ -335,8 +332,6 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
335 amdgpu_bo_unreserve(bo); 332 amdgpu_bo_unreserve(bo);
336 if (r) 333 if (r)
337 goto free_pages; 334 goto free_pages;
338
339 up_read(&current->mm->mmap_sem);
340 } 335 }
341 336
342 r = drm_gem_handle_create(filp, gobj, &handle); 337 r = drm_gem_handle_create(filp, gobj, &handle);
@@ -513,10 +508,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
513 struct list_head *list, 508 struct list_head *list,
514 uint32_t operation) 509 uint32_t operation)
515{ 510{
516 int r = -ERESTARTSYS; 511 int r;
517 512
518 if (!amdgpu_gem_vm_ready(adev, vm, list)) 513 if (!amdgpu_vm_ready(vm))
519 goto error; 514 return;
520 515
521 r = amdgpu_vm_update_directories(adev, vm); 516 r = amdgpu_vm_update_directories(adev, vm);
522 if (r) 517 if (r)
@@ -553,7 +548,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
553 struct amdgpu_bo_list_entry vm_pd; 548 struct amdgpu_bo_list_entry vm_pd;
554 struct ttm_validate_buffer tv; 549 struct ttm_validate_buffer tv;
555 struct ww_acquire_ctx ticket; 550 struct ww_acquire_ctx ticket;
556 struct list_head list; 551 struct list_head list, duplicates;
557 uint64_t va_flags; 552 uint64_t va_flags;
558 int r = 0; 553 int r = 0;
559 554
@@ -589,6 +584,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
589 } 584 }
590 585
591 INIT_LIST_HEAD(&list); 586 INIT_LIST_HEAD(&list);
587 INIT_LIST_HEAD(&duplicates);
592 if ((args->operation != AMDGPU_VA_OP_CLEAR) && 588 if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
593 !(args->flags & AMDGPU_VM_PAGE_PRT)) { 589 !(args->flags & AMDGPU_VM_PAGE_PRT)) {
594 gobj = drm_gem_object_lookup(filp, args->handle); 590 gobj = drm_gem_object_lookup(filp, args->handle);
@@ -605,7 +601,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
605 601
606 amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); 602 amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
607 603
608 r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); 604 r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
609 if (r) 605 if (r)
610 goto error_unref; 606 goto error_unref;
611 607
@@ -623,7 +619,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
623 619
624 switch (args->operation) { 620 switch (args->operation) {
625 case AMDGPU_VA_OP_MAP: 621 case AMDGPU_VA_OP_MAP:
626 r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, 622 r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
627 args->map_size); 623 args->map_size);
628 if (r) 624 if (r)
629 goto error_backoff; 625 goto error_backoff;
@@ -643,7 +639,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
643 args->map_size); 639 args->map_size);
644 break; 640 break;
645 case AMDGPU_VA_OP_REPLACE: 641 case AMDGPU_VA_OP_REPLACE:
646 r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, 642 r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
647 args->map_size); 643 args->map_size);
648 if (r) 644 if (r)
649 goto error_backoff; 645 goto error_backoff;
@@ -671,6 +667,7 @@ error_unref:
671int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, 667int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
672 struct drm_file *filp) 668 struct drm_file *filp)
673{ 669{
670 struct amdgpu_device *adev = dev->dev_private;
674 struct drm_amdgpu_gem_op *args = data; 671 struct drm_amdgpu_gem_op *args = data;
675 struct drm_gem_object *gobj; 672 struct drm_gem_object *gobj;
676 struct amdgpu_bo *robj; 673 struct amdgpu_bo *robj;
@@ -718,6 +715,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
718 if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) 715 if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
719 robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; 716 robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
720 717
718 if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
719 amdgpu_vm_bo_invalidate(adev, robj, true);
720
721 amdgpu_bo_unreserve(robj); 721 amdgpu_bo_unreserve(robj);
722 break; 722 break;
723 default: 723 default:
@@ -747,8 +747,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
747 r = amdgpu_gem_object_create(adev, args->size, 0, 747 r = amdgpu_gem_object_create(adev, args->size, 0,
748 AMDGPU_GEM_DOMAIN_VRAM, 748 AMDGPU_GEM_DOMAIN_VRAM,
749 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, 749 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
750 ttm_bo_type_device, 750 false, NULL, &gobj);
751 &gobj);
752 if (r) 751 if (r)
753 return -ENOMEM; 752 return -ENOMEM;
754 753
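
The create ioctl above now validates flags against a mask that includes AMDGPU_GEM_CREATE_VM_ALWAYS_VALID; when that flag is set it reserves the root page-directory BO, shares its reservation object with the new BO, and makes the PD the BO's parent, so per-VM BOs are locked together with the VM. A standalone sketch of the flag-mask check follows; the bit positions below are made up so the example compiles, the real values live in amdgpu_drm.h.

/* Sketch of the "reject invalid gem flags" check.  Bit values are hypothetical. */
#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define GEM_CREATE_CPU_ACCESS_REQUIRED	(1ULL << 0)
#define GEM_CREATE_NO_CPU_ACCESS	(1ULL << 1)
#define GEM_CREATE_CPU_GTT_USWC		(1ULL << 2)
#define GEM_CREATE_VRAM_CLEARED		(1ULL << 3)
#define GEM_CREATE_VM_ALWAYS_VALID	(1ULL << 6)

static int check_create_flags(uint64_t flags)
{
	const uint64_t allowed = GEM_CREATE_CPU_ACCESS_REQUIRED |
				 GEM_CREATE_NO_CPU_ACCESS |
				 GEM_CREATE_CPU_GTT_USWC |
				 GEM_CREATE_VRAM_CLEARED |
				 GEM_CREATE_VM_ALWAYS_VALID;

	return (flags & ~allowed) ? -EINVAL : 0;
}

int main(void)
{
	printf("%d\n", check_create_flags(GEM_CREATE_VM_ALWAYS_VALID));	/* 0 */
	printf("%d\n", check_create_flags(1ULL << 40));			/* -EINVAL */
	return 0;
}
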
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 4f6c68fc1dd9..4fcd98e65998 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -260,8 +260,13 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
260 /* create MQD for KIQ */ 260 /* create MQD for KIQ */
261 ring = &adev->gfx.kiq.ring; 261 ring = &adev->gfx.kiq.ring;
262 if (!ring->mqd_obj) { 262 if (!ring->mqd_obj) {
 263 /* Originally the KIQ MQD was put in the GTT domain, but for SR-IOV the VRAM
 264 * domain is a must; otherwise the hypervisor's SAVE_VF step fails after driver
 265 * unload because the MQD has been deallocated and the GART unbound. For strict
 266 * coverage, use the VRAM domain for the KIQ MQD on both SR-IOV and bare metal.
267 */
263 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 268 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
264 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 269 AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
265 &ring->mqd_gpu_addr, &ring->mqd_ptr); 270 &ring->mqd_gpu_addr, &ring->mqd_ptr);
266 if (r) { 271 if (r) {
267 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); 272 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 5e6b90c6794f..0d15eb7d31d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -28,7 +28,7 @@
28struct amdgpu_gtt_mgr { 28struct amdgpu_gtt_mgr {
29 struct drm_mm mm; 29 struct drm_mm mm;
30 spinlock_t lock; 30 spinlock_t lock;
31 uint64_t available; 31 atomic64_t available;
32}; 32};
33 33
34/** 34/**
@@ -54,7 +54,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
54 size = (adev->mc.gart_size >> PAGE_SHIFT) - start; 54 size = (adev->mc.gart_size >> PAGE_SHIFT) - start;
55 drm_mm_init(&mgr->mm, start, size); 55 drm_mm_init(&mgr->mm, start, size);
56 spin_lock_init(&mgr->lock); 56 spin_lock_init(&mgr->lock);
57 mgr->available = p_size; 57 atomic64_set(&mgr->available, p_size);
58 man->priv = mgr; 58 man->priv = mgr;
59 return 0; 59 return 0;
60} 60}
@@ -108,10 +108,10 @@ bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem)
108 * 108 *
109 * Allocate the address space for a node. 109 * Allocate the address space for a node.
110 */ 110 */
111int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, 111static int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
112 struct ttm_buffer_object *tbo, 112 struct ttm_buffer_object *tbo,
113 const struct ttm_place *place, 113 const struct ttm_place *place,
114 struct ttm_mem_reg *mem) 114 struct ttm_mem_reg *mem)
115{ 115{
116 struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); 116 struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
117 struct amdgpu_gtt_mgr *mgr = man->priv; 117 struct amdgpu_gtt_mgr *mgr = man->priv;
@@ -143,25 +143,12 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
143 fpfn, lpfn, mode); 143 fpfn, lpfn, mode);
144 spin_unlock(&mgr->lock); 144 spin_unlock(&mgr->lock);
145 145
146 if (!r) { 146 if (!r)
147 mem->start = node->start; 147 mem->start = node->start;
148 if (&tbo->mem == mem)
149 tbo->offset = (tbo->mem.start << PAGE_SHIFT) +
150 tbo->bdev->man[tbo->mem.mem_type].gpu_offset;
151 }
152 148
153 return r; 149 return r;
154} 150}
155 151
156void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man)
157{
158 struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
159 struct amdgpu_gtt_mgr *mgr = man->priv;
160
161 seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
162 man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20);
163
164}
165/** 152/**
166 * amdgpu_gtt_mgr_new - allocate a new node 153 * amdgpu_gtt_mgr_new - allocate a new node
167 * 154 *
@@ -182,11 +169,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
182 int r; 169 int r;
183 170
184 spin_lock(&mgr->lock); 171 spin_lock(&mgr->lock);
185 if (mgr->available < mem->num_pages) { 172 if (atomic64_read(&mgr->available) < mem->num_pages) {
186 spin_unlock(&mgr->lock); 173 spin_unlock(&mgr->lock);
187 return 0; 174 return 0;
188 } 175 }
189 mgr->available -= mem->num_pages; 176 atomic64_sub(mem->num_pages, &mgr->available);
190 spin_unlock(&mgr->lock); 177 spin_unlock(&mgr->lock);
191 178
192 node = kzalloc(sizeof(*node), GFP_KERNEL); 179 node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -213,9 +200,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
213 200
214 return 0; 201 return 0;
215err_out: 202err_out:
216 spin_lock(&mgr->lock); 203 atomic64_add(mem->num_pages, &mgr->available);
217 mgr->available += mem->num_pages;
218 spin_unlock(&mgr->lock);
219 204
220 return r; 205 return r;
221} 206}
@@ -242,30 +227,47 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man,
242 spin_lock(&mgr->lock); 227 spin_lock(&mgr->lock);
243 if (node->start != AMDGPU_BO_INVALID_OFFSET) 228 if (node->start != AMDGPU_BO_INVALID_OFFSET)
244 drm_mm_remove_node(node); 229 drm_mm_remove_node(node);
245 mgr->available += mem->num_pages;
246 spin_unlock(&mgr->lock); 230 spin_unlock(&mgr->lock);
231 atomic64_add(mem->num_pages, &mgr->available);
247 232
248 kfree(node); 233 kfree(node);
249 mem->mm_node = NULL; 234 mem->mm_node = NULL;
250} 235}
251 236
252/** 237/**
238 * amdgpu_gtt_mgr_usage - return usage of GTT domain
239 *
240 * @man: TTM memory type manager
241 *
242 * Return how many bytes are used in the GTT domain
243 */
244uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man)
245{
246 struct amdgpu_gtt_mgr *mgr = man->priv;
247
248 return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE;
249}
250
251/**
253 * amdgpu_gtt_mgr_debug - dump VRAM table 252 * amdgpu_gtt_mgr_debug - dump VRAM table
254 * 253 *
255 * @man: TTM memory type manager 254 * @man: TTM memory type manager
256 * @prefix: text prefix 255 * @printer: DRM printer to use
257 * 256 *
258 * Dump the table content using printk. 257 * Dump the table content using printk.
259 */ 258 */
260static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, 259static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man,
261 const char *prefix) 260 struct drm_printer *printer)
262{ 261{
263 struct amdgpu_gtt_mgr *mgr = man->priv; 262 struct amdgpu_gtt_mgr *mgr = man->priv;
264 struct drm_printer p = drm_debug_printer(prefix);
265 263
266 spin_lock(&mgr->lock); 264 spin_lock(&mgr->lock);
267 drm_mm_print(&mgr->mm, &p); 265 drm_mm_print(&mgr->mm, printer);
268 spin_unlock(&mgr->lock); 266 spin_unlock(&mgr->lock);
267
268 drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n",
269 man->size, (u64)atomic64_read(&mgr->available),
270 amdgpu_gtt_mgr_usage(man) >> 20);
269} 271}
270 272
271const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = { 273const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = {
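
The GTT manager's "available" counter becomes an atomic64 so the new amdgpu_gtt_mgr_usage() can report usage without taking the manager's spinlock: usage is simply (man->size - available) pages. A standalone sketch of that accounting, with C11 atomics standing in for the kernel's atomic64_t helpers.

/* Sketch of the lock-free GTT usage accounting. */
#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

#define PAGE_SIZE 4096ULL

struct gtt_mgr {
	uint64_t size;				/* manager size in pages */
	atomic_uint_least64_t available;	/* free pages */
};

static uint64_t gtt_mgr_usage(struct gtt_mgr *mgr)
{
	return (mgr->size - atomic_load(&mgr->available)) * PAGE_SIZE;
}

int main(void)
{
	struct gtt_mgr mgr = { .size = 1024 };

	atomic_init(&mgr.available, mgr.size);
	atomic_fetch_sub(&mgr.available, 256);	/* allocate 256 pages */
	printf("usage: %llu bytes\n", (unsigned long long)gtt_mgr_usage(&mgr));
	atomic_fetch_add(&mgr.available, 256);	/* free them again */
	return 0;
}
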
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 3ab4c65ecc8b..f5f27e4f0f7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -169,6 +169,12 @@ restart_ih:
169 while (adev->irq.ih.rptr != wptr) { 169 while (adev->irq.ih.rptr != wptr) {
170 u32 ring_index = adev->irq.ih.rptr >> 2; 170 u32 ring_index = adev->irq.ih.rptr >> 2;
171 171
172 /* Prescreening of high-frequency interrupts */
173 if (!amdgpu_ih_prescreen_iv(adev)) {
174 adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
175 continue;
176 }
177
172 /* Before dispatching irq to IP blocks, send it to amdkfd */ 178 /* Before dispatching irq to IP blocks, send it to amdkfd */
173 amdgpu_amdkfd_interrupt(adev, 179 amdgpu_amdkfd_interrupt(adev,
174 (const void *) &adev->irq.ih.ring[ring_index]); 180 (const void *) &adev->irq.ih.ring[ring_index]);
@@ -190,3 +196,79 @@ restart_ih:
190 196
191 return IRQ_HANDLED; 197 return IRQ_HANDLED;
192} 198}
199
200/**
201 * amdgpu_ih_add_fault - Add a page fault record
202 *
203 * @adev: amdgpu device pointer
204 * @key: 64-bit encoding of PASID and address
205 *
206 * This should be called when a retry page fault interrupt is
207 * received. If this is a new page fault, it will be added to a hash
208 * table. The return value indicates whether this is a new fault, or
209 * a fault that was already known and is already being handled.
210 *
211 * If there are too many pending page faults, this will fail. Retry
212 * interrupts should be ignored in this case until there is enough
213 * free space.
214 *
215 * Returns 0 if the fault was added, 1 if the fault was already known,
216 * -ENOSPC if there are too many pending faults.
217 */
218int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
219{
220 unsigned long flags;
221 int r = -ENOSPC;
222
223 if (WARN_ON_ONCE(!adev->irq.ih.faults))
224 /* Should be allocated in <IP>_ih_sw_init on GPUs that
225 * support retry faults and require retry filtering.
226 */
227 return r;
228
229 spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
230
231 /* Only let the hash table fill up to 50% for best performance */
232 if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
233 goto unlock_out;
234
235 r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
236 if (!r)
237 adev->irq.ih.faults->count++;
238
239 /* chash_table_copy_in should never fail unless we're losing count */
240 WARN_ON_ONCE(r < 0);
241
242unlock_out:
243 spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
244 return r;
245}
246
247/**
248 * amdgpu_ih_clear_fault - Remove a page fault record
249 *
250 * @adev: amdgpu device pointer
251 * @key: 64-bit encoding of PASID and address
252 *
253 * This should be called when a page fault has been handled. Any
254 * future interrupt with this key will be processed as a new
255 * page fault.
256 */
257void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
258{
259 unsigned long flags;
260 int r;
261
262 if (!adev->irq.ih.faults)
263 return;
264
265 spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
266
267 r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
268 if (!WARN_ON_ONCE(r < 0)) {
269 adev->irq.ih.faults->count--;
270 WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
271 }
272
273 spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
274}
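
Retry page faults can arrive at a very high rate, so amdgpu_ih_add_fault()/amdgpu_ih_clear_fault() keep a small hash table of in-flight faults: 0 means "new fault, handle it", 1 means "already known, drop the interrupt", and -ENOSPC once the table passes 50% fill. A standalone sketch of that filtering logic follows, using a trivial open-addressing set in place of the chash library the kernel code relies on.

/* Sketch of retry-fault prescreening with the same return convention as
 * amdgpu_ih_add_fault(): 0 = new, 1 = known, -ENOSPC when half full.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <errno.h>

#define FAULT_HASH_BITS	8
#define FAULT_SLOTS	(1u << FAULT_HASH_BITS)

struct fault_filter {
	uint64_t key[FAULT_SLOTS];
	uint8_t used[FAULT_SLOTS];
	unsigned count;
};

static int fault_add(struct fault_filter *f, uint64_t key)
{
	unsigned i = (unsigned)(key % FAULT_SLOTS);

	if (f->count >= FAULT_SLOTS / 2)	/* keep the table at most half full */
		return -ENOSPC;

	for (;; i = (i + 1) % FAULT_SLOTS) {
		if (f->used[i] && f->key[i] == key)
			return 1;		/* fault already being handled */
		if (!f->used[i]) {
			f->used[i] = 1;
			f->key[i] = key;
			f->count++;
			return 0;		/* new fault */
		}
	}
}

int main(void)
{
	struct fault_filter f;

	memset(&f, 0, sizeof(f));
	printf("%d %d\n", fault_add(&f, 0x1234), fault_add(&f, 0x1234));	/* 0 1 */
	return 0;
}
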
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 3de8e74e5b3a..ada89358e220 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -24,6 +24,8 @@
24#ifndef __AMDGPU_IH_H__ 24#ifndef __AMDGPU_IH_H__
25#define __AMDGPU_IH_H__ 25#define __AMDGPU_IH_H__
26 26
27#include <linux/chash.h>
28
27struct amdgpu_device; 29struct amdgpu_device;
28 /* 30 /*
29 * vega10+ IH clients 31 * vega10+ IH clients
@@ -69,6 +71,13 @@ enum amdgpu_ih_clientid
69 71
70#define AMDGPU_IH_CLIENTID_LEGACY 0 72#define AMDGPU_IH_CLIENTID_LEGACY 0
71 73
74#define AMDGPU_PAGEFAULT_HASH_BITS 8
75struct amdgpu_retryfault_hashtable {
76 DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
77 spinlock_t lock;
78 int count;
79};
80
72/* 81/*
73 * R6xx+ IH ring 82 * R6xx+ IH ring
74 */ 83 */
@@ -87,6 +96,7 @@ struct amdgpu_ih_ring {
87 bool use_doorbell; 96 bool use_doorbell;
88 bool use_bus_addr; 97 bool use_bus_addr;
89 dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ 98 dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
99 struct amdgpu_retryfault_hashtable *faults;
90}; 100};
91 101
92#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4 102#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
@@ -109,5 +119,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
109 bool use_bus_addr); 119 bool use_bus_addr);
110void amdgpu_ih_ring_fini(struct amdgpu_device *adev); 120void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
111int amdgpu_ih_process(struct amdgpu_device *adev); 121int amdgpu_ih_process(struct amdgpu_device *adev);
122int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
123void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
112 124
113#endif 125#endif
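
The fault key is documented only as a "64-bit encoding of PASID and address"; the actual layout is chosen by the per-ASIC interrupt handler, not by this header. One plausible packing, purely for illustration:

/* Hypothetical key packing: 4 KiB page number in the low bits, PASID in the
 * high bits.  Illustrative only - not the layout the driver necessarily uses.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t fault_key(uint32_t pasid, uint64_t addr)
{
	return ((uint64_t)pasid << 48) | (addr >> 12);
}

int main(void)
{
	printf("key = 0x%016llx\n",
	       (unsigned long long)fault_key(0x42, 0x0000000123456000ULL));
	return 0;
}
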
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 4bdd851f56d0..538e5f27d120 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -221,8 +221,9 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
221 221
222 spin_lock_init(&adev->irq.lock); 222 spin_lock_init(&adev->irq.lock);
223 223
224 /* Disable vblank irqs aggressively for power-saving */ 224 if (!adev->enable_virtual_display)
225 adev->ddev->vblank_disable_immediate = true; 225 /* Disable vblank irqs aggressively for power-saving */
226 adev->ddev->vblank_disable_immediate = true;
226 227
227 r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc); 228 r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc);
228 if (r) { 229 if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index c908f972283c..4fd06f8d9768 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -455,13 +455,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
455 ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); 455 ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
456 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; 456 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
457 case AMDGPU_INFO_VRAM_USAGE: 457 case AMDGPU_INFO_VRAM_USAGE:
458 ui64 = atomic64_read(&adev->vram_usage); 458 ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
459 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; 459 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
460 case AMDGPU_INFO_VIS_VRAM_USAGE: 460 case AMDGPU_INFO_VIS_VRAM_USAGE:
461 ui64 = atomic64_read(&adev->vram_vis_usage); 461 ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
462 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; 462 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
463 case AMDGPU_INFO_GTT_USAGE: 463 case AMDGPU_INFO_GTT_USAGE:
464 ui64 = atomic64_read(&adev->gtt_usage); 464 ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
465 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; 465 return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
466 case AMDGPU_INFO_GDS_CONFIG: { 466 case AMDGPU_INFO_GDS_CONFIG: {
467 struct drm_amdgpu_info_gds gds_info; 467 struct drm_amdgpu_info_gds gds_info;
@@ -497,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
497 mem.vram.total_heap_size = adev->mc.real_vram_size; 497 mem.vram.total_heap_size = adev->mc.real_vram_size;
498 mem.vram.usable_heap_size = 498 mem.vram.usable_heap_size =
499 adev->mc.real_vram_size - adev->vram_pin_size; 499 adev->mc.real_vram_size - adev->vram_pin_size;
500 mem.vram.heap_usage = atomic64_read(&adev->vram_usage); 500 mem.vram.heap_usage =
501 amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
501 mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; 502 mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
502 503
503 mem.cpu_accessible_vram.total_heap_size = 504 mem.cpu_accessible_vram.total_heap_size =
@@ -506,7 +507,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
506 adev->mc.visible_vram_size - 507 adev->mc.visible_vram_size -
507 (adev->vram_pin_size - adev->invisible_pin_size); 508 (adev->vram_pin_size - adev->invisible_pin_size);
508 mem.cpu_accessible_vram.heap_usage = 509 mem.cpu_accessible_vram.heap_usage =
509 atomic64_read(&adev->vram_vis_usage); 510 amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
510 mem.cpu_accessible_vram.max_allocation = 511 mem.cpu_accessible_vram.max_allocation =
511 mem.cpu_accessible_vram.usable_heap_size * 3 / 4; 512 mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
512 513
@@ -514,7 +515,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
514 mem.gtt.total_heap_size *= PAGE_SIZE; 515 mem.gtt.total_heap_size *= PAGE_SIZE;
515 mem.gtt.usable_heap_size = mem.gtt.total_heap_size 516 mem.gtt.usable_heap_size = mem.gtt.total_heap_size
516 - adev->gart_pin_size; 517 - adev->gart_pin_size;
517 mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage); 518 mem.gtt.heap_usage =
519 amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
518 mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; 520 mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
519 521
520 return copy_to_user(out, &mem, 522 return copy_to_user(out, &mem,
@@ -588,11 +590,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
588 dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; 590 dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
589 dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; 591 dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
590 dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); 592 dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
591 dev_info.pte_fragment_size = 593 dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
592 (1 << AMDGPU_LOG2_PAGES_PER_FRAG(adev)) *
593 AMDGPU_GPU_PAGE_SIZE;
594 dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; 594 dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE;
595
596 dev_info.cu_active_number = adev->gfx.cu_info.number; 595 dev_info.cu_active_number = adev->gfx.cu_info.number;
597 dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; 596 dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
598 dev_info.ce_ram_size = adev->gfx.ce_ram_size; 597 dev_info.ce_ram_size = adev->gfx.ce_ram_size;
@@ -826,7 +825,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
826 } 825 }
827 826
828 r = amdgpu_vm_init(adev, &fpriv->vm, 827 r = amdgpu_vm_init(adev, &fpriv->vm,
829 AMDGPU_VM_CONTEXT_GFX); 828 AMDGPU_VM_CONTEXT_GFX, 0);
830 if (r) { 829 if (r) {
831 kfree(fpriv); 830 kfree(fpriv);
832 goto out_suspend; 831 goto out_suspend;
@@ -841,9 +840,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
841 } 840 }
842 841
843 if (amdgpu_sriov_vf(adev)) { 842 if (amdgpu_sriov_vf(adev)) {
844 r = amdgpu_map_static_csa(adev, &fpriv->vm); 843 r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
845 if (r) 844 if (r) {
845 amdgpu_vm_fini(adev, &fpriv->vm);
846 kfree(fpriv);
846 goto out_suspend; 847 goto out_suspend;
848 }
847 } 849 }
848 850
849 mutex_init(&fpriv->bo_list_lock); 851 mutex_init(&fpriv->bo_list_lock);
@@ -894,8 +896,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
894 if (amdgpu_sriov_vf(adev)) { 896 if (amdgpu_sriov_vf(adev)) {
895 /* TODO: how to handle reserve failure */ 897 /* TODO: how to handle reserve failure */
896 BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); 898 BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
897 amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va); 899 amdgpu_vm_bo_rmv(adev, fpriv->csa_va);
898 fpriv->vm.csa_bo_va = NULL; 900 fpriv->csa_va = NULL;
899 amdgpu_bo_unreserve(adev->virt.csa_obj); 901 amdgpu_bo_unreserve(adev->virt.csa_obj);
900 } 902 }
901 903
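
With the global usage atomics gone, AMDGPU_INFO_VRAM_USAGE, AMDGPU_INFO_VIS_VRAM_USAGE and AMDGPU_INFO_GTT_USAGE are now sourced from the TTM memory managers, so the userspace-visible behaviour of the INFO ioctl should be unchanged. A sketch of reading those counters through libdrm_amdgpu, assuming /dev/dri/renderD128 is an amdgpu render node and that amdgpu_query_info() is available (build against libdrm_amdgpu):

/* Sketch: query the usage counters this hunk rewires, via libdrm_amdgpu. */
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>

int main(void)
{
	amdgpu_device_handle dev;
	uint32_t major, minor;
	uint64_t vram = 0, gtt = 0;
	int fd = open("/dev/dri/renderD128", O_RDWR);

	if (fd < 0 || amdgpu_device_initialize(fd, &major, &minor, &dev))
		return 1;

	amdgpu_query_info(dev, AMDGPU_INFO_VRAM_USAGE, sizeof(vram), &vram);
	amdgpu_query_info(dev, AMDGPU_INFO_GTT_USAGE, sizeof(gtt), &gtt);
	printf("VRAM usage: %llu bytes, GTT usage: %llu bytes\n",
	       (unsigned long long)vram, (unsigned long long)gtt);

	amdgpu_device_deinitialize(dev);
	close(fd);
	return 0;
}
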
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 6558a3ed57a7..bd67f4cb8e6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -50,8 +50,10 @@ struct amdgpu_mn {
50 struct hlist_node node; 50 struct hlist_node node;
51 51
52 /* objects protected by lock */ 52 /* objects protected by lock */
53 struct mutex lock; 53 struct rw_semaphore lock;
54 struct rb_root objects; 54 struct rb_root_cached objects;
55 struct mutex read_lock;
56 atomic_t recursion;
55}; 57};
56 58
57struct amdgpu_mn_node { 59struct amdgpu_mn_node {
@@ -74,17 +76,17 @@ static void amdgpu_mn_destroy(struct work_struct *work)
74 struct amdgpu_bo *bo, *next_bo; 76 struct amdgpu_bo *bo, *next_bo;
75 77
76 mutex_lock(&adev->mn_lock); 78 mutex_lock(&adev->mn_lock);
77 mutex_lock(&rmn->lock); 79 down_write(&rmn->lock);
78 hash_del(&rmn->node); 80 hash_del(&rmn->node);
79 rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, 81 rbtree_postorder_for_each_entry_safe(node, next_node,
80 it.rb) { 82 &rmn->objects.rb_root, it.rb) {
81 list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { 83 list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
82 bo->mn = NULL; 84 bo->mn = NULL;
83 list_del_init(&bo->mn_list); 85 list_del_init(&bo->mn_list);
84 } 86 }
85 kfree(node); 87 kfree(node);
86 } 88 }
87 mutex_unlock(&rmn->lock); 89 up_write(&rmn->lock);
88 mutex_unlock(&adev->mn_lock); 90 mutex_unlock(&adev->mn_lock);
89 mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); 91 mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
90 kfree(rmn); 92 kfree(rmn);
@@ -106,6 +108,53 @@ static void amdgpu_mn_release(struct mmu_notifier *mn,
106 schedule_work(&rmn->work); 108 schedule_work(&rmn->work);
107} 109}
108 110
111
112/**
113 * amdgpu_mn_lock - take the write side lock for this mn
114 */
115void amdgpu_mn_lock(struct amdgpu_mn *mn)
116{
117 if (mn)
118 down_write(&mn->lock);
119}
120
121/**
122 * amdgpu_mn_unlock - drop the write side lock for this mn
123 */
124void amdgpu_mn_unlock(struct amdgpu_mn *mn)
125{
126 if (mn)
127 up_write(&mn->lock);
128}
129
130/**
131 * amdgpu_mn_read_lock - take the rmn read lock
132 *
133 * @rmn: our notifier
134 *
135 * Take the rmn read side lock.
136 */
137static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn)
138{
139 mutex_lock(&rmn->read_lock);
140 if (atomic_inc_return(&rmn->recursion) == 1)
141 down_read_non_owner(&rmn->lock);
142 mutex_unlock(&rmn->read_lock);
143}
144
145/**
146 * amdgpu_mn_read_unlock - drop the rmn read lock
147 *
148 * @rmn: our notifier
149 *
150 * Drop the rmn read side lock.
151 */
152static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn)
153{
154 if (atomic_dec_return(&rmn->recursion) == 0)
155 up_read_non_owner(&rmn->lock);
156}
157
109/** 158/**
110 * amdgpu_mn_invalidate_node - unmap all BOs of a node 159 * amdgpu_mn_invalidate_node - unmap all BOs of a node
111 * 160 *
@@ -126,54 +175,13 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
126 if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) 175 if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
127 continue; 176 continue;
128 177
129 r = amdgpu_bo_reserve(bo, true);
130 if (r) {
131 DRM_ERROR("(%ld) failed to reserve user bo\n", r);
132 continue;
133 }
134
135 r = reservation_object_wait_timeout_rcu(bo->tbo.resv, 178 r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
136 true, false, MAX_SCHEDULE_TIMEOUT); 179 true, false, MAX_SCHEDULE_TIMEOUT);
137 if (r <= 0) 180 if (r <= 0)
138 DRM_ERROR("(%ld) failed to wait for user bo\n", r); 181 DRM_ERROR("(%ld) failed to wait for user bo\n", r);
139 182
140 amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); 183 amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
141 r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
142 if (r)
143 DRM_ERROR("(%ld) failed to validate user bo\n", r);
144
145 amdgpu_bo_unreserve(bo);
146 }
147}
148
149/**
150 * amdgpu_mn_invalidate_page - callback to notify about mm change
151 *
152 * @mn: our notifier
153 * @mn: the mm this callback is about
154 * @address: address of invalidate page
155 *
156 * Invalidation of a single page. Blocks for all BOs mapping it
157 * and unmap them by move them into system domain again.
158 */
159static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
160 struct mm_struct *mm,
161 unsigned long address)
162{
163 struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
164 struct interval_tree_node *it;
165
166 mutex_lock(&rmn->lock);
167
168 it = interval_tree_iter_first(&rmn->objects, address, address);
169 if (it) {
170 struct amdgpu_mn_node *node;
171
172 node = container_of(it, struct amdgpu_mn_node, it);
173 amdgpu_mn_invalidate_node(node, address, address);
174 } 184 }
175
176 mutex_unlock(&rmn->lock);
177} 185}
178 186
179/** 187/**
@@ -198,7 +206,7 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
198 /* notification is exclusive, but interval is inclusive */ 206 /* notification is exclusive, but interval is inclusive */
199 end -= 1; 207 end -= 1;
200 208
201 mutex_lock(&rmn->lock); 209 amdgpu_mn_read_lock(rmn);
202 210
203 it = interval_tree_iter_first(&rmn->objects, start, end); 211 it = interval_tree_iter_first(&rmn->objects, start, end);
204 while (it) { 212 while (it) {
@@ -209,14 +217,32 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
209 217
210 amdgpu_mn_invalidate_node(node, start, end); 218 amdgpu_mn_invalidate_node(node, start, end);
211 } 219 }
220}
212 221
213 mutex_unlock(&rmn->lock); 222/**
223 * amdgpu_mn_invalidate_range_end - callback to notify about mm change
224 *
225 * @mn: our notifier
 226 * @mm: the mm this callback is about
227 * @start: start of updated range
228 * @end: end of updated range
229 *
230 * Release the lock again to allow new command submissions.
231 */
232static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
233 struct mm_struct *mm,
234 unsigned long start,
235 unsigned long end)
236{
237 struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
238
239 amdgpu_mn_read_unlock(rmn);
214} 240}
215 241
216static const struct mmu_notifier_ops amdgpu_mn_ops = { 242static const struct mmu_notifier_ops amdgpu_mn_ops = {
217 .release = amdgpu_mn_release, 243 .release = amdgpu_mn_release,
218 .invalidate_page = amdgpu_mn_invalidate_page,
219 .invalidate_range_start = amdgpu_mn_invalidate_range_start, 244 .invalidate_range_start = amdgpu_mn_invalidate_range_start,
245 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
220}; 246};
221 247
222/** 248/**
@@ -226,7 +252,7 @@ static const struct mmu_notifier_ops amdgpu_mn_ops = {
226 * 252 *
227 * Creates a notifier context for current->mm. 253 * Creates a notifier context for current->mm.
228 */ 254 */
229static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) 255struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
230{ 256{
231 struct mm_struct *mm = current->mm; 257 struct mm_struct *mm = current->mm;
232 struct amdgpu_mn *rmn; 258 struct amdgpu_mn *rmn;
@@ -251,8 +277,10 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
251 rmn->adev = adev; 277 rmn->adev = adev;
252 rmn->mm = mm; 278 rmn->mm = mm;
253 rmn->mn.ops = &amdgpu_mn_ops; 279 rmn->mn.ops = &amdgpu_mn_ops;
254 mutex_init(&rmn->lock); 280 init_rwsem(&rmn->lock);
255 rmn->objects = RB_ROOT; 281 rmn->objects = RB_ROOT_CACHED;
282 mutex_init(&rmn->read_lock);
283 atomic_set(&rmn->recursion, 0);
256 284
257 r = __mmu_notifier_register(&rmn->mn, mm); 285 r = __mmu_notifier_register(&rmn->mn, mm);
258 if (r) 286 if (r)
@@ -298,7 +326,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
298 326
299 INIT_LIST_HEAD(&bos); 327 INIT_LIST_HEAD(&bos);
300 328
301 mutex_lock(&rmn->lock); 329 down_write(&rmn->lock);
302 330
303 while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { 331 while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
304 kfree(node); 332 kfree(node);
@@ -312,7 +340,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
312 if (!node) { 340 if (!node) {
313 node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); 341 node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
314 if (!node) { 342 if (!node) {
315 mutex_unlock(&rmn->lock); 343 up_write(&rmn->lock);
316 return -ENOMEM; 344 return -ENOMEM;
317 } 345 }
318 } 346 }
@@ -327,7 +355,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
327 355
328 interval_tree_insert(&node->it, &rmn->objects); 356 interval_tree_insert(&node->it, &rmn->objects);
329 357
330 mutex_unlock(&rmn->lock); 358 up_write(&rmn->lock);
331 359
332 return 0; 360 return 0;
333} 361}
@@ -353,7 +381,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
353 return; 381 return;
354 } 382 }
355 383
356 mutex_lock(&rmn->lock); 384 down_write(&rmn->lock);
357 385
358 /* save the next list entry for later */ 386 /* save the next list entry for later */
359 head = bo->mn_list.next; 387 head = bo->mn_list.next;
@@ -368,6 +396,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
368 kfree(node); 396 kfree(node);
369 } 397 }
370 398
371 mutex_unlock(&rmn->lock); 399 up_write(&rmn->lock);
372 mutex_unlock(&adev->mn_lock); 400 mutex_unlock(&adev->mn_lock);
373} 401}
402
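
The notifier now pairs invalidate_range_start/end: start takes the read side of the new rw_semaphore (the write side is exported as amdgpu_mn_lock/amdgpu_mn_unlock for the CS path) and end drops it, with an atomic recursion counter so nested or overlapping range notifications only take the semaphore once. A standalone sketch of that counting pattern with pthreads; the pthread_rwlock_t stands in for the kernel rw_semaphore (build with -pthread).

/* Sketch of the counted read-lock pattern used by the MMU notifier: only the
 * first range_start takes the rwlock, only the last range_end releases it.
 */
#include <stdio.h>
#include <pthread.h>
#include <stdatomic.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t read_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int recursion;

static void range_start(void)
{
	pthread_mutex_lock(&read_lock);
	if (atomic_fetch_add(&recursion, 1) == 0)	/* first starter locks */
		pthread_rwlock_rdlock(&lock);
	pthread_mutex_unlock(&read_lock);
}

static void range_end(void)
{
	if (atomic_fetch_sub(&recursion, 1) == 1)	/* last finisher unlocks */
		pthread_rwlock_unlock(&lock);
}

int main(void)
{
	range_start();
	range_start();			/* nested notification: no second rdlock */
	range_end();
	range_end();			/* read lock released here */

	pthread_rwlock_wrlock(&lock);	/* writer (the CS path) can now proceed */
	pthread_rwlock_unlock(&lock);
	return 0;
}
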
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
new file mode 100644
index 000000000000..d0095a3793b8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -0,0 +1,52 @@
1/*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christian König
23 */
24#ifndef __AMDGPU_MN_H__
25#define __AMDGPU_MN_H__
26
27/*
28 * MMU Notifier
29 */
30struct amdgpu_mn;
31
32#if defined(CONFIG_MMU_NOTIFIER)
33void amdgpu_mn_lock(struct amdgpu_mn *mn);
34void amdgpu_mn_unlock(struct amdgpu_mn *mn);
35struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
36int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
37void amdgpu_mn_unregister(struct amdgpu_bo *bo);
38#else
39static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
40static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
41static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
42{
43 return NULL;
44}
45static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
46{
47 return -ENODEV;
48}
49static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
50#endif
51
52#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6e72fe7901ec..6982baeccd14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -37,55 +37,6 @@
37#include "amdgpu.h" 37#include "amdgpu.h"
38#include "amdgpu_trace.h" 38#include "amdgpu_trace.h"
39 39
40
41
42static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
43 struct ttm_mem_reg *mem)
44{
45 if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size)
46 return 0;
47
48 return ((mem->start << PAGE_SHIFT) + mem->size) >
49 adev->mc.visible_vram_size ?
50 adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
51 mem->size;
52}
53
54static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
55 struct ttm_mem_reg *old_mem,
56 struct ttm_mem_reg *new_mem)
57{
58 u64 vis_size;
59 if (!adev)
60 return;
61
62 if (new_mem) {
63 switch (new_mem->mem_type) {
64 case TTM_PL_TT:
65 atomic64_add(new_mem->size, &adev->gtt_usage);
66 break;
67 case TTM_PL_VRAM:
68 atomic64_add(new_mem->size, &adev->vram_usage);
69 vis_size = amdgpu_get_vis_part_size(adev, new_mem);
70 atomic64_add(vis_size, &adev->vram_vis_usage);
71 break;
72 }
73 }
74
75 if (old_mem) {
76 switch (old_mem->mem_type) {
77 case TTM_PL_TT:
78 atomic64_sub(old_mem->size, &adev->gtt_usage);
79 break;
80 case TTM_PL_VRAM:
81 atomic64_sub(old_mem->size, &adev->vram_usage);
82 vis_size = amdgpu_get_vis_part_size(adev, old_mem);
83 atomic64_sub(vis_size, &adev->vram_vis_usage);
84 break;
85 }
86 }
87}
88
89static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) 40static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
90{ 41{
91 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); 42 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
@@ -94,7 +45,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
94 bo = container_of(tbo, struct amdgpu_bo, tbo); 45 bo = container_of(tbo, struct amdgpu_bo, tbo);
95 46
96 amdgpu_bo_kunmap(bo); 47 amdgpu_bo_kunmap(bo);
97 amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
98 48
99 drm_gem_object_release(&bo->gem_base); 49 drm_gem_object_release(&bo->gem_base);
100 amdgpu_bo_unref(&bo->parent); 50 amdgpu_bo_unref(&bo->parent);
@@ -114,11 +64,12 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
114 return false; 64 return false;
115} 65}
116 66
117static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, 67void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
118 struct ttm_placement *placement,
119 struct ttm_place *places,
120 u32 domain, u64 flags)
121{ 68{
69 struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
70 struct ttm_placement *placement = &abo->placement;
71 struct ttm_place *places = abo->placements;
72 u64 flags = abo->flags;
122 u32 c = 0; 73 u32 c = 0;
123 74
124 if (domain & AMDGPU_GEM_DOMAIN_VRAM) { 75 if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
@@ -141,7 +92,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
141 92
142 if (domain & AMDGPU_GEM_DOMAIN_GTT) { 93 if (domain & AMDGPU_GEM_DOMAIN_GTT) {
143 places[c].fpfn = 0; 94 places[c].fpfn = 0;
144 places[c].lpfn = 0; 95 if (flags & AMDGPU_GEM_CREATE_SHADOW)
96 places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
97 else
98 places[c].lpfn = 0;
145 places[c].flags = TTM_PL_FLAG_TT; 99 places[c].flags = TTM_PL_FLAG_TT;
146 if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) 100 if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
147 places[c].flags |= TTM_PL_FLAG_WC | 101 places[c].flags |= TTM_PL_FLAG_WC |
@@ -198,27 +152,6 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
198 placement->busy_placement = places; 152 placement->busy_placement = places;
199} 153}
200 154
201void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
202{
203 struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
204
205 amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements,
206 domain, abo->flags);
207}
208
209static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
210 struct ttm_placement *placement)
211{
212 BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1));
213
214 memcpy(bo->placements, placement->placement,
215 placement->num_placement * sizeof(struct ttm_place));
216 bo->placement.num_placement = placement->num_placement;
217 bo->placement.num_busy_placement = placement->num_busy_placement;
218 bo->placement.placement = bo->placements;
219 bo->placement.busy_placement = bo->placements;
220}
221
222/** 155/**
223 * amdgpu_bo_create_reserved - create reserved BO for kernel use 156 * amdgpu_bo_create_reserved - create reserved BO for kernel use
224 * 157 *
@@ -350,14 +283,13 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
350 *cpu_addr = NULL; 283 *cpu_addr = NULL;
351} 284}
352 285
353int amdgpu_bo_create_restricted(struct amdgpu_device *adev, 286static int amdgpu_bo_do_create(struct amdgpu_device *adev,
354 unsigned long size, int byte_align, 287 unsigned long size, int byte_align,
355 bool kernel, u32 domain, u64 flags, 288 bool kernel, u32 domain, u64 flags,
356 struct sg_table *sg, 289 struct sg_table *sg,
357 struct ttm_placement *placement, 290 struct reservation_object *resv,
358 struct reservation_object *resv, 291 uint64_t init_value,
359 uint64_t init_value, 292 struct amdgpu_bo **bo_ptr)
360 struct amdgpu_bo **bo_ptr)
361{ 293{
362 struct amdgpu_bo *bo; 294 struct amdgpu_bo *bo;
363 enum ttm_bo_type type; 295 enum ttm_bo_type type;
@@ -431,10 +363,11 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
431 bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; 363 bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
432#endif 364#endif
433 365
434 amdgpu_fill_placement_to_bo(bo, placement); 366 bo->tbo.bdev = &adev->mman.bdev;
435 /* Kernel allocation are uninterruptible */ 367 amdgpu_ttm_placement_from_domain(bo, domain);
436 368
437 initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); 369 initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
370 /* Kernel allocation are uninterruptible */
438 r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, 371 r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
439 &bo->placement, page_align, !kernel, NULL, 372 &bo->placement, page_align, !kernel, NULL,
440 acc_size, sg, resv, &amdgpu_ttm_bo_destroy); 373 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
@@ -489,28 +422,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
489 unsigned long size, int byte_align, 422 unsigned long size, int byte_align,
490 struct amdgpu_bo *bo) 423 struct amdgpu_bo *bo)
491{ 424{
492 struct ttm_placement placement = {0};
493 struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
494 int r; 425 int r;
495 426
496 if (bo->shadow) 427 if (bo->shadow)
497 return 0; 428 return 0;
498 429
499 bo->flags |= AMDGPU_GEM_CREATE_SHADOW; 430 r = amdgpu_bo_do_create(adev, size, byte_align, true,
500 memset(&placements, 0, 431 AMDGPU_GEM_DOMAIN_GTT,
501 (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); 432 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
502 433 AMDGPU_GEM_CREATE_SHADOW,
503 amdgpu_ttm_placement_init(adev, &placement, 434 NULL, bo->tbo.resv, 0,
504 placements, AMDGPU_GEM_DOMAIN_GTT, 435 &bo->shadow);
505 AMDGPU_GEM_CREATE_CPU_GTT_USWC);
506
507 r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
508 AMDGPU_GEM_DOMAIN_GTT,
509 AMDGPU_GEM_CREATE_CPU_GTT_USWC,
510 NULL, &placement,
511 bo->tbo.resv,
512 0,
513 &bo->shadow);
514 if (!r) { 436 if (!r) {
515 bo->shadow->parent = amdgpu_bo_ref(bo); 437 bo->shadow->parent = amdgpu_bo_ref(bo);
516 mutex_lock(&adev->shadow_list_lock); 438 mutex_lock(&adev->shadow_list_lock);
@@ -532,32 +454,23 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
532 uint64_t init_value, 454 uint64_t init_value,
533 struct amdgpu_bo **bo_ptr) 455 struct amdgpu_bo **bo_ptr)
534{ 456{
535 struct ttm_placement placement = {0}; 457 uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
536 struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
537 int r; 458 int r;
538 459
539 memset(&placements, 0, 460 r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain,
540 (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); 461 parent_flags, sg, resv, init_value, bo_ptr);
541
542 amdgpu_ttm_placement_init(adev, &placement,
543 placements, domain, flags);
544
545 r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
546 domain, flags, sg, &placement,
547 resv, init_value, bo_ptr);
548 if (r) 462 if (r)
549 return r; 463 return r;
550 464
551 if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) { 465 if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
552 if (!resv) { 466 if (!resv)
553 r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL); 467 WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
554 WARN_ON(r != 0); 468 NULL));
555 }
556 469
557 r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); 470 r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
558 471
559 if (!resv) 472 if (!resv)
560 ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock); 473 reservation_object_unlock((*bo_ptr)->tbo.resv);
561 474
562 if (r) 475 if (r)
563 amdgpu_bo_unref(bo_ptr); 476 amdgpu_bo_unref(bo_ptr);
@@ -722,7 +635,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
722{ 635{
723 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 636 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
724 int r, i; 637 int r, i;
725 unsigned fpfn, lpfn;
726 638
727 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) 639 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
728 return -EPERM; 640 return -EPERM;
@@ -754,22 +666,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
754 } 666 }
755 667
756 bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; 668 bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
669 /* force to pin into visible video ram */
670 if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
671 bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
757 amdgpu_ttm_placement_from_domain(bo, domain); 672 amdgpu_ttm_placement_from_domain(bo, domain);
758 for (i = 0; i < bo->placement.num_placement; i++) { 673 for (i = 0; i < bo->placement.num_placement; i++) {
759 /* force to pin into visible video ram */ 674 unsigned fpfn, lpfn;
760 if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && 675
761 !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) && 676 fpfn = min_offset >> PAGE_SHIFT;
762 (!max_offset || max_offset > 677 lpfn = max_offset >> PAGE_SHIFT;
763 adev->mc.visible_vram_size)) { 678
764 if (WARN_ON_ONCE(min_offset >
765 adev->mc.visible_vram_size))
766 return -EINVAL;
767 fpfn = min_offset >> PAGE_SHIFT;
768 lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
769 } else {
770 fpfn = min_offset >> PAGE_SHIFT;
771 lpfn = max_offset >> PAGE_SHIFT;
772 }
773 if (fpfn > bo->placements[i].fpfn) 679 if (fpfn > bo->placements[i].fpfn)
774 bo->placements[i].fpfn = fpfn; 680 bo->placements[i].fpfn = fpfn;
775 if (!bo->placements[i].lpfn || 681 if (!bo->placements[i].lpfn ||
@@ -979,7 +885,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
979 return; 885 return;
980 886
981 abo = container_of(bo, struct amdgpu_bo, tbo); 887 abo = container_of(bo, struct amdgpu_bo, tbo);
982 amdgpu_vm_bo_invalidate(adev, abo); 888 amdgpu_vm_bo_invalidate(adev, abo, evict);
983 889
984 amdgpu_bo_kunmap(abo); 890 amdgpu_bo_kunmap(abo);
985 891
@@ -992,8 +898,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
992 return; 898 return;
993 899
994 /* move_notify is called before move happens */ 900 /* move_notify is called before move happens */
995 amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
996
997 trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); 901 trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
998} 902}
999 903
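A note on the amdgpu_object.c hunks above: the old amdgpu_bo_create_restricted()/amdgpu_fill_placement_to_bo() path is folded into amdgpu_bo_do_create(), the shadow BO is now produced by the same helper with AMDGPU_GEM_CREATE_SHADOW set, and amdgpu_bo_create() strips that flag from the parent allocation before creating the GTT shadow. Below is a minimal userspace sketch of that flag-splitting wrapper pattern; every type and function name in it is a hypothetical stand-in, not the driver API.

/* Hypothetical stand-ins illustrating the parent/shadow flag split used
 * in the hunks above; not the real amdgpu structures or functions. */
#include <stdio.h>
#include <stdint.h>

#define GEM_CREATE_SHADOW   (1u << 0)
#define GEM_CREATE_USWC     (1u << 1)

struct bo {
	uint64_t flags;
	struct bo *shadow;
};

/* low-level create: applies exactly the flags it is given */
static int bo_do_create(uint64_t flags, struct bo *out)
{
	out->flags = flags;
	out->shadow = NULL;
	return 0;
}

/* high-level create: the parent never carries the shadow flag itself;
 * the shadow copy is a second allocation that does. */
static int bo_create(uint64_t flags, struct bo *parent, struct bo *shadow)
{
	uint64_t parent_flags = flags & ~(uint64_t)GEM_CREATE_SHADOW;
	int r = bo_do_create(parent_flags, parent);

	if (r)
		return r;

	if (flags & GEM_CREATE_SHADOW) {
		r = bo_do_create(GEM_CREATE_USWC | GEM_CREATE_SHADOW, shadow);
		if (!r)
			parent->shadow = shadow;
	}
	return r;
}

int main(void)
{
	struct bo parent, shadow;

	bo_create(GEM_CREATE_SHADOW | GEM_CREATE_USWC, &parent, &shadow);
	printf("parent flags 0x%llx, shadow flags 0x%llx\n",
	       (unsigned long long)parent.flags,
	       (unsigned long long)shadow.flags);
	return 0;
}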
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 9b7b4fcb047b..39b6bf6fb051 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -33,7 +33,9 @@
33 33
34#define AMDGPU_BO_INVALID_OFFSET LONG_MAX 34#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
35 35
36/* bo virtual addresses in a vm */
36struct amdgpu_bo_va_mapping { 37struct amdgpu_bo_va_mapping {
38 struct amdgpu_bo_va *bo_va;
37 struct list_head list; 39 struct list_head list;
38 struct rb_node rb; 40 struct rb_node rb;
39 uint64_t start; 41 uint64_t start;
@@ -43,26 +45,24 @@ struct amdgpu_bo_va_mapping {
43 uint64_t flags; 45 uint64_t flags;
44}; 46};
45 47
46/* bo virtual addresses in a specific vm */ 48/* User space allocated BO in a VM */
47struct amdgpu_bo_va { 49struct amdgpu_bo_va {
50 struct amdgpu_vm_bo_base base;
51
48 /* protected by bo being reserved */ 52 /* protected by bo being reserved */
49 struct list_head bo_list;
50 struct dma_fence *last_pt_update;
51 unsigned ref_count; 53 unsigned ref_count;
52 54
53 /* protected by vm mutex and spinlock */ 55 /* all other members protected by the VM PD being reserved */
54 struct list_head vm_status; 56 struct dma_fence *last_pt_update;
55 57
56 /* mappings for this bo_va */ 58 /* mappings for this bo_va */
57 struct list_head invalids; 59 struct list_head invalids;
58 struct list_head valids; 60 struct list_head valids;
59 61
60 /* constant after initialization */ 62 /* If the mappings are cleared or filled */
61 struct amdgpu_vm *vm; 63 bool cleared;
62 struct amdgpu_bo *bo;
63}; 64};
64 65
65
66struct amdgpu_bo { 66struct amdgpu_bo {
67 /* Protected by tbo.reserved */ 67 /* Protected by tbo.reserved */
68 u32 preferred_domains; 68 u32 preferred_domains;
@@ -195,14 +195,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
195 struct reservation_object *resv, 195 struct reservation_object *resv,
196 uint64_t init_value, 196 uint64_t init_value,
197 struct amdgpu_bo **bo_ptr); 197 struct amdgpu_bo **bo_ptr);
198int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
199 unsigned long size, int byte_align,
200 bool kernel, u32 domain, u64 flags,
201 struct sg_table *sg,
202 struct ttm_placement *placement,
203 struct reservation_object *resv,
204 uint64_t init_value,
205 struct amdgpu_bo **bo_ptr);
206int amdgpu_bo_create_reserved(struct amdgpu_device *adev, 198int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
207 unsigned long size, int align, 199 unsigned long size, int align,
208 u32 domain, struct amdgpu_bo **bo_ptr, 200 u32 domain, struct amdgpu_bo **bo_ptr,
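The amdgpu_object.h change above replaces the per-bo_va vm/bo pointers and bo_list linkage with an embedded struct amdgpu_vm_bo_base, which is why later hunks in this diff (the trace points in amdgpu_trace.h) read bo_va->base.bo instead of bo_va->bo. A hypothetical sketch of that embed-a-common-base idiom follows, with placeholder types that only illustrate the shape.

/* Placeholder types illustrating how shared VM/BO state can live in an
 * embedded base struct; not the real amdgpu definitions. */
#include <stdio.h>
#include <stddef.h>

struct buffer;                    /* stand-in for the backing BO */

struct vm_bo_base {               /* stand-in for struct amdgpu_vm_bo_base */
	struct buffer *bo;
};

struct bo_va {                    /* stand-in for struct amdgpu_bo_va */
	struct vm_bo_base base;   /* shared state, embedded first */
	unsigned int ref_count;
};

int main(void)
{
	struct bo_va va = { .base = { .bo = NULL }, .ref_count = 1 };

	/* callers now reach the BO through the embedded base */
	printf("bo=%p refs=%u base at offset %zu\n",
	       (void *)va.base.bo, va.ref_count,
	       offsetof(struct bo_va, base));
	return 0;
}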
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 7df503aedb69..f6ce52956e6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -74,7 +74,7 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
74 adev->pm.dpm.ac_power = true; 74 adev->pm.dpm.ac_power = true;
75 else 75 else
76 adev->pm.dpm.ac_power = false; 76 adev->pm.dpm.ac_power = false;
77 if (adev->pm.funcs->enable_bapm) 77 if (adev->powerplay.pp_funcs->enable_bapm)
78 amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power); 78 amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power);
79 mutex_unlock(&adev->pm.mutex); 79 mutex_unlock(&adev->pm.mutex);
80 } 80 }
@@ -88,9 +88,9 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,
88 struct amdgpu_device *adev = ddev->dev_private; 88 struct amdgpu_device *adev = ddev->dev_private;
89 enum amd_pm_state_type pm; 89 enum amd_pm_state_type pm;
90 90
91 if (adev->pp_enabled) { 91 if (adev->powerplay.pp_funcs->get_current_power_state)
92 pm = amdgpu_dpm_get_current_power_state(adev); 92 pm = amdgpu_dpm_get_current_power_state(adev);
93 } else 93 else
94 pm = adev->pm.dpm.user_state; 94 pm = adev->pm.dpm.user_state;
95 95
96 return snprintf(buf, PAGE_SIZE, "%s\n", 96 return snprintf(buf, PAGE_SIZE, "%s\n",
@@ -119,7 +119,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
119 } 119 }
120 120
121 if (adev->pp_enabled) { 121 if (adev->pp_enabled) {
122 amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); 122 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
123 } else { 123 } else {
124 mutex_lock(&adev->pm.mutex); 124 mutex_lock(&adev->pm.mutex);
125 adev->pm.dpm.user_state = state; 125 adev->pm.dpm.user_state = state;
@@ -140,13 +140,17 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
140{ 140{
141 struct drm_device *ddev = dev_get_drvdata(dev); 141 struct drm_device *ddev = dev_get_drvdata(dev);
142 struct amdgpu_device *adev = ddev->dev_private; 142 struct amdgpu_device *adev = ddev->dev_private;
143 enum amd_dpm_forced_level level; 143 enum amd_dpm_forced_level level = 0xff;
144 144
145 if ((adev->flags & AMD_IS_PX) && 145 if ((adev->flags & AMD_IS_PX) &&
146 (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) 146 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
147 return snprintf(buf, PAGE_SIZE, "off\n"); 147 return snprintf(buf, PAGE_SIZE, "off\n");
148 148
149 level = amdgpu_dpm_get_performance_level(adev); 149 if (adev->powerplay.pp_funcs->get_performance_level)
150 level = amdgpu_dpm_get_performance_level(adev);
151 else
152 level = adev->pm.dpm.forced_level;
153
150 return snprintf(buf, PAGE_SIZE, "%s\n", 154 return snprintf(buf, PAGE_SIZE, "%s\n",
151 (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : 155 (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
152 (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : 156 (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
@@ -167,7 +171,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
167 struct drm_device *ddev = dev_get_drvdata(dev); 171 struct drm_device *ddev = dev_get_drvdata(dev);
168 struct amdgpu_device *adev = ddev->dev_private; 172 struct amdgpu_device *adev = ddev->dev_private;
169 enum amd_dpm_forced_level level; 173 enum amd_dpm_forced_level level;
170 enum amd_dpm_forced_level current_level; 174 enum amd_dpm_forced_level current_level = 0xff;
171 int ret = 0; 175 int ret = 0;
172 176
173 /* Can't force performance level when the card is off */ 177 /* Can't force performance level when the card is off */
@@ -175,7 +179,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
175 (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) 179 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
176 return -EINVAL; 180 return -EINVAL;
177 181
178 current_level = amdgpu_dpm_get_performance_level(adev); 182 if (adev->powerplay.pp_funcs->get_performance_level)
183 current_level = amdgpu_dpm_get_performance_level(adev);
179 184
180 if (strncmp("low", buf, strlen("low")) == 0) { 185 if (strncmp("low", buf, strlen("low")) == 0) {
181 level = AMD_DPM_FORCED_LEVEL_LOW; 186 level = AMD_DPM_FORCED_LEVEL_LOW;
@@ -203,9 +208,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
203 if (current_level == level) 208 if (current_level == level)
204 return count; 209 return count;
205 210
206 if (adev->pp_enabled) 211 if (adev->powerplay.pp_funcs->force_performance_level) {
207 amdgpu_dpm_force_performance_level(adev, level);
208 else {
209 mutex_lock(&adev->pm.mutex); 212 mutex_lock(&adev->pm.mutex);
210 if (adev->pm.dpm.thermal_active) { 213 if (adev->pm.dpm.thermal_active) {
211 count = -EINVAL; 214 count = -EINVAL;
@@ -233,7 +236,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
233 struct pp_states_info data; 236 struct pp_states_info data;
234 int i, buf_len; 237 int i, buf_len;
235 238
236 if (adev->pp_enabled) 239 if (adev->powerplay.pp_funcs->get_pp_num_states)
237 amdgpu_dpm_get_pp_num_states(adev, &data); 240 amdgpu_dpm_get_pp_num_states(adev, &data);
238 241
239 buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); 242 buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
@@ -257,8 +260,8 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
257 enum amd_pm_state_type pm = 0; 260 enum amd_pm_state_type pm = 0;
258 int i = 0; 261 int i = 0;
259 262
260 if (adev->pp_enabled) { 263 if (adev->powerplay.pp_funcs->get_current_power_state
261 264 && adev->powerplay.pp_funcs->get_pp_num_states) {
262 pm = amdgpu_dpm_get_current_power_state(adev); 265 pm = amdgpu_dpm_get_current_power_state(adev);
263 amdgpu_dpm_get_pp_num_states(adev, &data); 266 amdgpu_dpm_get_pp_num_states(adev, &data);
264 267
@@ -280,25 +283,10 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
280{ 283{
281 struct drm_device *ddev = dev_get_drvdata(dev); 284 struct drm_device *ddev = dev_get_drvdata(dev);
282 struct amdgpu_device *adev = ddev->dev_private; 285 struct amdgpu_device *adev = ddev->dev_private;
283 struct pp_states_info data;
284 enum amd_pm_state_type pm = 0;
285 int i;
286
287 if (adev->pp_force_state_enabled && adev->pp_enabled) {
288 pm = amdgpu_dpm_get_current_power_state(adev);
289 amdgpu_dpm_get_pp_num_states(adev, &data);
290
291 for (i = 0; i < data.nums; i++) {
292 if (pm == data.states[i])
293 break;
294 }
295 286
296 if (i == data.nums) 287 if (adev->pp_force_state_enabled)
297 i = -EINVAL; 288 return amdgpu_get_pp_cur_state(dev, attr, buf);
298 289 else
299 return snprintf(buf, PAGE_SIZE, "%d\n", i);
300
301 } else
302 return snprintf(buf, PAGE_SIZE, "\n"); 290 return snprintf(buf, PAGE_SIZE, "\n");
303} 291}
304 292
@@ -330,7 +318,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
330 if (state != POWER_STATE_TYPE_INTERNAL_BOOT && 318 if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
331 state != POWER_STATE_TYPE_DEFAULT) { 319 state != POWER_STATE_TYPE_DEFAULT) {
332 amdgpu_dpm_dispatch_task(adev, 320 amdgpu_dpm_dispatch_task(adev,
333 AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); 321 AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
334 adev->pp_force_state_enabled = true; 322 adev->pp_force_state_enabled = true;
335 } 323 }
336 } 324 }
@@ -347,7 +335,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
347 char *table = NULL; 335 char *table = NULL;
348 int size; 336 int size;
349 337
350 if (adev->pp_enabled) 338 if (adev->powerplay.pp_funcs->get_pp_table)
351 size = amdgpu_dpm_get_pp_table(adev, &table); 339 size = amdgpu_dpm_get_pp_table(adev, &table);
352 else 340 else
353 return 0; 341 return 0;
@@ -368,7 +356,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
368 struct drm_device *ddev = dev_get_drvdata(dev); 356 struct drm_device *ddev = dev_get_drvdata(dev);
369 struct amdgpu_device *adev = ddev->dev_private; 357 struct amdgpu_device *adev = ddev->dev_private;
370 358
371 if (adev->pp_enabled) 359 if (adev->powerplay.pp_funcs->set_pp_table)
372 amdgpu_dpm_set_pp_table(adev, buf, count); 360 amdgpu_dpm_set_pp_table(adev, buf, count);
373 361
374 return count; 362 return count;
@@ -380,14 +368,11 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
380{ 368{
381 struct drm_device *ddev = dev_get_drvdata(dev); 369 struct drm_device *ddev = dev_get_drvdata(dev);
382 struct amdgpu_device *adev = ddev->dev_private; 370 struct amdgpu_device *adev = ddev->dev_private;
383 ssize_t size = 0;
384 371
385 if (adev->pp_enabled) 372 if (adev->powerplay.pp_funcs->print_clock_levels)
386 size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); 373 return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
387 else if (adev->pm.funcs->print_clock_levels) 374 else
388 size = adev->pm.funcs->print_clock_levels(adev, PP_SCLK, buf); 375 return snprintf(buf, PAGE_SIZE, "\n");
389
390 return size;
391} 376}
392 377
393static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, 378static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
@@ -416,10 +401,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
416 mask |= 1 << level; 401 mask |= 1 << level;
417 } 402 }
418 403
419 if (adev->pp_enabled) 404 if (adev->powerplay.pp_funcs->force_clock_level)
420 amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); 405 amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
421 else if (adev->pm.funcs->force_clock_level) 406
422 adev->pm.funcs->force_clock_level(adev, PP_SCLK, mask);
423fail: 407fail:
424 return count; 408 return count;
425} 409}
@@ -430,14 +414,11 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
430{ 414{
431 struct drm_device *ddev = dev_get_drvdata(dev); 415 struct drm_device *ddev = dev_get_drvdata(dev);
432 struct amdgpu_device *adev = ddev->dev_private; 416 struct amdgpu_device *adev = ddev->dev_private;
433 ssize_t size = 0;
434
435 if (adev->pp_enabled)
436 size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
437 else if (adev->pm.funcs->print_clock_levels)
438 size = adev->pm.funcs->print_clock_levels(adev, PP_MCLK, buf);
439 417
440 return size; 418 if (adev->powerplay.pp_funcs->print_clock_levels)
419 return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
420 else
421 return snprintf(buf, PAGE_SIZE, "\n");
441} 422}
442 423
443static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, 424static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
@@ -465,11 +446,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
465 } 446 }
466 mask |= 1 << level; 447 mask |= 1 << level;
467 } 448 }
468 449 if (adev->powerplay.pp_funcs->force_clock_level)
469 if (adev->pp_enabled)
470 amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); 450 amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
471 else if (adev->pm.funcs->force_clock_level) 451
472 adev->pm.funcs->force_clock_level(adev, PP_MCLK, mask);
473fail: 452fail:
474 return count; 453 return count;
475} 454}
@@ -480,14 +459,11 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
480{ 459{
481 struct drm_device *ddev = dev_get_drvdata(dev); 460 struct drm_device *ddev = dev_get_drvdata(dev);
482 struct amdgpu_device *adev = ddev->dev_private; 461 struct amdgpu_device *adev = ddev->dev_private;
483 ssize_t size = 0;
484
485 if (adev->pp_enabled)
486 size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
487 else if (adev->pm.funcs->print_clock_levels)
488 size = adev->pm.funcs->print_clock_levels(adev, PP_PCIE, buf);
489 462
490 return size; 463 if (adev->powerplay.pp_funcs->print_clock_levels)
464 return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
465 else
466 return snprintf(buf, PAGE_SIZE, "\n");
491} 467}
492 468
493static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, 469static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
@@ -515,11 +491,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
515 } 491 }
516 mask |= 1 << level; 492 mask |= 1 << level;
517 } 493 }
518 494 if (adev->powerplay.pp_funcs->force_clock_level)
519 if (adev->pp_enabled)
520 amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); 495 amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
521 else if (adev->pm.funcs->force_clock_level) 496
522 adev->pm.funcs->force_clock_level(adev, PP_PCIE, mask);
523fail: 497fail:
524 return count; 498 return count;
525} 499}
@@ -532,10 +506,8 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
532 struct amdgpu_device *adev = ddev->dev_private; 506 struct amdgpu_device *adev = ddev->dev_private;
533 uint32_t value = 0; 507 uint32_t value = 0;
534 508
535 if (adev->pp_enabled) 509 if (adev->powerplay.pp_funcs->get_sclk_od)
536 value = amdgpu_dpm_get_sclk_od(adev); 510 value = amdgpu_dpm_get_sclk_od(adev);
537 else if (adev->pm.funcs->get_sclk_od)
538 value = adev->pm.funcs->get_sclk_od(adev);
539 511
540 return snprintf(buf, PAGE_SIZE, "%d\n", value); 512 return snprintf(buf, PAGE_SIZE, "%d\n", value);
541} 513}
@@ -556,12 +528,12 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
556 count = -EINVAL; 528 count = -EINVAL;
557 goto fail; 529 goto fail;
558 } 530 }
531 if (adev->powerplay.pp_funcs->set_sclk_od)
532 amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
559 533
560 if (adev->pp_enabled) { 534 if (adev->pp_enabled) {
561 amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); 535 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
562 amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL); 536 } else {
563 } else if (adev->pm.funcs->set_sclk_od) {
564 adev->pm.funcs->set_sclk_od(adev, (uint32_t)value);
565 adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; 537 adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
566 amdgpu_pm_compute_clocks(adev); 538 amdgpu_pm_compute_clocks(adev);
567 } 539 }
@@ -578,10 +550,8 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
578 struct amdgpu_device *adev = ddev->dev_private; 550 struct amdgpu_device *adev = ddev->dev_private;
579 uint32_t value = 0; 551 uint32_t value = 0;
580 552
581 if (adev->pp_enabled) 553 if (adev->powerplay.pp_funcs->get_mclk_od)
582 value = amdgpu_dpm_get_mclk_od(adev); 554 value = amdgpu_dpm_get_mclk_od(adev);
583 else if (adev->pm.funcs->get_mclk_od)
584 value = adev->pm.funcs->get_mclk_od(adev);
585 555
586 return snprintf(buf, PAGE_SIZE, "%d\n", value); 556 return snprintf(buf, PAGE_SIZE, "%d\n", value);
587} 557}
@@ -602,12 +572,12 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
602 count = -EINVAL; 572 count = -EINVAL;
603 goto fail; 573 goto fail;
604 } 574 }
575 if (adev->powerplay.pp_funcs->set_mclk_od)
576 amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
605 577
606 if (adev->pp_enabled) { 578 if (adev->pp_enabled) {
607 amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); 579 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
608 amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL); 580 } else {
609 } else if (adev->pm.funcs->set_mclk_od) {
610 adev->pm.funcs->set_mclk_od(adev, (uint32_t)value);
611 adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; 581 adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
612 amdgpu_pm_compute_clocks(adev); 582 amdgpu_pm_compute_clocks(adev);
613 } 583 }
@@ -621,14 +591,11 @@ static ssize_t amdgpu_get_pp_power_profile(struct device *dev,
621{ 591{
622 struct drm_device *ddev = dev_get_drvdata(dev); 592 struct drm_device *ddev = dev_get_drvdata(dev);
623 struct amdgpu_device *adev = ddev->dev_private; 593 struct amdgpu_device *adev = ddev->dev_private;
624 int ret = 0; 594 int ret = 0xff;
625 595
626 if (adev->pp_enabled) 596 if (adev->powerplay.pp_funcs->get_power_profile_state)
627 ret = amdgpu_dpm_get_power_profile_state( 597 ret = amdgpu_dpm_get_power_profile_state(
628 adev, query); 598 adev, query);
629 else if (adev->pm.funcs->get_power_profile_state)
630 ret = adev->pm.funcs->get_power_profile_state(
631 adev, query);
632 599
633 if (ret) 600 if (ret)
634 return ret; 601 return ret;
@@ -675,15 +642,12 @@ static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
675 char *sub_str, buf_cpy[128], *tmp_str; 642 char *sub_str, buf_cpy[128], *tmp_str;
676 const char delimiter[3] = {' ', '\n', '\0'}; 643 const char delimiter[3] = {' ', '\n', '\0'};
677 long int value; 644 long int value;
678 int ret = 0; 645 int ret = 0xff;
679 646
680 if (strncmp("reset", buf, strlen("reset")) == 0) { 647 if (strncmp("reset", buf, strlen("reset")) == 0) {
681 if (adev->pp_enabled) 648 if (adev->powerplay.pp_funcs->reset_power_profile_state)
682 ret = amdgpu_dpm_reset_power_profile_state( 649 ret = amdgpu_dpm_reset_power_profile_state(
683 adev, request); 650 adev, request);
684 else if (adev->pm.funcs->reset_power_profile_state)
685 ret = adev->pm.funcs->reset_power_profile_state(
686 adev, request);
687 if (ret) { 651 if (ret) {
688 count = -EINVAL; 652 count = -EINVAL;
689 goto fail; 653 goto fail;
@@ -692,12 +656,10 @@ static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
692 } 656 }
693 657
694 if (strncmp("set", buf, strlen("set")) == 0) { 658 if (strncmp("set", buf, strlen("set")) == 0) {
695 if (adev->pp_enabled) 659 if (adev->powerplay.pp_funcs->set_power_profile_state)
696 ret = amdgpu_dpm_set_power_profile_state( 660 ret = amdgpu_dpm_set_power_profile_state(
697 adev, request); 661 adev, request);
698 else if (adev->pm.funcs->set_power_profile_state) 662
699 ret = adev->pm.funcs->set_power_profile_state(
700 adev, request);
701 if (ret) { 663 if (ret) {
702 count = -EINVAL; 664 count = -EINVAL;
703 goto fail; 665 goto fail;
@@ -745,13 +707,8 @@ static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
745 707
746 loop++; 708 loop++;
747 } 709 }
748 710 if (adev->powerplay.pp_funcs->set_power_profile_state)
749 if (adev->pp_enabled) 711 ret = amdgpu_dpm_set_power_profile_state(adev, request);
750 ret = amdgpu_dpm_set_power_profile_state(
751 adev, request);
752 else if (adev->pm.funcs->set_power_profile_state)
753 ret = adev->pm.funcs->set_power_profile_state(
754 adev, request);
755 712
756 if (ret) 713 if (ret)
757 count = -EINVAL; 714 count = -EINVAL;
@@ -831,7 +788,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
831 (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) 788 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
832 return -EINVAL; 789 return -EINVAL;
833 790
834 if (!adev->pp_enabled && !adev->pm.funcs->get_temperature) 791 if (!adev->powerplay.pp_funcs->get_temperature)
835 temp = 0; 792 temp = 0;
836 else 793 else
837 temp = amdgpu_dpm_get_temperature(adev); 794 temp = amdgpu_dpm_get_temperature(adev);
@@ -862,7 +819,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
862 struct amdgpu_device *adev = dev_get_drvdata(dev); 819 struct amdgpu_device *adev = dev_get_drvdata(dev);
863 u32 pwm_mode = 0; 820 u32 pwm_mode = 0;
864 821
865 if (!adev->pp_enabled && !adev->pm.funcs->get_fan_control_mode) 822 if (!adev->powerplay.pp_funcs->get_fan_control_mode)
866 return -EINVAL; 823 return -EINVAL;
867 824
868 pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); 825 pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
@@ -879,7 +836,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
879 int err; 836 int err;
880 int value; 837 int value;
881 838
882 if (!adev->pp_enabled && !adev->pm.funcs->set_fan_control_mode) 839 if (!adev->powerplay.pp_funcs->set_fan_control_mode)
883 return -EINVAL; 840 return -EINVAL;
884 841
885 err = kstrtoint(buf, 10, &value); 842 err = kstrtoint(buf, 10, &value);
@@ -919,9 +876,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
919 876
920 value = (value * 100) / 255; 877 value = (value * 100) / 255;
921 878
922 err = amdgpu_dpm_set_fan_speed_percent(adev, value); 879 if (adev->powerplay.pp_funcs->set_fan_speed_percent) {
923 if (err) 880 err = amdgpu_dpm_set_fan_speed_percent(adev, value);
924 return err; 881 if (err)
882 return err;
883 }
925 884
926 return count; 885 return count;
927} 886}
@@ -932,11 +891,13 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
932{ 891{
933 struct amdgpu_device *adev = dev_get_drvdata(dev); 892 struct amdgpu_device *adev = dev_get_drvdata(dev);
934 int err; 893 int err;
935 u32 speed; 894 u32 speed = 0;
936 895
937 err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); 896 if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
938 if (err) 897 err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
939 return err; 898 if (err)
899 return err;
900 }
940 901
941 speed = (speed * 255) / 100; 902 speed = (speed * 255) / 100;
942 903
@@ -949,11 +910,13 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
949{ 910{
950 struct amdgpu_device *adev = dev_get_drvdata(dev); 911 struct amdgpu_device *adev = dev_get_drvdata(dev);
951 int err; 912 int err;
952 u32 speed; 913 u32 speed = 0;
953 914
954 err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); 915 if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
955 if (err) 916 err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
956 return err; 917 if (err)
918 return err;
919 }
957 920
958 return sprintf(buf, "%i\n", speed); 921 return sprintf(buf, "%i\n", speed);
959} 922}
@@ -1008,21 +971,21 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
1008 return 0; 971 return 0;
1009 972
1010 /* mask fan attributes if we have no bindings for this asic to expose */ 973 /* mask fan attributes if we have no bindings for this asic to expose */
1011 if ((!adev->pm.funcs->get_fan_speed_percent && 974 if ((!adev->powerplay.pp_funcs->get_fan_speed_percent &&
1012 attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ 975 attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */
1013 (!adev->pm.funcs->get_fan_control_mode && 976 (!adev->powerplay.pp_funcs->get_fan_control_mode &&
1014 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ 977 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */
1015 effective_mode &= ~S_IRUGO; 978 effective_mode &= ~S_IRUGO;
1016 979
1017 if ((!adev->pm.funcs->set_fan_speed_percent && 980 if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
1018 attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */ 981 attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */
1019 (!adev->pm.funcs->set_fan_control_mode && 982 (!adev->powerplay.pp_funcs->set_fan_control_mode &&
1020 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ 983 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */
1021 effective_mode &= ~S_IWUSR; 984 effective_mode &= ~S_IWUSR;
1022 985
1023 /* hide max/min values if we can't both query and manage the fan */ 986 /* hide max/min values if we can't both query and manage the fan */
1024 if ((!adev->pm.funcs->set_fan_speed_percent && 987 if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
1025 !adev->pm.funcs->get_fan_speed_percent) && 988 !adev->powerplay.pp_funcs->get_fan_speed_percent) &&
1026 (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || 989 (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
1027 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) 990 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
1028 return 0; 991 return 0;
@@ -1055,7 +1018,7 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
1055 if (!adev->pm.dpm_enabled) 1018 if (!adev->pm.dpm_enabled)
1056 return; 1019 return;
1057 1020
1058 if (adev->pm.funcs->get_temperature) { 1021 if (adev->powerplay.pp_funcs->get_temperature) {
1059 int temp = amdgpu_dpm_get_temperature(adev); 1022 int temp = amdgpu_dpm_get_temperature(adev);
1060 1023
1061 if (temp < adev->pm.dpm.thermal.min_temp) 1024 if (temp < adev->pm.dpm.thermal.min_temp)
@@ -1087,7 +1050,7 @@ static struct amdgpu_ps *amdgpu_dpm_pick_power_state(struct amdgpu_device *adev,
1087 true : false; 1050 true : false;
1088 1051
1089 /* check if the vblank period is too short to adjust the mclk */ 1052 /* check if the vblank period is too short to adjust the mclk */
1090 if (single_display && adev->pm.funcs->vblank_too_short) { 1053 if (single_display && adev->powerplay.pp_funcs->vblank_too_short) {
1091 if (amdgpu_dpm_vblank_too_short(adev)) 1054 if (amdgpu_dpm_vblank_too_short(adev))
1092 single_display = false; 1055 single_display = false;
1093 } 1056 }
@@ -1216,7 +1179,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
1216 struct amdgpu_ps *ps; 1179 struct amdgpu_ps *ps;
1217 enum amd_pm_state_type dpm_state; 1180 enum amd_pm_state_type dpm_state;
1218 int ret; 1181 int ret;
1219 bool equal; 1182 bool equal = false;
1220 1183
1221 /* if dpm init failed */ 1184 /* if dpm init failed */
1222 if (!adev->pm.dpm_enabled) 1185 if (!adev->pm.dpm_enabled)
@@ -1236,7 +1199,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
1236 else 1199 else
1237 return; 1200 return;
1238 1201
1239 if (amdgpu_dpm == 1) { 1202 if (amdgpu_dpm == 1 && adev->powerplay.pp_funcs->print_power_state) {
1240 printk("switching from power state:\n"); 1203 printk("switching from power state:\n");
1241 amdgpu_dpm_print_power_state(adev, adev->pm.dpm.current_ps); 1204 amdgpu_dpm_print_power_state(adev, adev->pm.dpm.current_ps);
1242 printk("switching to power state:\n"); 1205 printk("switching to power state:\n");
@@ -1245,15 +1208,17 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
1245 1208
1246 /* update whether vce is active */ 1209 /* update whether vce is active */
1247 ps->vce_active = adev->pm.dpm.vce_active; 1210 ps->vce_active = adev->pm.dpm.vce_active;
1248 1211 if (adev->powerplay.pp_funcs->display_configuration_changed)
1249 amdgpu_dpm_display_configuration_changed(adev); 1212 amdgpu_dpm_display_configuration_changed(adev);
1250 1213
1251 ret = amdgpu_dpm_pre_set_power_state(adev); 1214 ret = amdgpu_dpm_pre_set_power_state(adev);
1252 if (ret) 1215 if (ret)
1253 return; 1216 return;
1254 1217
1255 if ((0 != amgdpu_dpm_check_state_equal(adev, adev->pm.dpm.current_ps, adev->pm.dpm.requested_ps, &equal))) 1218 if (adev->powerplay.pp_funcs->check_state_equal) {
1256 equal = false; 1219 if (0 != amdgpu_dpm_check_state_equal(adev, adev->pm.dpm.current_ps, adev->pm.dpm.requested_ps, &equal))
1220 equal = false;
1221 }
1257 1222
1258 if (equal) 1223 if (equal)
1259 return; 1224 return;
@@ -1264,7 +1229,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
1264 adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; 1229 adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
1265 adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; 1230 adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
1266 1231
1267 if (adev->pm.funcs->force_performance_level) { 1232 if (adev->powerplay.pp_funcs->force_performance_level) {
1268 if (adev->pm.dpm.thermal_active) { 1233 if (adev->pm.dpm.thermal_active) {
1269 enum amd_dpm_forced_level level = adev->pm.dpm.forced_level; 1234 enum amd_dpm_forced_level level = adev->pm.dpm.forced_level;
1270 /* force low perf level for thermal */ 1235 /* force low perf level for thermal */
@@ -1280,7 +1245,7 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
1280 1245
1281void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) 1246void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
1282{ 1247{
1283 if (adev->pp_enabled || adev->pm.funcs->powergate_uvd) { 1248 if (adev->powerplay.pp_funcs->powergate_uvd) {
1284 /* enable/disable UVD */ 1249 /* enable/disable UVD */
1285 mutex_lock(&adev->pm.mutex); 1250 mutex_lock(&adev->pm.mutex);
1286 amdgpu_dpm_powergate_uvd(adev, !enable); 1251 amdgpu_dpm_powergate_uvd(adev, !enable);
@@ -1302,7 +1267,7 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
1302 1267
1303void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) 1268void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
1304{ 1269{
1305 if (adev->pp_enabled || adev->pm.funcs->powergate_vce) { 1270 if (adev->powerplay.pp_funcs->powergate_vce) {
1306 /* enable/disable VCE */ 1271 /* enable/disable VCE */
1307 mutex_lock(&adev->pm.mutex); 1272 mutex_lock(&adev->pm.mutex);
1308 amdgpu_dpm_powergate_vce(adev, !enable); 1273 amdgpu_dpm_powergate_vce(adev, !enable);
@@ -1337,8 +1302,7 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
1337{ 1302{
1338 int i; 1303 int i;
1339 1304
1340 if (adev->pp_enabled) 1305 if (adev->powerplay.pp_funcs->print_power_state == NULL)
1341 /* TO DO */
1342 return; 1306 return;
1343 1307
1344 for (i = 0; i < adev->pm.dpm.num_ps; i++) 1308 for (i = 0; i < adev->pm.dpm.num_ps; i++)
@@ -1353,10 +1317,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
1353 if (adev->pm.sysfs_initialized) 1317 if (adev->pm.sysfs_initialized)
1354 return 0; 1318 return 0;
1355 1319
1356 if (!adev->pp_enabled) { 1320 if (adev->powerplay.pp_funcs->get_temperature == NULL)
1357 if (adev->pm.funcs->get_temperature == NULL) 1321 return 0;
1358 return 0;
1359 }
1360 1322
1361 adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, 1323 adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
1362 DRIVER_NAME, adev, 1324 DRIVER_NAME, adev,
@@ -1496,7 +1458,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
1496 } 1458 }
1497 1459
1498 if (adev->pp_enabled) { 1460 if (adev->pp_enabled) {
1499 amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL); 1461 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL);
1500 } else { 1462 } else {
1501 mutex_lock(&adev->pm.mutex); 1463 mutex_lock(&adev->pm.mutex);
1502 adev->pm.dpm.new_active_crtcs = 0; 1464 adev->pm.dpm.new_active_crtcs = 0;
@@ -1634,8 +1596,8 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
1634 return amdgpu_debugfs_pm_info_pp(m, adev); 1596 return amdgpu_debugfs_pm_info_pp(m, adev);
1635 } else { 1597 } else {
1636 mutex_lock(&adev->pm.mutex); 1598 mutex_lock(&adev->pm.mutex);
1637 if (adev->pm.funcs->debugfs_print_current_performance_level) 1599 if (adev->powerplay.pp_funcs->debugfs_print_current_performance_level)
1638 adev->pm.funcs->debugfs_print_current_performance_level(adev, m); 1600 adev->powerplay.pp_funcs->debugfs_print_current_performance_level(adev, m);
1639 else 1601 else
1640 seq_printf(m, "Debugfs support not implemented for this asic\n"); 1602 seq_printf(m, "Debugfs support not implemented for this asic\n");
1641 mutex_unlock(&adev->pm.mutex); 1603 mutex_unlock(&adev->pm.mutex);
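The amdgpu_pm.c hunks above all apply one pattern: instead of branching on the global adev->pp_enabled flag and then calling into adev->pm.funcs unconditionally, each sysfs/hwmon handler now checks the specific adev->powerplay.pp_funcs callback for NULL and falls back to a harmless default when it is absent. Below is a minimal hypothetical sketch of that guard pattern; the struct and function names are invented for illustration.

/* Invented names illustrating "test the specific callback, not a global
 * enable flag" as applied in the sysfs handlers above. */
#include <stdio.h>

struct pp_funcs {
	int (*get_sclk_od)(void *handle);   /* may be NULL */
};

struct device_ctx {
	struct pp_funcs *pp_funcs;
};

static int show_sclk_od(struct device_ctx *dev, char *buf, size_t len)
{
	int value = 0;

	/* only call into the backend if it actually provides the hook */
	if (dev->pp_funcs->get_sclk_od)
		value = dev->pp_funcs->get_sclk_od(dev);

	return snprintf(buf, len, "%d\n", value);
}

static int fake_get_sclk_od(void *handle)
{
	(void)handle;
	return 42;
}

int main(void)
{
	char buf[16];
	struct pp_funcs with_hook = { .get_sclk_od = fake_get_sclk_od };
	struct pp_funcs without_hook = { 0 };
	struct device_ctx a = { .pp_funcs = &with_hook };
	struct device_ctx b = { .pp_funcs = &without_hook };

	show_sclk_od(&a, buf, sizeof(buf));
	printf("with hook:    %s", buf);
	show_sclk_od(&b, buf, sizeof(buf));
	printf("without hook: %s", buf);
	return 0;
}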
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index b7e1c026c0c8..2d2f0960b025 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -87,17 +87,28 @@ static int amdgpu_pp_early_init(void *handle)
87 case CHIP_OLAND: 87 case CHIP_OLAND:
88 case CHIP_HAINAN: 88 case CHIP_HAINAN:
89 amd_pp->ip_funcs = &si_dpm_ip_funcs; 89 amd_pp->ip_funcs = &si_dpm_ip_funcs;
90 amd_pp->pp_funcs = &si_dpm_funcs;
90 break; 91 break;
91#endif 92#endif
92#ifdef CONFIG_DRM_AMDGPU_CIK 93#ifdef CONFIG_DRM_AMDGPU_CIK
93 case CHIP_BONAIRE: 94 case CHIP_BONAIRE:
94 case CHIP_HAWAII: 95 case CHIP_HAWAII:
95 amd_pp->ip_funcs = &ci_dpm_ip_funcs; 96 if (amdgpu_dpm == -1) {
97 amd_pp->ip_funcs = &ci_dpm_ip_funcs;
98 amd_pp->pp_funcs = &ci_dpm_funcs;
99 } else {
100 adev->pp_enabled = true;
101 if (amdgpu_create_pp_handle(adev))
102 return -EINVAL;
103 amd_pp->ip_funcs = &pp_ip_funcs;
104 amd_pp->pp_funcs = &pp_dpm_funcs;
105 }
96 break; 106 break;
97 case CHIP_KABINI: 107 case CHIP_KABINI:
98 case CHIP_MULLINS: 108 case CHIP_MULLINS:
99 case CHIP_KAVERI: 109 case CHIP_KAVERI:
100 amd_pp->ip_funcs = &kv_dpm_ip_funcs; 110 amd_pp->ip_funcs = &kv_dpm_ip_funcs;
111 amd_pp->pp_funcs = &kv_dpm_funcs;
101 break; 112 break;
102#endif 113#endif
103 default: 114 default:
@@ -128,7 +139,7 @@ static int amdgpu_pp_late_init(void *handle)
128 139
129 if (adev->pp_enabled && adev->pm.dpm_enabled) { 140 if (adev->pp_enabled && adev->pm.dpm_enabled) {
130 amdgpu_pm_sysfs_init(adev); 141 amdgpu_pm_sysfs_init(adev);
131 amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL); 142 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_COMPLETE_INIT, NULL, NULL);
132 } 143 }
133 144
134 return ret; 145 return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 5b3f92891f89..90af8e82b16a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
57 ttm_bo_kunmap(&bo->dma_buf_vmap); 57 ttm_bo_kunmap(&bo->dma_buf_vmap);
58} 58}
59 59
60int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
61{
62 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
63 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
64 unsigned asize = amdgpu_bo_size(bo);
65 int ret;
66
67 if (!vma->vm_file)
68 return -ENODEV;
69
70 if (adev == NULL)
71 return -ENODEV;
72
73 /* Check for valid size. */
74 if (asize < vma->vm_end - vma->vm_start)
75 return -EINVAL;
76
77 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
78 (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
79 return -EPERM;
80 }
81 vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
82
83 /* prime mmap does not need to check access, so allow here */
84 ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
85 if (ret)
86 return ret;
87
88 ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
89 drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
90
91 return ret;
92}
93
60struct drm_gem_object * 94struct drm_gem_object *
61amdgpu_gem_prime_import_sg_table(struct drm_device *dev, 95amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
62 struct dma_buf_attachment *attach, 96 struct dma_buf_attachment *attach,
@@ -136,7 +170,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
136{ 170{
137 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); 171 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
138 172
139 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) 173 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
174 bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
140 return ERR_PTR(-EPERM); 175 return ERR_PTR(-EPERM);
141 176
142 return drm_gem_prime_export(dev, gobj, flags); 177 return drm_gem_prime_export(dev, gobj, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 8c2204c7b384..447d446b5015 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -57,21 +57,23 @@ static int psp_sw_init(void *handle)
57 psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; 57 psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
58 psp->ring_init = psp_v3_1_ring_init; 58 psp->ring_init = psp_v3_1_ring_init;
59 psp->ring_create = psp_v3_1_ring_create; 59 psp->ring_create = psp_v3_1_ring_create;
60 psp->ring_stop = psp_v3_1_ring_stop;
60 psp->ring_destroy = psp_v3_1_ring_destroy; 61 psp->ring_destroy = psp_v3_1_ring_destroy;
61 psp->cmd_submit = psp_v3_1_cmd_submit; 62 psp->cmd_submit = psp_v3_1_cmd_submit;
62 psp->compare_sram_data = psp_v3_1_compare_sram_data; 63 psp->compare_sram_data = psp_v3_1_compare_sram_data;
63 psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; 64 psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
65 psp->mode1_reset = psp_v3_1_mode1_reset;
64 break; 66 break;
65 case CHIP_RAVEN: 67 case CHIP_RAVEN:
66#if 0
67 psp->init_microcode = psp_v10_0_init_microcode; 68 psp->init_microcode = psp_v10_0_init_microcode;
68#endif
69 psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; 69 psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
70 psp->ring_init = psp_v10_0_ring_init; 70 psp->ring_init = psp_v10_0_ring_init;
71 psp->ring_create = psp_v10_0_ring_create; 71 psp->ring_create = psp_v10_0_ring_create;
72 psp->ring_stop = psp_v10_0_ring_stop;
72 psp->ring_destroy = psp_v10_0_ring_destroy; 73 psp->ring_destroy = psp_v10_0_ring_destroy;
73 psp->cmd_submit = psp_v10_0_cmd_submit; 74 psp->cmd_submit = psp_v10_0_cmd_submit;
74 psp->compare_sram_data = psp_v10_0_compare_sram_data; 75 psp->compare_sram_data = psp_v10_0_compare_sram_data;
76 psp->mode1_reset = psp_v10_0_mode1_reset;
75 break; 77 break;
76 default: 78 default:
77 return -EINVAL; 79 return -EINVAL;
@@ -90,6 +92,12 @@ static int psp_sw_init(void *handle)
90 92
91static int psp_sw_fini(void *handle) 93static int psp_sw_fini(void *handle)
92{ 94{
95 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
96
97 release_firmware(adev->psp.sos_fw);
98 adev->psp.sos_fw = NULL;
99 release_firmware(adev->psp.asd_fw);
100 adev->psp.asd_fw = NULL;
93 return 0; 101 return 0;
94} 102}
95 103
@@ -253,15 +261,18 @@ static int psp_asd_load(struct psp_context *psp)
253 261
254static int psp_hw_start(struct psp_context *psp) 262static int psp_hw_start(struct psp_context *psp)
255{ 263{
264 struct amdgpu_device *adev = psp->adev;
256 int ret; 265 int ret;
257 266
258 ret = psp_bootloader_load_sysdrv(psp); 267 if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) {
259 if (ret) 268 ret = psp_bootloader_load_sysdrv(psp);
260 return ret; 269 if (ret)
270 return ret;
261 271
262 ret = psp_bootloader_load_sos(psp); 272 ret = psp_bootloader_load_sos(psp);
263 if (ret) 273 if (ret)
264 return ret; 274 return ret;
275 }
265 276
266 ret = psp_ring_create(psp, PSP_RING_TYPE__KM); 277 ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
267 if (ret) 278 if (ret)
@@ -453,6 +464,16 @@ static int psp_hw_fini(void *handle)
453 464
454static int psp_suspend(void *handle) 465static int psp_suspend(void *handle)
455{ 466{
467 int ret;
468 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
469 struct psp_context *psp = &adev->psp;
470
471 ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
472 if (ret) {
473 DRM_ERROR("PSP ring stop failed\n");
474 return ret;
475 }
476
456 return 0; 477 return 0;
457} 478}
458 479
@@ -487,6 +508,22 @@ failed:
487 return ret; 508 return ret;
488} 509}
489 510
511static bool psp_check_reset(void* handle)
512{
513 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
514
515 if (adev->flags & AMD_IS_APU)
516 return true;
517
518 return false;
519}
520
521static int psp_reset(void* handle)
522{
523 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
524 return psp_mode1_reset(&adev->psp);
525}
526
490static bool psp_check_fw_loading_status(struct amdgpu_device *adev, 527static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
491 enum AMDGPU_UCODE_ID ucode_type) 528 enum AMDGPU_UCODE_ID ucode_type)
492{ 529{
@@ -530,8 +567,9 @@ const struct amd_ip_funcs psp_ip_funcs = {
530 .suspend = psp_suspend, 567 .suspend = psp_suspend,
531 .resume = psp_resume, 568 .resume = psp_resume,
532 .is_idle = NULL, 569 .is_idle = NULL,
570 .check_soft_reset = psp_check_reset,
533 .wait_for_idle = NULL, 571 .wait_for_idle = NULL,
534 .soft_reset = NULL, 572 .soft_reset = psp_reset,
535 .set_clockgating_state = psp_set_clockgating_state, 573 .set_clockgating_state = psp_set_clockgating_state,
536 .set_powergating_state = psp_set_powergating_state, 574 .set_powergating_state = psp_set_powergating_state,
537}; 575};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 538fa9dbfb21..ce4654550416 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -66,6 +66,8 @@ struct psp_context
66 struct psp_gfx_cmd_resp *cmd); 66 struct psp_gfx_cmd_resp *cmd);
67 int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); 67 int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
68 int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); 68 int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type);
69 int (*ring_stop)(struct psp_context *psp,
70 enum psp_ring_type ring_type);
69 int (*ring_destroy)(struct psp_context *psp, 71 int (*ring_destroy)(struct psp_context *psp,
70 enum psp_ring_type ring_type); 72 enum psp_ring_type ring_type);
71 int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, 73 int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
@@ -74,6 +76,7 @@ struct psp_context
74 struct amdgpu_firmware_info *ucode, 76 struct amdgpu_firmware_info *ucode,
75 enum AMDGPU_UCODE_ID ucode_type); 77 enum AMDGPU_UCODE_ID ucode_type);
76 bool (*smu_reload_quirk)(struct psp_context *psp); 78 bool (*smu_reload_quirk)(struct psp_context *psp);
79 int (*mode1_reset)(struct psp_context *psp);
77 80
78 /* fence buffer */ 81 /* fence buffer */
79 struct amdgpu_bo *fw_pri_bo; 82 struct amdgpu_bo *fw_pri_bo;
@@ -123,6 +126,7 @@ struct amdgpu_psp_funcs {
123#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) 126#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
124#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) 127#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
125#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type)) 128#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type))
129#define psp_ring_stop(psp, type) (psp)->ring_stop((psp), (type))
126#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type))) 130#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type)))
127#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ 131#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
128 (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) 132 (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
@@ -136,6 +140,8 @@ struct amdgpu_psp_funcs {
136 ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0) 140 ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0)
137#define psp_smu_reload_quirk(psp) \ 141#define psp_smu_reload_quirk(psp) \
138 ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false) 142 ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false)
143#define psp_mode1_reset(psp) \
144 ((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false)
139 145
140extern const struct amd_ip_funcs psp_ip_funcs; 146extern const struct amd_ip_funcs psp_ip_funcs;
141 147
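The amdgpu_psp.h hunks above route the new ring_stop and mode1_reset hooks through wrapper macros that degrade to a default value when a backend leaves the hook unset, the same optional-callback idiom already used for bootloader_load_sos and smu_reload_quirk. A short hypothetical sketch of that idiom follows; all names below are made up.

/* Hypothetical illustration of optional-callback wrapper macros that
 * fall back to a default when a backend leaves a hook unset. */
#include <stdbool.h>
#include <stdio.h>

struct psp_like {
	int  (*ring_stop)(struct psp_like *p);   /* optional */
	bool (*mode1_reset)(struct psp_like *p); /* optional */
};

#define ring_stop_call(p)   ((p)->ring_stop   ? (p)->ring_stop((p))   : 0)
#define mode1_reset_call(p) ((p)->mode1_reset ? (p)->mode1_reset((p)) : false)

static bool do_reset(struct psp_like *p)
{
	(void)p;
	printf("resetting\n");
	return true;
}

int main(void)
{
	struct psp_like full = { .mode1_reset = do_reset };
	struct psp_like bare = { 0 };

	printf("full: stop=%d reset=%d\n",
	       ring_stop_call(&full), mode1_reset_call(&full));
	printf("bare: stop=%d reset=%d\n",
	       ring_stop_call(&bare), mode1_reset_call(&bare));
	return 0;
}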
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6c5646b48d1a..5ce65280b396 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -170,6 +170,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
170 unsigned irq_type) 170 unsigned irq_type)
171{ 171{
172 int r; 172 int r;
173 int sched_hw_submission = amdgpu_sched_hw_submission;
174
175 /* Set the hw submission limit higher for KIQ because
176 * it's used for a number of gfx/compute tasks by both
177 * KFD and KGD which may have outstanding fences and
178 * it doesn't really use the gpu scheduler anyway;
179 * KIQ tasks get submitted directly to the ring.
180 */
181 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
182 sched_hw_submission = max(sched_hw_submission, 256);
173 183
174 if (ring->adev == NULL) { 184 if (ring->adev == NULL) {
175 if (adev->num_rings >= AMDGPU_MAX_RINGS) 185 if (adev->num_rings >= AMDGPU_MAX_RINGS)
@@ -178,8 +188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
178 ring->adev = adev; 188 ring->adev = adev;
179 ring->idx = adev->num_rings++; 189 ring->idx = adev->num_rings++;
180 adev->rings[ring->idx] = ring; 190 adev->rings[ring->idx] = ring;
181 r = amdgpu_fence_driver_init_ring(ring, 191 r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
182 amdgpu_sched_hw_submission);
183 if (r) 192 if (r)
184 return r; 193 return r;
185 } 194 }
@@ -218,8 +227,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
218 return r; 227 return r;
219 } 228 }
220 229
221 ring->ring_size = roundup_pow_of_two(max_dw * 4 * 230 ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
222 amdgpu_sched_hw_submission);
223 231
224 ring->buf_mask = (ring->ring_size / 4) - 1; 232 ring->buf_mask = (ring->ring_size / 4) - 1;
225 ring->ptr_mask = ring->funcs->support_64bit_ptrs ? 233 ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
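The amdgpu_ring.c hunk above raises the hardware submission count to at least 256 for KIQ rings and then derives the ring buffer size from it with roundup_pow_of_two(max_dw * 4 * sched_hw_submission). Below is a small hypothetical sketch of that sizing calculation; the concrete numbers are made up for illustration, not taken from the driver defaults.

/* Hypothetical illustration of the ring sizing done above: KIQ rings bump
 * the submission count to at least 256 before computing the power-of-two
 * buffer size. Numbers here are made up for the example. */
#include <stdio.h>

static unsigned long roundup_pow_of_two(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

static unsigned long ring_bytes(unsigned int max_dw, int hw_submission,
				int is_kiq)
{
	if (is_kiq && hw_submission < 256)
		hw_submission = 256;

	/* each dword is 4 bytes; reserve room for hw_submission frames */
	return roundup_pow_of_two((unsigned long)max_dw * 4 * hw_submission);
}

int main(void)
{
	/* e.g. max_dw = 1024, submission count = 2 (illustrative only) */
	printf("regular ring: %lu bytes\n", ring_bytes(1024, 2, 0));
	printf("kiq ring:     %lu bytes\n", ring_bytes(1024, 2, 1));
	return 0;
}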
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a6899180b265..c586f44312f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -244,6 +244,12 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
244 struct dma_fence *f = e->fence; 244 struct dma_fence *f = e->fence;
245 struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 245 struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
246 246
247 if (dma_fence_is_signaled(f)) {
248 hash_del(&e->node);
249 dma_fence_put(f);
250 kmem_cache_free(amdgpu_sync_slab, e);
251 continue;
252 }
247 if (ring && s_fence) { 253 if (ring && s_fence) {
248 /* For fences from the same ring it is sufficient 254 /* For fences from the same ring it is sufficient
249 * when they are scheduled. 255 * when they are scheduled.
@@ -256,13 +262,6 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
256 } 262 }
257 } 263 }
258 264
259 if (dma_fence_is_signaled(f)) {
260 hash_del(&e->node);
261 dma_fence_put(f);
262 kmem_cache_free(amdgpu_sync_slab, e);
263 continue;
264 }
265
266 return f; 265 return f;
267 } 266 }
268 267
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 9ab58245e518..213988f336ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -228,7 +228,7 @@ TRACE_EVENT(amdgpu_vm_bo_map,
228 ), 228 ),
229 229
230 TP_fast_assign( 230 TP_fast_assign(
231 __entry->bo = bo_va ? bo_va->bo : NULL; 231 __entry->bo = bo_va ? bo_va->base.bo : NULL;
232 __entry->start = mapping->start; 232 __entry->start = mapping->start;
233 __entry->last = mapping->last; 233 __entry->last = mapping->last;
234 __entry->offset = mapping->offset; 234 __entry->offset = mapping->offset;
@@ -252,7 +252,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
252 ), 252 ),
253 253
254 TP_fast_assign( 254 TP_fast_assign(
255 __entry->bo = bo_va->bo; 255 __entry->bo = bo_va->base.bo;
256 __entry->start = mapping->start; 256 __entry->start = mapping->start;
257 __entry->last = mapping->last; 257 __entry->last = mapping->last;
258 __entry->offset = mapping->offset; 258 __entry->offset = mapping->offset;
@@ -417,5 +417,5 @@ TRACE_EVENT(amdgpu_ttm_bo_move,
417 417
418/* This part must be outside protection */ 418/* This part must be outside protection */
419#undef TRACE_INCLUDE_PATH 419#undef TRACE_INCLUDE_PATH
420#define TRACE_INCLUDE_PATH . 420#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/amd/amdgpu
421#include <trace/define_trace.h> 421#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
index 385b7e1d72f9..9ec96b9e85d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -1,4 +1,23 @@
1/* Copyright Red Hat Inc 2010. 1/* Copyright Red Hat Inc 2010.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a
4 * copy of this software and associated documentation files (the "Software"),
5 * to deal in the Software without restriction, including without limitation
6 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 * and/or sell copies of the Software, and to permit persons to whom the
8 * Software is furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19 * OTHER DEALINGS IN THE SOFTWARE.
20 *
2 * Author : Dave Airlie <airlied@redhat.com> 21 * Author : Dave Airlie <airlied@redhat.com>
3 */ 22 */
4#include <drm/drmP.h> 23#include <drm/drmP.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c803b082324d..15a28578d458 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -42,7 +42,9 @@
42#include <linux/swap.h> 42#include <linux/swap.h>
43#include <linux/pagemap.h> 43#include <linux/pagemap.h>
44#include <linux/debugfs.h> 44#include <linux/debugfs.h>
45#include <linux/iommu.h>
45#include "amdgpu.h" 46#include "amdgpu.h"
47#include "amdgpu_trace.h"
46#include "bif/bif_4_1_d.h" 48#include "bif/bif_4_1_d.h"
47 49
48#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) 50#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -607,6 +609,7 @@ struct amdgpu_ttm_tt {
607 spinlock_t guptasklock; 609 spinlock_t guptasklock;
608 struct list_head guptasks; 610 struct list_head guptasks;
609 atomic_t mmu_invalidations; 611 atomic_t mmu_invalidations;
612 uint32_t last_set_pages;
610 struct list_head list; 613 struct list_head list;
611}; 614};
612 615
@@ -620,6 +623,8 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
620 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) 623 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
621 flags |= FOLL_WRITE; 624 flags |= FOLL_WRITE;
622 625
626 down_read(&current->mm->mmap_sem);
627
623 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { 628 if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
624 /* check that we only use anonymous memory 629 /* check that we only use anonymous memory
625 to prevent problems with writeback */ 630 to prevent problems with writeback */
@@ -627,8 +632,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
627 struct vm_area_struct *vma; 632 struct vm_area_struct *vma;
628 633
629 vma = find_vma(gtt->usermm, gtt->userptr); 634 vma = find_vma(gtt->usermm, gtt->userptr);
630 if (!vma || vma->vm_file || vma->vm_end < end) 635 if (!vma || vma->vm_file || vma->vm_end < end) {
636 up_read(&current->mm->mmap_sem);
631 return -EPERM; 637 return -EPERM;
638 }
632 } 639 }
633 640
634 do { 641 do {
@@ -655,13 +662,47 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
655 662
656 } while (pinned < ttm->num_pages); 663 } while (pinned < ttm->num_pages);
657 664
665 up_read(&current->mm->mmap_sem);
658 return 0; 666 return 0;
659 667
660release_pages: 668release_pages:
661 release_pages(pages, pinned, 0); 669 release_pages(pages, pinned, 0);
670 up_read(&current->mm->mmap_sem);
662 return r; 671 return r;
663} 672}
664 673
674void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
675{
676 struct amdgpu_ttm_tt *gtt = (void *)ttm;
677 unsigned i;
678
679 gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
680 for (i = 0; i < ttm->num_pages; ++i) {
681 if (ttm->pages[i])
682 put_page(ttm->pages[i]);
683
684 ttm->pages[i] = pages ? pages[i] : NULL;
685 }
686}
687
688void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
689{
690 struct amdgpu_ttm_tt *gtt = (void *)ttm;
691 unsigned i;
692
693 for (i = 0; i < ttm->num_pages; ++i) {
694 struct page *page = ttm->pages[i];
695
696 if (!page)
697 continue;
698
699 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
700 set_page_dirty(page);
701
702 mark_page_accessed(page);
703 }
704}
705
665/* prepare the sg table with the user pages */ 706/* prepare the sg table with the user pages */
666static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) 707static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
667{ 708{
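
The hunk above brackets the whole user-page walk with down_read()/up_read() of current->mm->mmap_sem, dropping the semaphore on the -EPERM check, on the release_pages error path and on the success return, and the new amdgpu_ttm_tt_set_user_pages()/amdgpu_ttm_tt_mark_user_pages() helpers centralize page release and dirty/accessed marking. A minimal stand-alone sketch of the same "lock once, release on every exit" shape, with a pthread rwlock and dummy helpers standing in for mmap_sem and the get_user_pages() calls (none of these names are from the patch):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t addr_space_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Dummy stand-ins for find_vma()/get_user_pages()/release_pages(). */
static int lookup_region(unsigned long addr) { return addr != 0; }
static int pin_pages(unsigned long addr, unsigned int n, void **pages)
{
	(void)addr; (void)pages;
	return (int)n;				/* pretend every page was pinned */
}
static void unpin_pages(void **pages, unsigned int n) { (void)pages; (void)n; }

static int get_pages(unsigned long addr, unsigned int num_pages, void **pages)
{
	unsigned int pinned = 0;
	int r;

	pthread_rwlock_rdlock(&addr_space_lock);	/* down_read(mmap_sem) */

	if (!lookup_region(addr)) {			/* the find_vma() check ... */
		pthread_rwlock_unlock(&addr_space_lock);
		return -EPERM;				/* ... releases the lock too */
	}

	while (pinned < num_pages) {
		r = pin_pages(addr + pinned, num_pages - pinned, pages + pinned);
		if (r < 0)
			goto release;			/* error unwind still unlocks */
		pinned += r;
	}

	pthread_rwlock_unlock(&addr_space_lock);	/* up_read() on success */
	return 0;

release:
	unpin_pages(pages, pinned);
	pthread_rwlock_unlock(&addr_space_lock);	/* up_read() on failure */
	return r;
}

int main(void)
{
	void *pages[4];

	printf("get_pages: %d\n", get_pages(0x1000, 4, pages));
	return 0;
}
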
@@ -699,7 +740,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
699{ 740{
700 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 741 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
701 struct amdgpu_ttm_tt *gtt = (void *)ttm; 742 struct amdgpu_ttm_tt *gtt = (void *)ttm;
702 struct sg_page_iter sg_iter;
703 743
704 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); 744 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
705 enum dma_data_direction direction = write ? 745 enum dma_data_direction direction = write ?
@@ -712,47 +752,16 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
712 /* free the sg table and pages again */ 752 /* free the sg table and pages again */
713 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 753 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
714 754
715 for_each_sg_page(ttm->sg->sgl, &sg_iter, ttm->sg->nents, 0) { 755 amdgpu_ttm_tt_mark_user_pages(ttm);
716 struct page *page = sg_page_iter_page(&sg_iter);
717 if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
718 set_page_dirty(page);
719
720 mark_page_accessed(page);
721 put_page(page);
722 }
723 756
724 sg_free_table(ttm->sg); 757 sg_free_table(ttm->sg);
725} 758}
726 759
727static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
728{
729 struct amdgpu_ttm_tt *gtt = (void *)ttm;
730 uint64_t flags;
731 int r;
732
733 spin_lock(&gtt->adev->gtt_list_lock);
734 flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem);
735 gtt->offset = (u64)mem->start << PAGE_SHIFT;
736 r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
737 ttm->pages, gtt->ttm.dma_address, flags);
738
739 if (r) {
740 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
741 ttm->num_pages, gtt->offset);
742 goto error_gart_bind;
743 }
744
745 list_add_tail(&gtt->list, &gtt->adev->gtt_list);
746error_gart_bind:
747 spin_unlock(&gtt->adev->gtt_list_lock);
748 return r;
749
750}
751
752static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, 760static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
753 struct ttm_mem_reg *bo_mem) 761 struct ttm_mem_reg *bo_mem)
754{ 762{
755 struct amdgpu_ttm_tt *gtt = (void*)ttm; 763 struct amdgpu_ttm_tt *gtt = (void*)ttm;
764 uint64_t flags;
756 int r = 0; 765 int r = 0;
757 766
758 if (gtt->userptr) { 767 if (gtt->userptr) {
@@ -772,9 +781,24 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
772 bo_mem->mem_type == AMDGPU_PL_OA) 781 bo_mem->mem_type == AMDGPU_PL_OA)
773 return -EINVAL; 782 return -EINVAL;
774 783
775 if (amdgpu_gtt_mgr_is_allocated(bo_mem)) 784 if (!amdgpu_gtt_mgr_is_allocated(bo_mem))
776 r = amdgpu_ttm_do_bind(ttm, bo_mem); 785 return 0;
786
787 spin_lock(&gtt->adev->gtt_list_lock);
788 flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
789 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
790 r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
791 ttm->pages, gtt->ttm.dma_address, flags);
777 792
793 if (r) {
794 DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
795 ttm->num_pages, gtt->offset);
796 goto error_gart_bind;
797 }
798
799 list_add_tail(&gtt->list, &gtt->adev->gtt_list);
800error_gart_bind:
801 spin_unlock(&gtt->adev->gtt_list_lock);
778 return r; 802 return r;
779} 803}
780 804
@@ -787,20 +811,38 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm)
787 811
788int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) 812int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
789{ 813{
814 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
790 struct ttm_tt *ttm = bo->ttm; 815 struct ttm_tt *ttm = bo->ttm;
816 struct ttm_mem_reg tmp;
817 struct ttm_placement placement;
818 struct ttm_place placements;
791 int r; 819 int r;
792 820
793 if (!ttm || amdgpu_ttm_is_bound(ttm)) 821 if (!ttm || amdgpu_ttm_is_bound(ttm))
794 return 0; 822 return 0;
795 823
796 r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo, 824 tmp = bo->mem;
797 NULL, bo_mem); 825 tmp.mm_node = NULL;
798 if (r) { 826 placement.num_placement = 1;
799 DRM_ERROR("Failed to allocate GTT address space (%d)\n", r); 827 placement.placement = &placements;
828 placement.num_busy_placement = 1;
829 placement.busy_placement = &placements;
830 placements.fpfn = 0;
831 placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
832 placements.flags = bo->mem.placement | TTM_PL_FLAG_TT;
833
834 r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
835 if (unlikely(r))
800 return r; 836 return r;
801 }
802 837
803 return amdgpu_ttm_do_bind(ttm, bo_mem); 838 r = ttm_bo_move_ttm(bo, true, false, &tmp);
839 if (unlikely(r))
840 ttm_bo_mem_put(bo, &tmp);
841 else
842 bo->offset = (bo->mem.start << PAGE_SHIFT) +
843 bo->bdev->man[bo->mem.mem_type].gpu_offset;
844
845 return r;
804} 846}
805 847
806int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) 848int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
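
amdgpu_ttm_bind() now allocates GART address space on demand: it builds a one-entry placement capped at adev->mc.gart_size >> PAGE_SHIFT, lets ttm_bo_mem_space() find a node, moves the BO into it with ttm_bo_move_ttm(), and derives the GPU-visible offset from the node start plus the memory type's base offset. A tiny worked example of that final offset computation, using made-up numbers:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* Illustrative values only. */
	uint64_t mem_start = 0x120;	/* allocated node start, in pages */
	uint64_t gpu_base  = 0x800000;	/* equivalent of man[mem_type].gpu_offset */

	/* bo->offset = (bo->mem.start << PAGE_SHIFT) + man[mem_type].gpu_offset */
	uint64_t bo_offset = (mem_start << PAGE_SHIFT) + gpu_base;

	printf("GPU offset: 0x%llx\n", (unsigned long long)bo_offset);
	return 0;
}
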
@@ -892,10 +934,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
892 934
893static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) 935static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
894{ 936{
895 struct amdgpu_device *adev; 937 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
896 struct amdgpu_ttm_tt *gtt = (void *)ttm; 938 struct amdgpu_ttm_tt *gtt = (void *)ttm;
897 unsigned i;
898 int r;
899 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); 939 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
900 940
901 if (ttm->state != tt_unpopulated) 941 if (ttm->state != tt_unpopulated)
@@ -918,44 +958,23 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
918 return 0; 958 return 0;
919 } 959 }
920 960
921 adev = amdgpu_ttm_adev(ttm->bdev);
922
923#ifdef CONFIG_SWIOTLB 961#ifdef CONFIG_SWIOTLB
924 if (swiotlb_nr_tbl()) { 962 if (swiotlb_nr_tbl()) {
925 return ttm_dma_populate(&gtt->ttm, adev->dev); 963 return ttm_dma_populate(&gtt->ttm, adev->dev);
926 } 964 }
927#endif 965#endif
928 966
929 r = ttm_pool_populate(ttm); 967 return ttm_populate_and_map_pages(adev->dev, &gtt->ttm);
930 if (r) {
931 return r;
932 }
933
934 for (i = 0; i < ttm->num_pages; i++) {
935 gtt->ttm.dma_address[i] = pci_map_page(adev->pdev, ttm->pages[i],
936 0, PAGE_SIZE,
937 PCI_DMA_BIDIRECTIONAL);
938 if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) {
939 while (i--) {
940 pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
941 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
942 gtt->ttm.dma_address[i] = 0;
943 }
944 ttm_pool_unpopulate(ttm);
945 return -EFAULT;
946 }
947 }
948 return 0;
949} 968}
950 969
951static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) 970static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
952{ 971{
953 struct amdgpu_device *adev; 972 struct amdgpu_device *adev;
954 struct amdgpu_ttm_tt *gtt = (void *)ttm; 973 struct amdgpu_ttm_tt *gtt = (void *)ttm;
955 unsigned i;
956 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); 974 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
957 975
958 if (gtt && gtt->userptr) { 976 if (gtt && gtt->userptr) {
977 amdgpu_ttm_tt_set_user_pages(ttm, NULL);
959 kfree(ttm->sg); 978 kfree(ttm->sg);
960 ttm->page_flags &= ~TTM_PAGE_FLAG_SG; 979 ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
961 return; 980 return;
@@ -973,14 +992,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
973 } 992 }
974#endif 993#endif
975 994
976 for (i = 0; i < ttm->num_pages; i++) { 995 ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
977 if (gtt->ttm.dma_address[i]) {
978 pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
979 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
980 }
981 }
982
983 ttm_pool_unpopulate(ttm);
984} 996}
985 997
986int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, 998int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
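
The populate/unpopulate paths stop open-coding the pci_map_page() loop that mapped each page and unwound every already-mapped page when one mapping failed, and call ttm_populate_and_map_pages()/ttm_unmap_and_unpopulate_pages() instead. For reference, the retired unwind pattern in a simplified stand-alone form (map_one()/unmap_one() are stand-ins, not the real DMA API):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for pci_map_page()/pci_unmap_page(). */
static uint64_t map_one(void *page)     { return (uint64_t)(uintptr_t)page; }
static void     unmap_one(uint64_t dma) { (void)dma; }

static int map_all(void **pages, uint64_t *dma_addr, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		dma_addr[i] = map_one(pages[i]);
		if (!dma_addr[i]) {
			/* unwind everything mapped so far, newest first */
			while (i--) {
				unmap_one(dma_addr[i]);
				dma_addr[i] = 0;
			}
			return -EFAULT;
		}
	}
	return 0;
}

int main(void)
{
	int a, b;
	void *pages[3] = { &a, &b, NULL };	/* the NULL page makes mapping fail */
	uint64_t dma[3];

	printf("map_all: %d\n", map_all(pages, dma, 3));
	return 0;
}
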
@@ -997,6 +1009,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
997 spin_lock_init(&gtt->guptasklock); 1009 spin_lock_init(&gtt->guptasklock);
998 INIT_LIST_HEAD(&gtt->guptasks); 1010 INIT_LIST_HEAD(&gtt->guptasks);
999 atomic_set(&gtt->mmu_invalidations, 0); 1011 atomic_set(&gtt->mmu_invalidations, 0);
1012 gtt->last_set_pages = 0;
1000 1013
1001 return 0; 1014 return 0;
1002} 1015}
@@ -1049,6 +1062,16 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
1049 return prev_invalidated != *last_invalidated; 1062 return prev_invalidated != *last_invalidated;
1050} 1063}
1051 1064
1065bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
1066{
1067 struct amdgpu_ttm_tt *gtt = (void *)ttm;
1068
1069 if (gtt == NULL || !gtt->userptr)
1070 return false;
1071
1072 return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
1073}
1074
1052bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) 1075bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
1053{ 1076{
1054 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1077 struct amdgpu_ttm_tt *gtt = (void *)ttm;
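
amdgpu_ttm_tt_userptr_needs_pages() compares the current mmu_invalidations counter against last_set_pages, the snapshot recorded by amdgpu_ttm_tt_set_user_pages(); any difference means an MMU notifier fired since the pages were set and they must be re-acquired. The same snapshot-and-compare idea in a few lines of C11 (atomic_uint standing in for the kernel's atomic_t):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct userptr_state {
	atomic_uint invalidations;	/* bumped by the MMU notifier */
	uint32_t    last_set;		/* snapshot taken when pages were set */
};

static void set_pages(struct userptr_state *s)
{
	s->last_set = atomic_load(&s->invalidations);
}

static bool needs_pages(struct userptr_state *s)
{
	return atomic_load(&s->invalidations) != s->last_set;
}

int main(void)
{
	struct userptr_state s = { 0, 0 };

	set_pages(&s);
	atomic_fetch_add(&s.invalidations, 1);	/* an invalidation arrives */
	printf("needs_pages: %d\n", needs_pages(&s));
	return 0;
}
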
@@ -1148,14 +1171,14 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1148 } 1171 }
1149 1172
1150 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 1173 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1151 WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); 1174 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
1152 WREG32(mmMM_INDEX_HI, aligned_pos >> 31); 1175 WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1153 if (!write || mask != 0xffffffff) 1176 if (!write || mask != 0xffffffff)
1154 value = RREG32(mmMM_DATA); 1177 value = RREG32_NO_KIQ(mmMM_DATA);
1155 if (write) { 1178 if (write) {
1156 value &= ~mask; 1179 value &= ~mask;
1157 value |= (*(uint32_t *)buf << shift) & mask; 1180 value |= (*(uint32_t *)buf << shift) & mask;
1158 WREG32(mmMM_DATA, value); 1181 WREG32_NO_KIQ(mmMM_DATA, value);
1159 } 1182 }
1160 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 1183 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1161 if (!write) { 1184 if (!write) {
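
amdgpu_ttm_access_memory() keeps the MM_INDEX/MM_INDEX_HI/MM_DATA sequence under mmio_idx_lock but switches to the *_NO_KIQ accessors, so this debug path always uses direct MMIO instead of being routed through the KIQ ring. A simplified sketch of the indexed read-modify-write pattern, with an array and a mutex standing in for the register window and the spinlock (not the driver's API):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
static uint32_t fake_vram[1024];	/* stand-in backing store */
static uint64_t mm_index;		/* currently selected dword address */

static void     wreg_index(uint64_t addr) { mm_index = addr & ~0x3ULL; }
static uint32_t rreg_data(void)           { return fake_vram[mm_index >> 2]; }
static void     wreg_data(uint32_t v)     { fake_vram[mm_index >> 2] = v; }

/* Read-modify-write one dword through the index/data window. */
static uint32_t vram_rmw(uint64_t addr, uint32_t value, uint32_t mask)
{
	uint32_t old;

	pthread_mutex_lock(&idx_lock);	/* spin_lock_irqsave() in the driver */
	wreg_index(addr);		/* WREG32_NO_KIQ(mmMM_INDEX, ...) */
	old = rreg_data();		/* RREG32_NO_KIQ(mmMM_DATA) */
	wreg_data((old & ~mask) | (value & mask));
	pthread_mutex_unlock(&idx_lock);

	return old;
}

int main(void)
{
	printf("old value: 0x%08x\n", vram_rmw(0x10, 0xdeadbeef, 0xffffffff));
	printf("new value: 0x%08x\n", vram_rmw(0x10, 0, 0));
	return 0;
}
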
@@ -1503,8 +1526,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1503 struct dma_fence **fence) 1526 struct dma_fence **fence)
1504{ 1527{
1505 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 1528 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
1506 /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/ 1529 uint32_t max_bytes = 8 *
1507 uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; 1530 adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
1508 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; 1531 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
1509 1532
1510 struct drm_mm_node *mm_node; 1533 struct drm_mm_node *mm_node;
@@ -1536,8 +1559,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1536 ++mm_node; 1559 ++mm_node;
1537 } 1560 }
1538 1561
1539 /* 10 double words for each SDMA_OP_PTEPDE cmd */ 1562 /* num of dwords for each SDMA_OP_PTEPDE cmd */
1540 num_dw = num_loops * 10; 1563 num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
1541 1564
1542 /* for IB padding */ 1565 /* for IB padding */
1543 num_dw += 64; 1566 num_dw += 64;
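
With the fill now issued as SDMA_OP_PTEPDE commands, the per-command byte limit becomes 8 bytes per PTE times set_max_nums_pte_pde, and the dword budget is num_loops times set_pte_pde_num_dw plus 64 dwords of IB padding, replacing the hard-coded 10. A quick worked computation with illustrative values (the real code accumulates num_loops per drm_mm node):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative values; the real limits come from vm_pte_funcs. */
	uint64_t byte_count         = 4ULL << 20;	/* 4 MiB to fill */
	uint32_t set_max_nums       = 0x1fffff;		/* PTEs per SDMA_OP_PTEPDE */
	uint32_t set_pte_pde_num_dw = 10;		/* dwords per command */

	uint32_t max_bytes = 8 * set_max_nums;		/* each PTE written is 8 bytes */
	uint64_t num_loops = (byte_count + max_bytes - 1) / max_bytes;
	uint64_t num_dw    = num_loops * set_pte_pde_num_dw + 64; /* + IB padding */

	printf("loops=%llu dwords=%llu\n",
	       (unsigned long long)num_loops, (unsigned long long)num_dw);
	return 0;
}
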
@@ -1597,32 +1620,16 @@ error_free:
1597 1620
1598#if defined(CONFIG_DEBUG_FS) 1621#if defined(CONFIG_DEBUG_FS)
1599 1622
1600extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager
1601 *man);
1602static int amdgpu_mm_dump_table(struct seq_file *m, void *data) 1623static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
1603{ 1624{
1604 struct drm_info_node *node = (struct drm_info_node *)m->private; 1625 struct drm_info_node *node = (struct drm_info_node *)m->private;
1605 unsigned ttm_pl = *(int *)node->info_ent->data; 1626 unsigned ttm_pl = *(int *)node->info_ent->data;
1606 struct drm_device *dev = node->minor->dev; 1627 struct drm_device *dev = node->minor->dev;
1607 struct amdgpu_device *adev = dev->dev_private; 1628 struct amdgpu_device *adev = dev->dev_private;
1608 struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv; 1629 struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
1609 struct ttm_bo_global *glob = adev->mman.bdev.glob;
1610 struct drm_printer p = drm_seq_file_printer(m); 1630 struct drm_printer p = drm_seq_file_printer(m);
1611 1631
1612 spin_lock(&glob->lru_lock); 1632 man->func->debug(man, &p);
1613 drm_mm_print(mm, &p);
1614 spin_unlock(&glob->lru_lock);
1615 switch (ttm_pl) {
1616 case TTM_PL_VRAM:
1617 seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
1618 adev->mman.bdev.man[ttm_pl].size,
1619 (u64)atomic64_read(&adev->vram_usage) >> 20,
1620 (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
1621 break;
1622 case TTM_PL_TT:
1623 amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]);
1624 break;
1625 }
1626 return 0; 1633 return 0;
1627} 1634}
1628 1635
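
amdgpu_mm_dump_table() now just hands a drm_printer to the memory type manager's func->debug() hook, so each manager formats its own state instead of the dump walking the drm_mm and usage counters by hand. The delegation pattern in miniature (every type below is a stand-in):

#include <stdio.h>

struct mgr;

struct mgr_funcs {
	void (*debug)(const struct mgr *man, FILE *out);	/* per-manager dump hook */
};

struct mgr {
	const char *name;
	unsigned long used, size;
	const struct mgr_funcs *func;
};

static void generic_debug(const struct mgr *man, FILE *out)
{
	fprintf(out, "%s: %lu / %lu pages used\n", man->name, man->used, man->size);
}

static const struct mgr_funcs mgr_debug_funcs = { .debug = generic_debug };

int main(void)
{
	struct mgr vram = { "vram", 100, 4096, &mgr_debug_funcs };

	vram.func->debug(&vram, stdout);	/* man->func->debug(man, &p) */
	return 0;
}
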
@@ -1659,9 +1666,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1659 return result; 1666 return result;
1660 1667
1661 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 1668 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1662 WREG32(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); 1669 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
1663 WREG32(mmMM_INDEX_HI, *pos >> 31); 1670 WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
1664 value = RREG32(mmMM_DATA); 1671 value = RREG32_NO_KIQ(mmMM_DATA);
1665 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 1672 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1666 1673
1667 r = put_user(value, (uint32_t *)buf); 1674 r = put_user(value, (uint32_t *)buf);
@@ -1677,10 +1684,50 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1677 return result; 1684 return result;
1678} 1685}
1679 1686
1687static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
1688 size_t size, loff_t *pos)
1689{
1690 struct amdgpu_device *adev = file_inode(f)->i_private;
1691 ssize_t result = 0;
1692 int r;
1693
1694 if (size & 0x3 || *pos & 0x3)
1695 return -EINVAL;
1696
1697 if (*pos >= adev->mc.mc_vram_size)
1698 return -ENXIO;
1699
1700 while (size) {
1701 unsigned long flags;
1702 uint32_t value;
1703
1704 if (*pos >= adev->mc.mc_vram_size)
1705 return result;
1706
1707 r = get_user(value, (uint32_t *)buf);
1708 if (r)
1709 return r;
1710
1711 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
1712 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
1713 WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
1714 WREG32_NO_KIQ(mmMM_DATA, value);
1715 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
1716
1717 result += 4;
1718 buf += 4;
1719 *pos += 4;
1720 size -= 4;
1721 }
1722
1723 return result;
1724}
1725
1680static const struct file_operations amdgpu_ttm_vram_fops = { 1726static const struct file_operations amdgpu_ttm_vram_fops = {
1681 .owner = THIS_MODULE, 1727 .owner = THIS_MODULE,
1682 .read = amdgpu_ttm_vram_read, 1728 .read = amdgpu_ttm_vram_read,
1683 .llseek = default_llseek 1729 .write = amdgpu_ttm_vram_write,
1730 .llseek = default_llseek,
1684}; 1731};
1685 1732
1686#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS 1733#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
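
The amdgpu_vram debugfs file becomes writable; reads and writes must be 4-byte sized and 4-byte aligned, and the file offset is the VRAM byte address. Assuming the usual DRM debugfs layout (the dri/<minor> path below is an assumption, not something this patch defines), a user-space read-modify-write could look like:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	/* Path is an assumption; adjust the minor index / debugfs mount point. */
	const char *path = "/sys/kernel/debug/dri/0/amdgpu_vram";
	off_t offset = 0x1000;		/* must be 4-byte aligned */
	uint32_t value;
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (pread(fd, &value, sizeof(value), offset) == (ssize_t)sizeof(value))
		printf("VRAM[0x%llx] = 0x%08x\n", (unsigned long long)offset, value);

	value ^= 0xffffffff;		/* illustrative modification */
	pwrite(fd, &value, sizeof(value), offset);

	close(fd);
	return 0;
}
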
@@ -1732,6 +1779,53 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
1732 1779
1733#endif 1780#endif
1734 1781
1782static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
1783 size_t size, loff_t *pos)
1784{
1785 struct amdgpu_device *adev = file_inode(f)->i_private;
1786 int r;
1787 uint64_t phys;
1788 struct iommu_domain *dom;
1789
1790 // always return 8 bytes
1791 if (size != 8)
1792 return -EINVAL;
1793
1794 // only accept page addresses
1795 if (*pos & 0xFFF)
1796 return -EINVAL;
1797
1798 dom = iommu_get_domain_for_dev(adev->dev);
1799 if (dom)
1800 phys = iommu_iova_to_phys(dom, *pos);
1801 else
1802 phys = *pos;
1803
1804 r = copy_to_user(buf, &phys, 8);
1805 if (r)
1806 return -EFAULT;
1807
1808 return 8;
1809}
1810
1811static const struct file_operations amdgpu_ttm_iova_fops = {
1812 .owner = THIS_MODULE,
1813 .read = amdgpu_iova_to_phys_read,
1814 .llseek = default_llseek
1815};
1816
1817static const struct {
1818 char *name;
1819 const struct file_operations *fops;
1820 int domain;
1821} ttm_debugfs_entries[] = {
1822 { "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM },
1823#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1824 { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
1825#endif
1826 { "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
1827};
1828
1735#endif 1829#endif
1736 1830
1737static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) 1831static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
@@ -1742,22 +1836,21 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
1742 struct drm_minor *minor = adev->ddev->primary; 1836 struct drm_minor *minor = adev->ddev->primary;
1743 struct dentry *ent, *root = minor->debugfs_root; 1837 struct dentry *ent, *root = minor->debugfs_root;
1744 1838
1745 ent = debugfs_create_file("amdgpu_vram", S_IFREG | S_IRUGO, root, 1839 for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
1746 adev, &amdgpu_ttm_vram_fops); 1840 ent = debugfs_create_file(
1747 if (IS_ERR(ent)) 1841 ttm_debugfs_entries[count].name,
1748 return PTR_ERR(ent); 1842 S_IFREG | S_IRUGO, root,
1749 i_size_write(ent->d_inode, adev->mc.mc_vram_size); 1843 adev,
1750 adev->mman.vram = ent; 1844 ttm_debugfs_entries[count].fops);
1751 1845 if (IS_ERR(ent))
1752#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS 1846 return PTR_ERR(ent);
1753 ent = debugfs_create_file("amdgpu_gtt", S_IFREG | S_IRUGO, root, 1847 if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
1754 adev, &amdgpu_ttm_gtt_fops); 1848 i_size_write(ent->d_inode, adev->mc.mc_vram_size);
1755 if (IS_ERR(ent)) 1849 else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
1756 return PTR_ERR(ent); 1850 i_size_write(ent->d_inode, adev->mc.gart_size);
1757 i_size_write(ent->d_inode, adev->mc.gart_size); 1851 adev->mman.debugfs_entries[count] = ent;
1758 adev->mman.gtt = ent; 1852 }
1759 1853
1760#endif
1761 count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); 1854 count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
1762 1855
1763#ifdef CONFIG_SWIOTLB 1856#ifdef CONFIG_SWIOTLB
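
amdgpu_ttm_debugfs_init() becomes table driven: it walks ttm_debugfs_entries[], creates each file, sets i_size from the VRAM or GART size depending on the entry's domain, and stores the dentries in mman.debugfs_entries[]. A generic version of that registration loop, with file_ops/create_file() as stand-ins for the debugfs API:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct file_ops { const char *hint; };	/* stand-in for struct file_operations */

static const struct file_ops vram_fops = { "vram" };
static const struct file_ops iova_fops = { "iova" };

static const struct {
	const char *name;
	const struct file_ops *fops;
} entries[] = {
	{ "amdgpu_vram", &vram_fops },
	{ "amdgpu_iova", &iova_fops },
};

/* Stand-in for debugfs_create_file(); just reports what would be created. */
static int create_file(const char *name, const struct file_ops *fops)
{
	printf("registering %s (%s)\n", name, fops->hint);
	return 0;
}

int main(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(entries); i++)
		if (create_file(entries[i].name, entries[i].fops))
			return 1;
	return 0;
}
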
@@ -1767,7 +1860,6 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
1767 1860
1768 return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count); 1861 return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
1769#else 1862#else
1770
1771 return 0; 1863 return 0;
1772#endif 1864#endif
1773} 1865}
@@ -1775,14 +1867,9 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
1775static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev) 1867static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
1776{ 1868{
1777#if defined(CONFIG_DEBUG_FS) 1869#if defined(CONFIG_DEBUG_FS)
1870 unsigned i;
1778 1871
1779 debugfs_remove(adev->mman.vram); 1872 for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++)
1780 adev->mman.vram = NULL; 1873 debugfs_remove(adev->mman.debugfs_entries[i]);
1781
1782#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1783 debugfs_remove(adev->mman.gtt);
1784 adev->mman.gtt = NULL;
1785#endif
1786
1787#endif 1874#endif
1788} 1875}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 0e2399f32de7..7abae6867339 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -24,6 +24,7 @@
24#ifndef __AMDGPU_TTM_H__ 24#ifndef __AMDGPU_TTM_H__
25#define __AMDGPU_TTM_H__ 25#define __AMDGPU_TTM_H__
26 26
27#include "amdgpu.h"
27#include "gpu_scheduler.h" 28#include "gpu_scheduler.h"
28 29
29#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) 30#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
@@ -45,8 +46,7 @@ struct amdgpu_mman {
45 bool initialized; 46 bool initialized;
46 47
47#if defined(CONFIG_DEBUG_FS) 48#if defined(CONFIG_DEBUG_FS)
48 struct dentry *vram; 49 struct dentry *debugfs_entries[8];
49 struct dentry *gtt;
50#endif 50#endif
51 51
52 /* buffer handling */ 52 /* buffer handling */
@@ -62,10 +62,10 @@ extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
62extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; 62extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
63 63
64bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem); 64bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem);
65int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, 65uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
66 struct ttm_buffer_object *tbo, 66
67 const struct ttm_place *place, 67uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
68 struct ttm_mem_reg *mem); 68uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
69 69
70int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, 70int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
71 uint64_t dst_offset, uint32_t byte_count, 71 uint64_t dst_offset, uint32_t byte_count,
@@ -82,4 +82,20 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
82int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); 82int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
83int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); 83int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
84 84
85int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
86void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
87void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
88int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
89 uint32_t flags);
90bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
91struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
92bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
93 unsigned long end);
94bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
95 int *last_invalidated);
96bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
97bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
98uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
99 struct ttm_mem_reg *mem);
100
85#endif 101#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 36c763310df5..65649026b836 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -270,12 +270,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
270 else 270 else
271 return AMDGPU_FW_LOAD_SMU; 271 return AMDGPU_FW_LOAD_SMU;
272 case CHIP_VEGA10: 272 case CHIP_VEGA10:
273 if (!load_type)
274 return AMDGPU_FW_LOAD_DIRECT;
275 else
276 return AMDGPU_FW_LOAD_PSP;
277 case CHIP_RAVEN: 273 case CHIP_RAVEN:
278 if (load_type != 2) 274 if (!load_type)
279 return AMDGPU_FW_LOAD_DIRECT; 275 return AMDGPU_FW_LOAD_DIRECT;
280 else 276 else
281 return AMDGPU_FW_LOAD_PSP; 277 return AMDGPU_FW_LOAD_PSP;
@@ -364,8 +360,6 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
364int amdgpu_ucode_init_bo(struct amdgpu_device *adev) 360int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
365{ 361{
366 struct amdgpu_bo **bo = &adev->firmware.fw_buf; 362 struct amdgpu_bo **bo = &adev->firmware.fw_buf;
367 uint64_t fw_mc_addr;
368 void *fw_buf_ptr = NULL;
369 uint64_t fw_offset = 0; 363 uint64_t fw_offset = 0;
370 int i, err; 364 int i, err;
371 struct amdgpu_firmware_info *ucode = NULL; 365 struct amdgpu_firmware_info *ucode = NULL;
@@ -376,37 +370,39 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
376 return 0; 370 return 0;
377 } 371 }
378 372
379 err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, 373 if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) {
380 amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, 374 err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
381 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, 375 amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
382 NULL, NULL, 0, bo); 376 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
383 if (err) { 377 NULL, NULL, 0, bo);
384 dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); 378 if (err) {
385 goto failed; 379 dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
386 } 380 goto failed;
381 }
387 382
388 err = amdgpu_bo_reserve(*bo, false); 383 err = amdgpu_bo_reserve(*bo, false);
389 if (err) { 384 if (err) {
390 dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err); 385 dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err);
391 goto failed_reserve; 386 goto failed_reserve;
392 } 387 }
393 388
394 err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, 389 err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
395 &fw_mc_addr); 390 &adev->firmware.fw_buf_mc);
396 if (err) { 391 if (err) {
397 dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err); 392 dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err);
398 goto failed_pin; 393 goto failed_pin;
399 } 394 }
400 395
401 err = amdgpu_bo_kmap(*bo, &fw_buf_ptr); 396 err = amdgpu_bo_kmap(*bo, &adev->firmware.fw_buf_ptr);
402 if (err) { 397 if (err) {
403 dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err); 398 dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err);
404 goto failed_kmap; 399 goto failed_kmap;
405 } 400 }
406 401
407 amdgpu_bo_unreserve(*bo); 402 amdgpu_bo_unreserve(*bo);
403 }
408 404
409 memset(fw_buf_ptr, 0, adev->firmware.fw_size); 405 memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
410 406
411 /* 407 /*
412 * if SMU loaded firmware, it needn't add SMC, UVD, and VCE 408 * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
@@ -425,14 +421,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
425 ucode = &adev->firmware.ucode[i]; 421 ucode = &adev->firmware.ucode[i];
426 if (ucode->fw) { 422 if (ucode->fw) {
427 header = (const struct common_firmware_header *)ucode->fw->data; 423 header = (const struct common_firmware_header *)ucode->fw->data;
428 amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset, 424 amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset,
429 (void *)((uint8_t *)fw_buf_ptr + fw_offset)); 425 adev->firmware.fw_buf_ptr + fw_offset);
430 if (i == AMDGPU_UCODE_ID_CP_MEC1 && 426 if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
431 adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 427 adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
432 const struct gfx_firmware_header_v1_0 *cp_hdr; 428 const struct gfx_firmware_header_v1_0 *cp_hdr;
433 cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; 429 cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
434 amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset, 430 amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
435 fw_buf_ptr + fw_offset); 431 adev->firmware.fw_buf_ptr + fw_offset);
436 fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); 432 fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
437 } 433 }
438 fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE); 434 fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index aefecf6c1e7b..e8bd50cf9785 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -269,6 +269,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
269 269
270int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) 270int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
271{ 271{
272 int i;
272 kfree(adev->uvd.saved_bo); 273 kfree(adev->uvd.saved_bo);
273 274
274 amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); 275 amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
@@ -279,6 +280,9 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
279 280
280 amdgpu_ring_fini(&adev->uvd.ring); 281 amdgpu_ring_fini(&adev->uvd.ring);
281 282
283 for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
284 amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
285
282 release_firmware(adev->uvd.fw); 286 release_firmware(adev->uvd.fw);
283 287
284 return 0; 288 return 0;
@@ -410,10 +414,10 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
410 uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); 414 uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
411 int r = 0; 415 int r = 0;
412 416
413 mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo); 417 r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
414 if (mapping == NULL) { 418 if (r) {
415 DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); 419 DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
416 return -EINVAL; 420 return r;
417 } 421 }
418 422
419 if (!ctx->parser->adev->uvd.address_64_bit) { 423 if (!ctx->parser->adev->uvd.address_64_bit) {
@@ -588,6 +592,10 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
588 } 592 }
589 break; 593 break;
590 594
595 case 8: /* MJPEG */
596 min_dpb_size = 0;
597 break;
598
591 case 16: /* H265 */ 599 case 16: /* H265 */
592 image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2; 600 image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
593 image_size = ALIGN(image_size, 256); 601 image_size = ALIGN(image_size, 256);
@@ -733,10 +741,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
733 uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx); 741 uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
734 int r; 742 int r;
735 743
736 mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo); 744 r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
737 if (mapping == NULL) { 745 if (r) {
738 DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); 746 DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
739 return -EINVAL; 747 return r;
740 } 748 }
741 749
742 start = amdgpu_bo_gpu_offset(bo); 750 start = amdgpu_bo_gpu_offset(bo);
@@ -913,10 +921,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
913 return -EINVAL; 921 return -EINVAL;
914 } 922 }
915 923
916 r = amdgpu_cs_sysvm_access_required(parser);
917 if (r)
918 return r;
919
920 ctx.parser = parser; 924 ctx.parser = parser;
921 ctx.buf_sizes = buf_sizes; 925 ctx.buf_sizes = buf_sizes;
922 ctx.ib_idx = ib_idx; 926 ctx.ib_idx = ib_idx;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index c855366521ab..b46280c1279f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -559,6 +559,7 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
559 struct amdgpu_bo_va_mapping *mapping; 559 struct amdgpu_bo_va_mapping *mapping;
560 struct amdgpu_bo *bo; 560 struct amdgpu_bo *bo;
561 uint64_t addr; 561 uint64_t addr;
562 int r;
562 563
563 if (index == 0xffffffff) 564 if (index == 0xffffffff)
564 index = 0; 565 index = 0;
@@ -567,11 +568,11 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
567 ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; 568 ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
568 addr += ((uint64_t)size) * ((uint64_t)index); 569 addr += ((uint64_t)size) * ((uint64_t)index);
569 570
570 mapping = amdgpu_cs_find_mapping(p, addr, &bo); 571 r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
571 if (mapping == NULL) { 572 if (r) {
572 DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", 573 DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
573 addr, lo, hi, size, index); 574 addr, lo, hi, size, index);
574 return -EINVAL; 575 return r;
575 } 576 }
576 577
577 if ((addr + (uint64_t)size) > 578 if ((addr + (uint64_t)size) >
@@ -652,10 +653,6 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
652 p->job->vm = NULL; 653 p->job->vm = NULL;
653 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); 654 ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
654 655
655 r = amdgpu_cs_sysvm_access_required(p);
656 if (r)
657 return r;
658
659 while (idx < ib->length_dw) { 656 while (idx < ib->length_dw) {
660 uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); 657 uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
661 uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); 658 uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8a081e162d13..ab05121b9272 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -46,14 +46,14 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
46 * address within META_DATA init package to support SRIOV gfx preemption. 46 * address within META_DATA init package to support SRIOV gfx preemption.
47 */ 47 */
48 48
49int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) 49int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
50 struct amdgpu_bo_va **bo_va)
50{ 51{
51 int r;
52 struct amdgpu_bo_va *bo_va;
53 struct ww_acquire_ctx ticket; 52 struct ww_acquire_ctx ticket;
54 struct list_head list; 53 struct list_head list;
55 struct amdgpu_bo_list_entry pd; 54 struct amdgpu_bo_list_entry pd;
56 struct ttm_validate_buffer csa_tv; 55 struct ttm_validate_buffer csa_tv;
56 int r;
57 57
58 INIT_LIST_HEAD(&list); 58 INIT_LIST_HEAD(&list);
59 INIT_LIST_HEAD(&csa_tv.head); 59 INIT_LIST_HEAD(&csa_tv.head);
@@ -69,34 +69,33 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
69 return r; 69 return r;
70 } 70 }
71 71
72 bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); 72 *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
73 if (!bo_va) { 73 if (!*bo_va) {
74 ttm_eu_backoff_reservation(&ticket, &list); 74 ttm_eu_backoff_reservation(&ticket, &list);
75 DRM_ERROR("failed to create bo_va for static CSA\n"); 75 DRM_ERROR("failed to create bo_va for static CSA\n");
76 return -ENOMEM; 76 return -ENOMEM;
77 } 77 }
78 78
79 r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR, 79 r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
80 AMDGPU_CSA_SIZE); 80 AMDGPU_CSA_SIZE);
81 if (r) { 81 if (r) {
82 DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); 82 DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
83 amdgpu_vm_bo_rmv(adev, bo_va); 83 amdgpu_vm_bo_rmv(adev, *bo_va);
84 ttm_eu_backoff_reservation(&ticket, &list); 84 ttm_eu_backoff_reservation(&ticket, &list);
85 return r; 85 return r;
86 } 86 }
87 87
88 r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE, 88 r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
89 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | 89 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
90 AMDGPU_PTE_EXECUTABLE); 90 AMDGPU_PTE_EXECUTABLE);
91 91
92 if (r) { 92 if (r) {
93 DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); 93 DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
94 amdgpu_vm_bo_rmv(adev, bo_va); 94 amdgpu_vm_bo_rmv(adev, *bo_va);
95 ttm_eu_backoff_reservation(&ticket, &list); 95 ttm_eu_backoff_reservation(&ticket, &list);
96 return r; 96 return r;
97 } 97 }
98 98
99 vm->csa_bo_va = bo_va;
100 ttm_eu_backoff_reservation(&ticket, &list); 99 ttm_eu_backoff_reservation(&ticket, &list);
101 return 0; 100 return 0;
102} 101}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index e5b1baf387c1..afcfb8bcfb65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -90,7 +90,8 @@ static inline bool is_virtual_machine(void)
90 90
91struct amdgpu_vm; 91struct amdgpu_vm;
92int amdgpu_allocate_static_csa(struct amdgpu_device *adev); 92int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
93int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm); 93int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
94 struct amdgpu_bo_va **bo_va);
94void amdgpu_virt_init_setting(struct amdgpu_device *adev); 95void amdgpu_virt_init_setting(struct amdgpu_device *adev);
95uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); 96uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
96void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); 97void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9ce36652029e..bbcc67038203 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -27,12 +27,59 @@
27 */ 27 */
28#include <linux/dma-fence-array.h> 28#include <linux/dma-fence-array.h>
29#include <linux/interval_tree_generic.h> 29#include <linux/interval_tree_generic.h>
30#include <linux/idr.h>
30#include <drm/drmP.h> 31#include <drm/drmP.h>
31#include <drm/amdgpu_drm.h> 32#include <drm/amdgpu_drm.h>
32#include "amdgpu.h" 33#include "amdgpu.h"
33#include "amdgpu_trace.h" 34#include "amdgpu_trace.h"
34 35
35/* 36/*
37 * PASID manager
38 *
39 * PASIDs are global address space identifiers that can be shared
40 * between the GPU, an IOMMU and the driver. VMs on different devices
41 * may use the same PASID if they share the same address
42 * space. Therefore PASIDs are allocated using a global IDA. VMs are
43 * looked up from the PASID per amdgpu_device.
44 */
45static DEFINE_IDA(amdgpu_vm_pasid_ida);
46
47/**
48 * amdgpu_vm_alloc_pasid - Allocate a PASID
49 * @bits: Maximum width of the PASID in bits, must be at least 1
50 *
51 * Allocates a PASID of the given width while keeping smaller PASIDs
52 * available if possible.
53 *
54 * Returns a positive integer on success. Returns %-EINVAL if bits==0.
55 * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
56 * memory allocation failure.
57 */
58int amdgpu_vm_alloc_pasid(unsigned int bits)
59{
60 int pasid = -EINVAL;
61
62 for (bits = min(bits, 31U); bits > 0; bits--) {
63 pasid = ida_simple_get(&amdgpu_vm_pasid_ida,
64 1U << (bits - 1), 1U << bits,
65 GFP_KERNEL);
66 if (pasid != -ENOSPC)
67 break;
68 }
69
70 return pasid;
71}
72
73/**
74 * amdgpu_vm_free_pasid - Free a PASID
75 * @pasid: PASID to free
76 */
77void amdgpu_vm_free_pasid(unsigned int pasid)
78{
79 ida_simple_remove(&amdgpu_vm_pasid_ida, pasid);
80}
81
82/*
36 * GPUVM 83 * GPUVM
37 * GPUVM is similar to the legacy gart on older asics, however 84 * GPUVM is similar to the legacy gart on older asics, however
38 * rather than there being a single global gart table 85 * rather than there being a single global gart table
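
amdgpu_vm_alloc_pasid() keeps narrow PASIDs available for callers that need them: for a requested width it first asks the IDA for an id in [2^(bits-1), 2^bits) and only falls back to the next smaller power-of-two range when that one is exhausted. A stand-alone simulation of the search order, with a toy bitmap allocator standing in for ida_simple_get() (it simply stops at the first range with a free id):

#include <stdbool.h>
#include <stdio.h>

#define MAX_ID 1024
static bool used[MAX_ID];

/* Toy stand-in for ida_simple_get(): first free id in [lo, hi), or -1. */
static int range_get(unsigned int lo, unsigned int hi)
{
	unsigned int id;

	for (id = lo; id < hi && id < MAX_ID; id++) {
		if (!used[id]) {
			used[id] = true;
			return (int)id;
		}
	}
	return -1;
}

static int alloc_pasid(unsigned int bits)
{
	int pasid = -1;

	if (bits > 9)		/* cap for the toy allocator (min(bits, 31) upstream) */
		bits = 9;
	for (; bits > 0; bits--) {
		pasid = range_get(1u << (bits - 1), 1u << bits);
		if (pasid >= 0)	/* stop at the first range with space */
			break;
	}
	return pasid;
}

int main(void)
{
	printf("first:  %d\n", alloc_pasid(4));	/* tries [8,16) first -> 8 */
	printf("second: %d\n", alloc_pasid(4));	/* next free in [8,16)  -> 9 */
	return 0;
}
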
@@ -140,7 +187,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
140 struct list_head *validated, 187 struct list_head *validated,
141 struct amdgpu_bo_list_entry *entry) 188 struct amdgpu_bo_list_entry *entry)
142{ 189{
143 entry->robj = vm->root.bo; 190 entry->robj = vm->root.base.bo;
144 entry->priority = 0; 191 entry->priority = 0;
145 entry->tv.bo = &entry->robj->tbo; 192 entry->tv.bo = &entry->robj->tbo;
146 entry->tv.shared = true; 193 entry->tv.shared = true;
@@ -149,54 +196,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
149} 196}
150 197
151/** 198/**
152 * amdgpu_vm_validate_layer - validate a single page table level
153 *
154 * @parent: parent page table level
155 * @validate: callback to do the validation
156 * @param: parameter for the validation callback
157 *
158 * Validate the page table BOs on command submission if neccessary.
159 */
160static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
161 int (*validate)(void *, struct amdgpu_bo *),
162 void *param, bool use_cpu_for_update)
163{
164 unsigned i;
165 int r;
166
167 if (use_cpu_for_update) {
168 r = amdgpu_bo_kmap(parent->bo, NULL);
169 if (r)
170 return r;
171 }
172
173 if (!parent->entries)
174 return 0;
175
176 for (i = 0; i <= parent->last_entry_used; ++i) {
177 struct amdgpu_vm_pt *entry = &parent->entries[i];
178
179 if (!entry->bo)
180 continue;
181
182 r = validate(param, entry->bo);
183 if (r)
184 return r;
185
186 /*
187 * Recurse into the sub directory. This is harmless because we
188 * have only a maximum of 5 layers.
189 */
190 r = amdgpu_vm_validate_level(entry, validate, param,
191 use_cpu_for_update);
192 if (r)
193 return r;
194 }
195
196 return r;
197}
198
199/**
200 * amdgpu_vm_validate_pt_bos - validate the page table BOs 199 * amdgpu_vm_validate_pt_bos - validate the page table BOs
201 * 200 *
202 * @adev: amdgpu device pointer 201 * @adev: amdgpu device pointer
@@ -210,64 +209,70 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
210 int (*validate)(void *p, struct amdgpu_bo *bo), 209 int (*validate)(void *p, struct amdgpu_bo *bo),
211 void *param) 210 void *param)
212{ 211{
213 uint64_t num_evictions; 212 struct ttm_bo_global *glob = adev->mman.bdev.glob;
214 213 int r;
215 /* We only need to validate the page tables
216 * if they aren't already valid.
217 */
218 num_evictions = atomic64_read(&adev->num_evictions);
219 if (num_evictions == vm->last_eviction_counter)
220 return 0;
221 214
222 return amdgpu_vm_validate_level(&vm->root, validate, param, 215 spin_lock(&vm->status_lock);
223 vm->use_cpu_for_update); 216 while (!list_empty(&vm->evicted)) {
224} 217 struct amdgpu_vm_bo_base *bo_base;
218 struct amdgpu_bo *bo;
225 219
226/** 220 bo_base = list_first_entry(&vm->evicted,
227 * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail 221 struct amdgpu_vm_bo_base,
228 * 222 vm_status);
229 * @adev: amdgpu device instance 223 spin_unlock(&vm->status_lock);
230 * @vm: vm providing the BOs
231 *
232 * Move the PT BOs to the tail of the LRU.
233 */
234static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent)
235{
236 unsigned i;
237 224
238 if (!parent->entries) 225 bo = bo_base->bo;
239 return; 226 BUG_ON(!bo);
227 if (bo->parent) {
228 r = validate(param, bo);
229 if (r)
230 return r;
240 231
241 for (i = 0; i <= parent->last_entry_used; ++i) { 232 spin_lock(&glob->lru_lock);
242 struct amdgpu_vm_pt *entry = &parent->entries[i]; 233 ttm_bo_move_to_lru_tail(&bo->tbo);
234 if (bo->shadow)
235 ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
236 spin_unlock(&glob->lru_lock);
237 }
243 238
244 if (!entry->bo) 239 if (bo->tbo.type == ttm_bo_type_kernel &&
245 continue; 240 vm->use_cpu_for_update) {
241 r = amdgpu_bo_kmap(bo, NULL);
242 if (r)
243 return r;
244 }
246 245
247 ttm_bo_move_to_lru_tail(&entry->bo->tbo); 246 spin_lock(&vm->status_lock);
248 amdgpu_vm_move_level_in_lru(entry); 247 if (bo->tbo.type != ttm_bo_type_kernel)
248 list_move(&bo_base->vm_status, &vm->moved);
249 else
250 list_move(&bo_base->vm_status, &vm->relocated);
249 } 251 }
252 spin_unlock(&vm->status_lock);
253
254 return 0;
250} 255}
251 256
252/** 257/**
253 * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail 258 * amdgpu_vm_ready - check VM is ready for updates
254 * 259 *
255 * @adev: amdgpu device instance 260 * @vm: VM to check
256 * @vm: vm providing the BOs
257 * 261 *
258 * Move the PT BOs to the tail of the LRU. 262 * Check if all VM PDs/PTs are ready for updates
259 */ 263 */
260void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, 264bool amdgpu_vm_ready(struct amdgpu_vm *vm)
261 struct amdgpu_vm *vm)
262{ 265{
263 struct ttm_bo_global *glob = adev->mman.bdev.glob; 266 bool ready;
264 267
265 spin_lock(&glob->lru_lock); 268 spin_lock(&vm->status_lock);
266 amdgpu_vm_move_level_in_lru(&vm->root); 269 ready = list_empty(&vm->evicted);
267 spin_unlock(&glob->lru_lock); 270 spin_unlock(&vm->status_lock);
271
272 return ready;
268} 273}
269 274
270 /** 275/**
271 * amdgpu_vm_alloc_levels - allocate the PD/PT levels 276 * amdgpu_vm_alloc_levels - allocate the PD/PT levels
272 * 277 *
273 * @adev: amdgpu_device pointer 278 * @adev: amdgpu_device pointer
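
Page table eviction is now tracked per VM: evicted PDs/PTs land on vm->evicted, amdgpu_vm_validate_pt_bos() drains that list while dropping status_lock around the actual validation and kmap work and re-taking it to move each entry to the moved or relocated list, and amdgpu_vm_ready() reduces to checking that vm->evicted is empty. The drain-with-lock-dropping shape in a small stand-alone form (a singly linked list and a pthread mutex as stand-ins; unlike the driver, this pops the entry before processing it):

#include <pthread.h>
#include <stdio.h>

struct entry {
	struct entry *next;
	int id;
};

static pthread_mutex_t status_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *evicted;	/* singly linked stand-in for vm->evicted */
static struct entry *moved;	/* ... and for vm->moved */

static void validate(struct entry *e) { printf("validated %d\n", e->id); }

static void drain_evicted(void)
{
	pthread_mutex_lock(&status_lock);
	while (evicted) {
		struct entry *e = evicted;

		evicted = e->next;
		pthread_mutex_unlock(&status_lock);	/* drop the lock for real work */

		validate(e);				/* validate()/kmap in the driver */

		pthread_mutex_lock(&status_lock);	/* re-take before touching lists */
		e->next = moved;
		moved = e;
	}
	pthread_mutex_unlock(&status_lock);
}

int main(void)
{
	struct entry a = { NULL, 1 }, b = { &a, 2 };

	evicted = &b;
	drain_evicted();
	return 0;
}
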
@@ -330,11 +335,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
330 335
331 /* walk over the address space and allocate the page tables */ 336 /* walk over the address space and allocate the page tables */
332 for (pt_idx = from; pt_idx <= to; ++pt_idx) { 337 for (pt_idx = from; pt_idx <= to; ++pt_idx) {
333 struct reservation_object *resv = vm->root.bo->tbo.resv; 338 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
334 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; 339 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
335 struct amdgpu_bo *pt; 340 struct amdgpu_bo *pt;
336 341
337 if (!entry->bo) { 342 if (!entry->base.bo) {
338 r = amdgpu_bo_create(adev, 343 r = amdgpu_bo_create(adev,
339 amdgpu_vm_bo_size(adev, level), 344 amdgpu_vm_bo_size(adev, level),
340 AMDGPU_GPU_PAGE_SIZE, true, 345 AMDGPU_GPU_PAGE_SIZE, true,
@@ -355,11 +360,15 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
355 /* Keep a reference to the root directory to avoid 360 /* Keep a reference to the root directory to avoid
356 * freeing them up in the wrong order. 361 * freeing them up in the wrong order.
357 */ 362 */
358 pt->parent = amdgpu_bo_ref(vm->root.bo); 363 pt->parent = amdgpu_bo_ref(parent->base.bo);
359 364
360 entry->bo = pt; 365 entry->base.vm = vm;
366 entry->base.bo = pt;
367 list_add_tail(&entry->base.bo_list, &pt->va);
368 spin_lock(&vm->status_lock);
369 list_add(&entry->base.vm_status, &vm->relocated);
370 spin_unlock(&vm->status_lock);
361 entry->addr = 0; 371 entry->addr = 0;
362 entry->huge_page = false;
363 } 372 }
364 373
365 if (level < adev->vm_manager.num_level) { 374 if (level < adev->vm_manager.num_level) {
@@ -899,8 +908,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
899{ 908{
900 struct amdgpu_bo_va *bo_va; 909 struct amdgpu_bo_va *bo_va;
901 910
902 list_for_each_entry(bo_va, &bo->va, bo_list) { 911 list_for_each_entry(bo_va, &bo->va, base.bo_list) {
903 if (bo_va->vm == vm) { 912 if (bo_va->base.vm == vm) {
904 return bo_va; 913 return bo_va;
905 } 914 }
906 } 915 }
@@ -1025,7 +1034,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1025 int r; 1034 int r;
1026 1035
1027 amdgpu_sync_create(&sync); 1036 amdgpu_sync_create(&sync);
1028 amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); 1037 amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
1029 r = amdgpu_sync_wait(&sync, true); 1038 r = amdgpu_sync_wait(&sync, true);
1030 amdgpu_sync_free(&sync); 1039 amdgpu_sync_free(&sync);
1031 1040
@@ -1044,18 +1053,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1044 */ 1053 */
1045static int amdgpu_vm_update_level(struct amdgpu_device *adev, 1054static int amdgpu_vm_update_level(struct amdgpu_device *adev,
1046 struct amdgpu_vm *vm, 1055 struct amdgpu_vm *vm,
1047 struct amdgpu_vm_pt *parent, 1056 struct amdgpu_vm_pt *parent)
1048 unsigned level)
1049{ 1057{
1050 struct amdgpu_bo *shadow; 1058 struct amdgpu_bo *shadow;
1051 struct amdgpu_ring *ring = NULL; 1059 struct amdgpu_ring *ring = NULL;
1052 uint64_t pd_addr, shadow_addr = 0; 1060 uint64_t pd_addr, shadow_addr = 0;
1053 uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
1054 uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; 1061 uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
1055 unsigned count = 0, pt_idx, ndw = 0; 1062 unsigned count = 0, pt_idx, ndw = 0;
1056 struct amdgpu_job *job; 1063 struct amdgpu_job *job;
1057 struct amdgpu_pte_update_params params; 1064 struct amdgpu_pte_update_params params;
1058 struct dma_fence *fence = NULL; 1065 struct dma_fence *fence = NULL;
1066 uint32_t incr;
1059 1067
1060 int r; 1068 int r;
1061 1069
@@ -1064,21 +1072,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
1064 1072
1065 memset(&params, 0, sizeof(params)); 1073 memset(&params, 0, sizeof(params));
1066 params.adev = adev; 1074 params.adev = adev;
1067 shadow = parent->bo->shadow; 1075 shadow = parent->base.bo->shadow;
1068 1076
1069 if (vm->use_cpu_for_update) { 1077 if (vm->use_cpu_for_update) {
1070 pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); 1078 pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
1071 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); 1079 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
1072 if (unlikely(r)) 1080 if (unlikely(r))
1073 return r; 1081 return r;
1074 1082
1075 params.func = amdgpu_vm_cpu_set_ptes; 1083 params.func = amdgpu_vm_cpu_set_ptes;
1076 } else { 1084 } else {
1077 if (shadow) {
1078 r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
1079 if (r)
1080 return r;
1081 }
1082 ring = container_of(vm->entity.sched, struct amdgpu_ring, 1085 ring = container_of(vm->entity.sched, struct amdgpu_ring,
1083 sched); 1086 sched);
1084 1087
@@ -1088,7 +1091,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
1088 /* assume the worst case */ 1091 /* assume the worst case */
1089 ndw += parent->last_entry_used * 6; 1092 ndw += parent->last_entry_used * 6;
1090 1093
1091 pd_addr = amdgpu_bo_gpu_offset(parent->bo); 1094 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
1092 1095
1093 if (shadow) { 1096 if (shadow) {
1094 shadow_addr = amdgpu_bo_gpu_offset(shadow); 1097 shadow_addr = amdgpu_bo_gpu_offset(shadow);
@@ -1108,30 +1111,28 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
1108 1111
1109 /* walk over the address space and update the directory */ 1112 /* walk over the address space and update the directory */
1110 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { 1113 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
1111 struct amdgpu_bo *bo = parent->entries[pt_idx].bo; 1114 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
1115 struct amdgpu_bo *bo = entry->base.bo;
1112 uint64_t pde, pt; 1116 uint64_t pde, pt;
1113 1117
1114 if (bo == NULL) 1118 if (bo == NULL)
1115 continue; 1119 continue;
1116 1120
1117 if (bo->shadow) { 1121 spin_lock(&vm->status_lock);
1118 struct amdgpu_bo *pt_shadow = bo->shadow; 1122 list_del_init(&entry->base.vm_status);
1119 1123 spin_unlock(&vm->status_lock);
1120 r = amdgpu_ttm_bind(&pt_shadow->tbo,
1121 &pt_shadow->tbo.mem);
1122 if (r)
1123 return r;
1124 }
1125 1124
1126 pt = amdgpu_bo_gpu_offset(bo); 1125 pt = amdgpu_bo_gpu_offset(bo);
1127 pt = amdgpu_gart_get_vm_pde(adev, pt); 1126 pt = amdgpu_gart_get_vm_pde(adev, pt);
1128 if (parent->entries[pt_idx].addr == pt || 1127 /* Don't update huge pages here */
1129 parent->entries[pt_idx].huge_page) 1128 if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
1129 parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
1130 continue; 1130 continue;
1131 1131
1132 parent->entries[pt_idx].addr = pt; 1132 parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
1133 1133
1134 pde = pd_addr + pt_idx * 8; 1134 pde = pd_addr + pt_idx * 8;
1135 incr = amdgpu_bo_size(bo);
1135 if (((last_pde + 8 * count) != pde) || 1136 if (((last_pde + 8 * count) != pde) ||
1136 ((last_pt + incr * count) != pt) || 1137 ((last_pt + incr * count) != pt) ||
1137 (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { 1138 (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
@@ -1159,7 +1160,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
1159 } 1160 }
1160 1161
1161 if (count) { 1162 if (count) {
1162 if (vm->root.bo->shadow) 1163 if (vm->root.base.bo->shadow)
1163 params.func(&params, last_shadow, last_pt, 1164 params.func(&params, last_shadow, last_pt,
1164 count, incr, AMDGPU_PTE_VALID); 1165 count, incr, AMDGPU_PTE_VALID);
1165 1166
@@ -1172,7 +1173,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
1172 amdgpu_job_free(job); 1173 amdgpu_job_free(job);
1173 } else { 1174 } else {
1174 amdgpu_ring_pad_ib(ring, params.ib); 1175 amdgpu_ring_pad_ib(ring, params.ib);
1175 amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, 1176 amdgpu_sync_resv(adev, &job->sync,
1177 parent->base.bo->tbo.resv,
1176 AMDGPU_FENCE_OWNER_VM); 1178 AMDGPU_FENCE_OWNER_VM);
1177 if (shadow) 1179 if (shadow)
1178 amdgpu_sync_resv(adev, &job->sync, 1180 amdgpu_sync_resv(adev, &job->sync,
@@ -1185,26 +1187,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
1185 if (r) 1187 if (r)
1186 goto error_free; 1188 goto error_free;
1187 1189
1188 amdgpu_bo_fence(parent->bo, fence, true); 1190 amdgpu_bo_fence(parent->base.bo, fence, true);
1189 dma_fence_put(vm->last_dir_update); 1191 dma_fence_put(vm->last_update);
1190 vm->last_dir_update = dma_fence_get(fence); 1192 vm->last_update = fence;
1191 dma_fence_put(fence);
1192 } 1193 }
1193 } 1194 }
1194 /*
1195 * Recurse into the subdirectories. This recursion is harmless because
1196 * we only have a maximum of 5 layers.
1197 */
1198 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
1199 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
1200
1201 if (!entry->bo)
1202 continue;
1203
1204 r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
1205 if (r)
1206 return r;
1207 }
1208 1195
1209 return 0; 1196 return 0;
1210 1197
@@ -1220,7 +1207,8 @@ error_free:
1220 * 1207 *
1221 * Mark all PD level as invalid after an error. 1208 * Mark all PD level as invalid after an error.
1222 */ 1209 */
1223static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) 1210static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
1211 struct amdgpu_vm_pt *parent)
1224{ 1212{
1225 unsigned pt_idx; 1213 unsigned pt_idx;
1226 1214
@@ -1231,11 +1219,15 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
1231 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { 1219 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
1232 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; 1220 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
1233 1221
1234 if (!entry->bo) 1222 if (!entry->base.bo)
1235 continue; 1223 continue;
1236 1224
1237 entry->addr = ~0ULL; 1225 entry->addr = ~0ULL;
1238 amdgpu_vm_invalidate_level(entry); 1226 spin_lock(&vm->status_lock);
1227 if (list_empty(&entry->base.vm_status))
1228 list_add(&entry->base.vm_status, &vm->relocated);
1229 spin_unlock(&vm->status_lock);
1230 amdgpu_vm_invalidate_level(vm, entry);
1239 } 1231 }
1240} 1232}
1241 1233
@@ -1253,9 +1245,38 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
1253{ 1245{
1254 int r; 1246 int r;
1255 1247
1256 r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); 1248 spin_lock(&vm->status_lock);
1257 if (r) 1249 while (!list_empty(&vm->relocated)) {
1258 amdgpu_vm_invalidate_level(&vm->root); 1250 struct amdgpu_vm_bo_base *bo_base;
1251 struct amdgpu_bo *bo;
1252
1253 bo_base = list_first_entry(&vm->relocated,
1254 struct amdgpu_vm_bo_base,
1255 vm_status);
1256 spin_unlock(&vm->status_lock);
1257
1258 bo = bo_base->bo->parent;
1259 if (bo) {
1260 struct amdgpu_vm_bo_base *parent;
1261 struct amdgpu_vm_pt *pt;
1262
1263 parent = list_first_entry(&bo->va,
1264 struct amdgpu_vm_bo_base,
1265 bo_list);
1266 pt = container_of(parent, struct amdgpu_vm_pt, base);
1267
1268 r = amdgpu_vm_update_level(adev, vm, pt);
1269 if (r) {
1270 amdgpu_vm_invalidate_level(vm, &vm->root);
1271 return r;
1272 }
1273 spin_lock(&vm->status_lock);
1274 } else {
1275 spin_lock(&vm->status_lock);
1276 list_del_init(&bo_base->vm_status);
1277 }
1278 }
1279 spin_unlock(&vm->status_lock);
1259 1280
1260 if (vm->use_cpu_for_update) { 1281 if (vm->use_cpu_for_update) {
1261 /* Flush HDP */ 1282 /* Flush HDP */
@@ -1286,7 +1307,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1286 *entry = &p->vm->root; 1307 *entry = &p->vm->root;
1287 while ((*entry)->entries) { 1308 while ((*entry)->entries) {
1288 idx = addr >> (p->adev->vm_manager.block_size * level--); 1309 idx = addr >> (p->adev->vm_manager.block_size * level--);
1289 idx %= amdgpu_bo_size((*entry)->bo) / 8; 1310 idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
1290 *parent = *entry; 1311 *parent = *entry;
1291 *entry = &(*entry)->entries[idx]; 1312 *entry = &(*entry)->entries[idx];
1292 } 1313 }
@@ -1307,55 +1328,62 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1307 * 1328 *
1308 * Check if we can update the PD with a huge page. 1329 * Check if we can update the PD with a huge page.
1309 */ 1330 */
1310static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, 1331static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1311 struct amdgpu_vm_pt *entry, 1332 struct amdgpu_vm_pt *entry,
1312 struct amdgpu_vm_pt *parent, 1333 struct amdgpu_vm_pt *parent,
1313 unsigned nptes, uint64_t dst, 1334 unsigned nptes, uint64_t dst,
1314 uint64_t flags) 1335 uint64_t flags)
1315{ 1336{
1316 bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); 1337 bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
1317 uint64_t pd_addr, pde; 1338 uint64_t pd_addr, pde;
1318 int r;
1319 1339
1320 /* In the case of a mixed PT the PDE must point to it*/ 1340 /* In the case of a mixed PT the PDE must point to it*/
1321 if (p->adev->asic_type < CHIP_VEGA10 || 1341 if (p->adev->asic_type < CHIP_VEGA10 ||
1322 nptes != AMDGPU_VM_PTE_COUNT(p->adev) || 1342 nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
1323 p->func == amdgpu_vm_do_copy_ptes || 1343 p->src ||
1324 !(flags & AMDGPU_PTE_VALID)) { 1344 !(flags & AMDGPU_PTE_VALID)) {
1325 1345
1326 dst = amdgpu_bo_gpu_offset(entry->bo); 1346 dst = amdgpu_bo_gpu_offset(entry->base.bo);
1327 dst = amdgpu_gart_get_vm_pde(p->adev, dst); 1347 dst = amdgpu_gart_get_vm_pde(p->adev, dst);
1328 flags = AMDGPU_PTE_VALID; 1348 flags = AMDGPU_PTE_VALID;
1329 } else { 1349 } else {
1350 /* Set the huge page flag to stop scanning at this PDE */
1330 flags |= AMDGPU_PDE_PTE; 1351 flags |= AMDGPU_PDE_PTE;
1331 } 1352 }
1332 1353
1333 if (entry->addr == dst && 1354 if (entry->addr == (dst | flags))
1334 entry->huge_page == !!(flags & AMDGPU_PDE_PTE)) 1355 return;
1335 return 0;
1336 1356
1337 entry->addr = dst; 1357 entry->addr = (dst | flags);
1338 entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
1339 1358
1340 if (use_cpu_update) { 1359 if (use_cpu_update) {
1341 r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr); 1360 /* In case a huge page is replaced with a system
1342 if (r) 1361 * memory mapping, p->pages_addr != NULL and
1343 return r; 1362 * amdgpu_vm_cpu_set_ptes would try to translate dst
1363 * through amdgpu_vm_map_gart. But dst is already a
1364 * GPU address (of the page table). Disable
1365 * amdgpu_vm_map_gart temporarily.
1366 */
1367 dma_addr_t *tmp;
1368
1369 tmp = p->pages_addr;
1370 p->pages_addr = NULL;
1344 1371
1372 pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
1345 pde = pd_addr + (entry - parent->entries) * 8; 1373 pde = pd_addr + (entry - parent->entries) * 8;
1346 amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); 1374 amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
1375
1376 p->pages_addr = tmp;
1347 } else { 1377 } else {
1348 if (parent->bo->shadow) { 1378 if (parent->base.bo->shadow) {
1349 pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); 1379 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
1350 pde = pd_addr + (entry - parent->entries) * 8; 1380 pde = pd_addr + (entry - parent->entries) * 8;
1351 amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); 1381 amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
1352 } 1382 }
1353 pd_addr = amdgpu_bo_gpu_offset(parent->bo); 1383 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
1354 pde = pd_addr + (entry - parent->entries) * 8; 1384 pde = pd_addr + (entry - parent->entries) * 8;
1355 amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); 1385 amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
1356 } 1386 }
1357
1358 return 0;
1359} 1387}
1360 1388
1361/** 1389/**
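The separate entry->huge_page bool is gone: the AMDGPU_PDE_PTE flag now travels in entry->addr itself, so one compare against (dst | flags) detects an unchanged entry and the PTE walk later only has to test the bit. A hedged user-space illustration of that encoding (the bit position and address mask are picked for the example, not copied from the hardware definition):

#include <stdint.h>
#include <stdio.h>

#define PDE_PTE   (1ULL << 54)                    /* flag bit, like AMDGPU_PDE_PTE */
#define ADDR_MASK 0x0000fffffffff000ULL           /* page-aligned address bits */

int main(void)
{
	uint64_t dst = 0x123456789000ULL;   /* page table / huge page address */
	uint64_t flags = PDE_PTE;           /* set when a huge page backs the PDE */
	uint64_t entry_addr = dst | flags;  /* what the patch stores in entry->addr */

	/* later, the PTE update path only needs the flag test */
	if (entry_addr & PDE_PTE)
		printf("huge page, skip PTE updates\n");

	printf("address part: 0x%llx\n",
	       (unsigned long long)(entry_addr & ADDR_MASK));
	return 0;
}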
@@ -1382,7 +1410,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1382 struct amdgpu_bo *pt; 1410 struct amdgpu_bo *pt;
1383 unsigned nptes; 1411 unsigned nptes;
1384 bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); 1412 bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
1385 int r;
1386 1413
1387 /* walk over the address space and update the page tables */ 1414 /* walk over the address space and update the page tables */
1388 for (addr = start; addr < end; addr += nptes, 1415 for (addr = start; addr < end; addr += nptes,
@@ -1398,15 +1425,13 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1398 else 1425 else
1399 nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); 1426 nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1400 1427
1401 r = amdgpu_vm_handle_huge_pages(params, entry, parent, 1428 amdgpu_vm_handle_huge_pages(params, entry, parent,
1402 nptes, dst, flags); 1429 nptes, dst, flags);
1403 if (r) 1430 /* We don't need to update PTEs for huge pages */
1404 return r; 1431 if (entry->addr & AMDGPU_PDE_PTE)
1405
1406 if (entry->huge_page)
1407 continue; 1432 continue;
1408 1433
1409 pt = entry->bo; 1434 pt = entry->base.bo;
1410 if (use_cpu_update) { 1435 if (use_cpu_update) {
1411 pe_start = (unsigned long)amdgpu_bo_kptr(pt); 1436 pe_start = (unsigned long)amdgpu_bo_kptr(pt);
1412 } else { 1437 } else {
@@ -1442,8 +1467,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1442 uint64_t start, uint64_t end, 1467 uint64_t start, uint64_t end,
1443 uint64_t dst, uint64_t flags) 1468 uint64_t dst, uint64_t flags)
1444{ 1469{
1445 int r;
1446
1447 /** 1470 /**
1448 * The MC L1 TLB supports variable sized pages, based on a fragment 1471 * The MC L1 TLB supports variable sized pages, based on a fragment
1449 * field in the PTE. When this field is set to a non-zero value, page 1472 * field in the PTE. When this field is set to a non-zero value, page
@@ -1462,41 +1485,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1462 * Userspace can support this by aligning virtual base address and 1485 * Userspace can support this by aligning virtual base address and
1463 * allocation size to the fragment size. 1486 * allocation size to the fragment size.
1464 */ 1487 */
1465 1488 unsigned max_frag = params->adev->vm_manager.fragment_size;
1466 /* SI and newer are optimized for 64KB */ 1489 int r;
1467 unsigned pages_per_frag = AMDGPU_LOG2_PAGES_PER_FRAG(params->adev);
1468 uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
1469 uint64_t frag_align = 1 << pages_per_frag;
1470
1471 uint64_t frag_start = ALIGN(start, frag_align);
1472 uint64_t frag_end = end & ~(frag_align - 1);
1473 1490
1474 /* system pages are not contiguous */ 1491 /* system pages are not contiguous */
1475 if (params->src || !(flags & AMDGPU_PTE_VALID) || 1492 if (params->src || !(flags & AMDGPU_PTE_VALID))
1476 (frag_start >= frag_end))
1477 return amdgpu_vm_update_ptes(params, start, end, dst, flags); 1493 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1478 1494
1479 /* handle the 4K area at the beginning */ 1495 while (start != end) {
1480 if (start != frag_start) { 1496 uint64_t frag_flags, frag_end;
1481 r = amdgpu_vm_update_ptes(params, start, frag_start, 1497 unsigned frag;
1482 dst, flags); 1498
1499 /* This intentionally wraps around if no bit is set */
1500 frag = min((unsigned)ffs(start) - 1,
1501 (unsigned)fls64(end - start) - 1);
1502 if (frag >= max_frag) {
1503 frag_flags = AMDGPU_PTE_FRAG(max_frag);
1504 frag_end = end & ~((1ULL << max_frag) - 1);
1505 } else {
1506 frag_flags = AMDGPU_PTE_FRAG(frag);
1507 frag_end = start + (1 << frag);
1508 }
1509
1510 r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1511 flags | frag_flags);
1483 if (r) 1512 if (r)
1484 return r; 1513 return r;
1485 dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
1486 }
1487
1488 /* handle the area in the middle */
1489 r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
1490 flags | frag_flags);
1491 if (r)
1492 return r;
1493 1514
1494 /* handle the 4K area at the end */ 1515 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1495 if (frag_end != end) { 1516 start = frag_end;
1496 dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
1497 r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
1498 } 1517 }
1499 return r; 1518
1519 return 0;
1500} 1520}
1501 1521
1502/** 1522/**
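The old begin/middle/end fragment handling is replaced by a single loop: each step picks the largest power-of-two fragment allowed by both the alignment of start (ffs) and the remaining length (fls64), capped at vm_manager.fragment_size. A standalone sketch of that split using page numbers instead of GPU addresses (relies on the GCC/Clang bit builtins and assumes start never reaches zero, which the kernel's ffs()-based wrap-around handles):

#include <stdint.h>
#include <stdio.h>

/* 0-based index of the lowest set bit; assumes v != 0 */
static unsigned lowbit(uint64_t v)  { return (unsigned)__builtin_ctzll(v); }
/* 0-based index of the highest set bit; assumes v != 0 */
static unsigned highbit(uint64_t v) { return 63u - (unsigned)__builtin_clzll(v); }

int main(void)
{
	uint64_t start = 3, end = 40;   /* example range, in pages */
	unsigned max_frag = 4;          /* like adev->vm_manager.fragment_size */

	while (start != end) {
		/* largest fragment both the alignment and the length allow */
		unsigned frag = lowbit(start);
		if (highbit(end - start) < frag)
			frag = highbit(end - start);
		if (frag > max_frag)
			frag = max_frag;

		uint64_t frag_end = frag >= max_frag ?
			end & ~((1ULL << max_frag) - 1) :   /* run of max fragments */
			start + (1ULL << frag);             /* one smaller fragment */

		printf("update [%llu, %llu) with FRAG(%u)\n",
		       (unsigned long long)start,
		       (unsigned long long)frag_end, frag);
		start = frag_end;
	}
	return 0;
}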
@@ -1504,7 +1524,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1504 * 1524 *
1505 * @adev: amdgpu_device pointer 1525 * @adev: amdgpu_device pointer
1506 * @exclusive: fence we need to sync to 1526 * @exclusive: fence we need to sync to
1507 * @src: address where to copy page table entries from
1508 * @pages_addr: DMA addresses to use for mapping 1527 * @pages_addr: DMA addresses to use for mapping
1509 * @vm: requested vm 1528 * @vm: requested vm
1510 * @start: start of mapped range 1529 * @start: start of mapped range
@@ -1518,7 +1537,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
1518 */ 1537 */
1519static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, 1538static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1520 struct dma_fence *exclusive, 1539 struct dma_fence *exclusive,
1521 uint64_t src,
1522 dma_addr_t *pages_addr, 1540 dma_addr_t *pages_addr,
1523 struct amdgpu_vm *vm, 1541 struct amdgpu_vm *vm,
1524 uint64_t start, uint64_t last, 1542 uint64_t start, uint64_t last,
@@ -1536,7 +1554,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1536 memset(&params, 0, sizeof(params)); 1554 memset(&params, 0, sizeof(params));
1537 params.adev = adev; 1555 params.adev = adev;
1538 params.vm = vm; 1556 params.vm = vm;
1539 params.src = src;
1540 1557
1541 /* sync to everything on unmapping */ 1558 /* sync to everything on unmapping */
1542 if (!(flags & AMDGPU_PTE_VALID)) 1559 if (!(flags & AMDGPU_PTE_VALID))
@@ -1565,10 +1582,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1565 nptes = last - start + 1; 1582 nptes = last - start + 1;
1566 1583
1567 /* 1584 /*
1568 * reserve space for one command every (1 << BLOCK_SIZE) 1585 * reserve space for two commands every (1 << BLOCK_SIZE)
1569 * entries or 2k dwords (whatever is smaller) 1586 * entries or 2k dwords (whatever is smaller)
1587 *
1588 * The second command is for the shadow pagetables.
1570 */ 1589 */
1571 ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1; 1590 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
1572 1591
1573 /* padding, etc. */ 1592 /* padding, etc. */
1574 ndw = 64; 1593 ndw = 64;
@@ -1576,15 +1595,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1576 /* one PDE write for each huge page */ 1595 /* one PDE write for each huge page */
1577 ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; 1596 ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
1578 1597
1579 if (src) { 1598 if (pages_addr) {
1580 /* only copy commands needed */
1581 ndw += ncmds * 7;
1582
1583 params.func = amdgpu_vm_do_copy_ptes;
1584
1585 } else if (pages_addr) {
1586 /* copy commands needed */ 1599 /* copy commands needed */
1587 ndw += ncmds * 7; 1600 ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
1588 1601
1589 /* and also PTEs */ 1602 /* and also PTEs */
1590 ndw += nptes * 2; 1603 ndw += nptes * 2;
@@ -1593,10 +1606,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1593 1606
1594 } else { 1607 } else {
1595 /* set page commands needed */ 1608 /* set page commands needed */
1596 ndw += ncmds * 10; 1609 ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
1597 1610
1598 /* two extra commands for begin/end of fragment */ 1611 /* extra commands for begin/end fragments */
1599 ndw += 2 * 10; 1612 ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
1613 * adev->vm_manager.fragment_size;
1600 1614
1601 params.func = amdgpu_vm_do_set_ptes; 1615 params.func = amdgpu_vm_do_set_ptes;
1602 } 1616 }
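The IB size estimate now asks the PTE backend for its real per-command cost (copy_pte_num_dw / set_pte_pde_num_dw) instead of the hard-coded 7 and 10 dwords, and doubles ncmds because every page table write is mirrored into the shadow page table. A rough stand-alone recalculation of the "set pages" path under assumed costs (the numbers are placeholders, not taken from any particular SDMA generation):

#include <stdio.h>

int main(void)
{
	unsigned nptes = 512 * 1024;       /* PTEs touched by the update (example) */
	unsigned block_size = 9;           /* adev->vm_manager.block_size (example) */
	unsigned set_pte_pde_num_dw = 10;  /* assumed backend cost per command */
	unsigned fragment_size = 4;        /* adev->vm_manager.fragment_size */

	/* two commands per (1 << block_size) entries: one for the page
	 * table itself, one for its shadow copy */
	unsigned ncmds = ((nptes >> (block_size < 11 ? block_size : 11)) + 1) * 2;

	unsigned ndw = 64;                               /* padding, etc. */
	ndw += ((nptes >> block_size) + 1) * 6;          /* one PDE write per huge page */
	ndw += ncmds * set_pte_pde_num_dw;               /* set page commands */
	ndw += 2 * set_pte_pde_num_dw * fragment_size;   /* begin/end fragment commands */

	printf("ncmds=%u, ndw=%u (~%u KiB of IB space)\n",
	       ncmds, ndw, ndw * 4 / 1024);
	return 0;
}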
@@ -1607,7 +1621,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1607 1621
1608 params.ib = &job->ibs[0]; 1622 params.ib = &job->ibs[0];
1609 1623
1610 if (!src && pages_addr) { 1624 if (pages_addr) {
1611 uint64_t *pte; 1625 uint64_t *pte;
1612 unsigned i; 1626 unsigned i;
1613 1627
@@ -1628,12 +1642,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1628 if (r) 1642 if (r)
1629 goto error_free; 1643 goto error_free;
1630 1644
1631 r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv, 1645 r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
1632 owner); 1646 owner);
1633 if (r) 1647 if (r)
1634 goto error_free; 1648 goto error_free;
1635 1649
1636 r = reservation_object_reserve_shared(vm->root.bo->tbo.resv); 1650 r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
1637 if (r) 1651 if (r)
1638 goto error_free; 1652 goto error_free;
1639 1653
@@ -1648,14 +1662,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1648 if (r) 1662 if (r)
1649 goto error_free; 1663 goto error_free;
1650 1664
1651 amdgpu_bo_fence(vm->root.bo, f, true); 1665 amdgpu_bo_fence(vm->root.base.bo, f, true);
1652 dma_fence_put(*fence); 1666 dma_fence_put(*fence);
1653 *fence = f; 1667 *fence = f;
1654 return 0; 1668 return 0;
1655 1669
1656error_free: 1670error_free:
1657 amdgpu_job_free(job); 1671 amdgpu_job_free(job);
1658 amdgpu_vm_invalidate_level(&vm->root); 1672 amdgpu_vm_invalidate_level(vm, &vm->root);
1659 return r; 1673 return r;
1660} 1674}
1661 1675
@@ -1664,7 +1678,6 @@ error_free:
1664 * 1678 *
1665 * @adev: amdgpu_device pointer 1679 * @adev: amdgpu_device pointer
1666 * @exclusive: fence we need to sync to 1680 * @exclusive: fence we need to sync to
1667 * @gtt_flags: flags as they are used for GTT
1668 * @pages_addr: DMA addresses to use for mapping 1681 * @pages_addr: DMA addresses to use for mapping
1669 * @vm: requested vm 1682 * @vm: requested vm
1670 * @mapping: mapped range and flags to use for the update 1683 * @mapping: mapped range and flags to use for the update
@@ -1678,7 +1691,6 @@ error_free:
1678 */ 1691 */
1679static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, 1692static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1680 struct dma_fence *exclusive, 1693 struct dma_fence *exclusive,
1681 uint64_t gtt_flags,
1682 dma_addr_t *pages_addr, 1694 dma_addr_t *pages_addr,
1683 struct amdgpu_vm *vm, 1695 struct amdgpu_vm *vm,
1684 struct amdgpu_bo_va_mapping *mapping, 1696 struct amdgpu_bo_va_mapping *mapping,
@@ -1686,7 +1698,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1686 struct drm_mm_node *nodes, 1698 struct drm_mm_node *nodes,
1687 struct dma_fence **fence) 1699 struct dma_fence **fence)
1688{ 1700{
1689 uint64_t pfn, src = 0, start = mapping->start; 1701 uint64_t pfn, start = mapping->start;
1690 int r; 1702 int r;
1691 1703
1692 /* normally, bo_va->flags only contains READABLE and WRITEABLE bits here 1704
@@ -1733,11 +1745,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1733 } 1745 }
1734 1746
1735 if (pages_addr) { 1747 if (pages_addr) {
1736 if (flags == gtt_flags) 1748 max_entries = min(max_entries, 16ull * 1024ull);
1737 src = adev->gart.table_addr +
1738 (addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
1739 else
1740 max_entries = min(max_entries, 16ull * 1024ull);
1741 addr = 0; 1749 addr = 0;
1742 } else if (flags & AMDGPU_PTE_VALID) { 1750 } else if (flags & AMDGPU_PTE_VALID) {
1743 addr += adev->vm_manager.vram_base_offset; 1751 addr += adev->vm_manager.vram_base_offset;
@@ -1745,8 +1753,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1745 addr += pfn << PAGE_SHIFT; 1753 addr += pfn << PAGE_SHIFT;
1746 1754
1747 last = min((uint64_t)mapping->last, start + max_entries - 1); 1755 last = min((uint64_t)mapping->last, start + max_entries - 1);
1748 r = amdgpu_vm_bo_update_mapping(adev, exclusive, 1756 r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
1749 src, pages_addr, vm,
1750 start, last, flags, addr, 1757 start, last, flags, addr,
1751 fence); 1758 fence);
1752 if (r) 1759 if (r)
@@ -1778,75 +1785,75 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1778 struct amdgpu_bo_va *bo_va, 1785 struct amdgpu_bo_va *bo_va,
1779 bool clear) 1786 bool clear)
1780{ 1787{
1781 struct amdgpu_vm *vm = bo_va->vm; 1788 struct amdgpu_bo *bo = bo_va->base.bo;
1789 struct amdgpu_vm *vm = bo_va->base.vm;
1782 struct amdgpu_bo_va_mapping *mapping; 1790 struct amdgpu_bo_va_mapping *mapping;
1783 dma_addr_t *pages_addr = NULL; 1791 dma_addr_t *pages_addr = NULL;
1784 uint64_t gtt_flags, flags;
1785 struct ttm_mem_reg *mem; 1792 struct ttm_mem_reg *mem;
1786 struct drm_mm_node *nodes; 1793 struct drm_mm_node *nodes;
1787 struct dma_fence *exclusive; 1794 struct dma_fence *exclusive, **last_update;
1795 uint64_t flags;
1788 int r; 1796 int r;
1789 1797
1790 if (clear || !bo_va->bo) { 1798 if (clear || !bo_va->base.bo) {
1791 mem = NULL; 1799 mem = NULL;
1792 nodes = NULL; 1800 nodes = NULL;
1793 exclusive = NULL; 1801 exclusive = NULL;
1794 } else { 1802 } else {
1795 struct ttm_dma_tt *ttm; 1803 struct ttm_dma_tt *ttm;
1796 1804
1797 mem = &bo_va->bo->tbo.mem; 1805 mem = &bo_va->base.bo->tbo.mem;
1798 nodes = mem->mm_node; 1806 nodes = mem->mm_node;
1799 if (mem->mem_type == TTM_PL_TT) { 1807 if (mem->mem_type == TTM_PL_TT) {
1800 ttm = container_of(bo_va->bo->tbo.ttm, struct 1808 ttm = container_of(bo_va->base.bo->tbo.ttm,
1801 ttm_dma_tt, ttm); 1809 struct ttm_dma_tt, ttm);
1802 pages_addr = ttm->dma_address; 1810 pages_addr = ttm->dma_address;
1803 } 1811 }
1804 exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); 1812 exclusive = reservation_object_get_excl(bo->tbo.resv);
1805 } 1813 }
1806 1814
1807 if (bo_va->bo) { 1815 if (bo)
1808 flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); 1816 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1809 gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && 1817 else
1810 adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ?
1811 flags : 0;
1812 } else {
1813 flags = 0x0; 1818 flags = 0x0;
1814 gtt_flags = ~0x0;
1815 }
1816 1819
1817 spin_lock(&vm->status_lock); 1820 if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
1818 if (!list_empty(&bo_va->vm_status)) 1821 last_update = &vm->last_update;
1822 else
1823 last_update = &bo_va->last_pt_update;
1824
1825 if (!clear && bo_va->base.moved) {
1826 bo_va->base.moved = false;
1819 list_splice_init(&bo_va->valids, &bo_va->invalids); 1827 list_splice_init(&bo_va->valids, &bo_va->invalids);
1820 spin_unlock(&vm->status_lock); 1828
1829 } else if (bo_va->cleared != clear) {
1830 list_splice_init(&bo_va->valids, &bo_va->invalids);
1831 }
1821 1832
1822 list_for_each_entry(mapping, &bo_va->invalids, list) { 1833 list_for_each_entry(mapping, &bo_va->invalids, list) {
1823 r = amdgpu_vm_bo_split_mapping(adev, exclusive, 1834 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
1824 gtt_flags, pages_addr, vm,
1825 mapping, flags, nodes, 1835 mapping, flags, nodes,
1826 &bo_va->last_pt_update); 1836 last_update);
1827 if (r) 1837 if (r)
1828 return r; 1838 return r;
1829 } 1839 }
1830 1840
1831 if (trace_amdgpu_vm_bo_mapping_enabled()) { 1841 if (vm->use_cpu_for_update) {
1832 list_for_each_entry(mapping, &bo_va->valids, list) 1842 /* Flush HDP */
1833 trace_amdgpu_vm_bo_mapping(mapping); 1843 mb();
1834 1844 amdgpu_gart_flush_gpu_tlb(adev, 0);
1835 list_for_each_entry(mapping, &bo_va->invalids, list)
1836 trace_amdgpu_vm_bo_mapping(mapping);
1837 } 1845 }
1838 1846
1839 spin_lock(&vm->status_lock); 1847 spin_lock(&vm->status_lock);
1840 list_splice_init(&bo_va->invalids, &bo_va->valids); 1848 list_del_init(&bo_va->base.vm_status);
1841 list_del_init(&bo_va->vm_status);
1842 if (clear)
1843 list_add(&bo_va->vm_status, &vm->cleared);
1844 spin_unlock(&vm->status_lock); 1849 spin_unlock(&vm->status_lock);
1845 1850
1846 if (vm->use_cpu_for_update) { 1851 list_splice_init(&bo_va->invalids, &bo_va->valids);
1847 /* Flush HDP */ 1852 bo_va->cleared = clear;
1848 mb(); 1853
1849 amdgpu_gart_flush_gpu_tlb(adev, 0); 1854 if (trace_amdgpu_vm_bo_mapping_enabled()) {
1855 list_for_each_entry(mapping, &bo_va->valids, list)
1856 trace_amdgpu_vm_bo_mapping(mapping);
1850 } 1857 }
1851 1858
1852 return 0; 1859 return 0;
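amdgpu_vm_bo_update() now distinguishes per-VM BOs (those sharing the root page directory's reservation object) from independently reserved BOs: the former feed their fence into vm->last_update, the latter keep their private bo_va->last_pt_update, and the valids/invalids splice only happens when the BO actually moved or the clear state flipped. A compact sketch of the fence-slot selection, with plain booleans standing in for the reservation comparison:

#include <stdbool.h>
#include <stdio.h>

struct fence { int seq; };

static struct fence vm_last_update       = { 0 };   /* vm->last_update */
static struct fence bo_va_last_pt_update = { 0 };   /* bo_va->last_pt_update */

/* pick which fence slot a mapping update should overwrite */
static struct fence *pick_last_update(bool clear, bool shares_root_resv)
{
	if (clear || shares_root_resv)
		return &vm_last_update;        /* per-VM BO: one fence for the whole VM */
	return &bo_va_last_pt_update;          /* independent BO: its own fence */
}

int main(void)
{
	pick_last_update(false, true)->seq  = 1;   /* per-VM BO update */
	pick_last_update(false, false)->seq = 2;   /* regular BO update */
	printf("vm:%d bo_va:%d\n", vm_last_update.seq, bo_va_last_pt_update.seq);
	return 0;
}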
@@ -1954,7 +1961,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1954 */ 1961 */
1955static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 1962static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1956{ 1963{
1957 struct reservation_object *resv = vm->root.bo->tbo.resv; 1964 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
1958 struct dma_fence *excl, **shared; 1965 struct dma_fence *excl, **shared;
1959 unsigned i, shared_count; 1966 unsigned i, shared_count;
1960 int r; 1967 int r;
@@ -2012,7 +2019,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
2012 if (vm->pte_support_ats) 2019 if (vm->pte_support_ats)
2013 init_pte_value = AMDGPU_PTE_SYSTEM; 2020 init_pte_value = AMDGPU_PTE_SYSTEM;
2014 2021
2015 r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, 2022 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
2016 mapping->start, mapping->last, 2023 mapping->start, mapping->last,
2017 init_pte_value, 0, &f); 2024 init_pte_value, 0, &f);
2018 amdgpu_vm_free_mapping(adev, vm, mapping, f); 2025 amdgpu_vm_free_mapping(adev, vm, mapping, f);
@@ -2034,29 +2041,35 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
2034} 2041}
2035 2042
2036/** 2043/**
2037 * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT 2044 * amdgpu_vm_handle_moved - handle moved BOs in the PT
2038 * 2045 *
2039 * @adev: amdgpu_device pointer 2046 * @adev: amdgpu_device pointer
2040 * @vm: requested vm 2047 * @vm: requested vm
2048 * @sync: sync object to add fences to
2041 * 2049 *
2042 * Make sure all invalidated BOs are cleared in the PT. 2050 * Make sure all BOs which are moved are updated in the PTs.
2043 * Returns 0 for success. 2051 * Returns 0 for success.
2044 * 2052 *
2045 * PTs have to be reserved and mutex must be locked! 2053 * PTs have to be reserved!
2046 */ 2054 */
2047int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, 2055int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
2048 struct amdgpu_vm *vm, struct amdgpu_sync *sync) 2056 struct amdgpu_vm *vm)
2049{ 2057{
2050 struct amdgpu_bo_va *bo_va = NULL; 2058 bool clear;
2051 int r = 0; 2059 int r = 0;
2052 2060
2053 spin_lock(&vm->status_lock); 2061 spin_lock(&vm->status_lock);
2054 while (!list_empty(&vm->invalidated)) { 2062 while (!list_empty(&vm->moved)) {
2055 bo_va = list_first_entry(&vm->invalidated, 2063 struct amdgpu_bo_va *bo_va;
2056 struct amdgpu_bo_va, vm_status); 2064
2065 bo_va = list_first_entry(&vm->moved,
2066 struct amdgpu_bo_va, base.vm_status);
2057 spin_unlock(&vm->status_lock); 2067 spin_unlock(&vm->status_lock);
2058 2068
2059 r = amdgpu_vm_bo_update(adev, bo_va, true); 2069 /* Per VM BOs never need to be cleared in the page tables */
2070 clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
2071
2072 r = amdgpu_vm_bo_update(adev, bo_va, clear);
2060 if (r) 2073 if (r)
2061 return r; 2074 return r;
2062 2075
@@ -2064,9 +2077,6 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
2064 } 2077 }
2065 spin_unlock(&vm->status_lock); 2078 spin_unlock(&vm->status_lock);
2066 2079
2067 if (bo_va)
2068 r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
2069
2070 return r; 2080 return r;
2071} 2081}
2072 2082
@@ -2093,20 +2103,54 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
2093 if (bo_va == NULL) { 2103 if (bo_va == NULL) {
2094 return NULL; 2104 return NULL;
2095 } 2105 }
2096 bo_va->vm = vm; 2106 bo_va->base.vm = vm;
2097 bo_va->bo = bo; 2107 bo_va->base.bo = bo;
2108 INIT_LIST_HEAD(&bo_va->base.bo_list);
2109 INIT_LIST_HEAD(&bo_va->base.vm_status);
2110
2098 bo_va->ref_count = 1; 2111 bo_va->ref_count = 1;
2099 INIT_LIST_HEAD(&bo_va->bo_list);
2100 INIT_LIST_HEAD(&bo_va->valids); 2112 INIT_LIST_HEAD(&bo_va->valids);
2101 INIT_LIST_HEAD(&bo_va->invalids); 2113 INIT_LIST_HEAD(&bo_va->invalids);
2102 INIT_LIST_HEAD(&bo_va->vm_status);
2103 2114
2104 if (bo) 2115 if (bo)
2105 list_add_tail(&bo_va->bo_list, &bo->va); 2116 list_add_tail(&bo_va->base.bo_list, &bo->va);
2106 2117
2107 return bo_va; 2118 return bo_va;
2108} 2119}
2109 2120
2121
2122/**
2123 * amdgpu_vm_bo_insert_map - insert a new mapping
2124 *
2125 * @adev: amdgpu_device pointer
2126 * @bo_va: bo_va to store the address
2127 * @mapping: the mapping to insert
2128 *
2129 * Insert a new mapping into all structures.
2130 */
2131static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
2132 struct amdgpu_bo_va *bo_va,
2133 struct amdgpu_bo_va_mapping *mapping)
2134{
2135 struct amdgpu_vm *vm = bo_va->base.vm;
2136 struct amdgpu_bo *bo = bo_va->base.bo;
2137
2138 mapping->bo_va = bo_va;
2139 list_add(&mapping->list, &bo_va->invalids);
2140 amdgpu_vm_it_insert(mapping, &vm->va);
2141
2142 if (mapping->flags & AMDGPU_PTE_PRT)
2143 amdgpu_vm_prt_get(adev);
2144
2145 if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2146 spin_lock(&vm->status_lock);
2147 if (list_empty(&bo_va->base.vm_status))
2148 list_add(&bo_va->base.vm_status, &vm->moved);
2149 spin_unlock(&vm->status_lock);
2150 }
2151 trace_amdgpu_vm_bo_map(bo_va, mapping);
2152}
2153
2110/** 2154/**
2111 * amdgpu_vm_bo_map - map bo inside a vm 2155 * amdgpu_vm_bo_map - map bo inside a vm
2112 * 2156 *
@@ -2127,7 +2171,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
2127 uint64_t size, uint64_t flags) 2171 uint64_t size, uint64_t flags)
2128{ 2172{
2129 struct amdgpu_bo_va_mapping *mapping, *tmp; 2173 struct amdgpu_bo_va_mapping *mapping, *tmp;
2130 struct amdgpu_vm *vm = bo_va->vm; 2174 struct amdgpu_bo *bo = bo_va->base.bo;
2175 struct amdgpu_vm *vm = bo_va->base.vm;
2131 uint64_t eaddr; 2176 uint64_t eaddr;
2132 2177
2133 /* validate the parameters */ 2178 /* validate the parameters */
@@ -2138,7 +2183,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
2138 /* make sure object fit at this offset */ 2183 /* make sure object fit at this offset */
2139 eaddr = saddr + size - 1; 2184 eaddr = saddr + size - 1;
2140 if (saddr >= eaddr || 2185 if (saddr >= eaddr ||
2141 (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) 2186 (bo && offset + size > amdgpu_bo_size(bo)))
2142 return -EINVAL; 2187 return -EINVAL;
2143 2188
2144 saddr /= AMDGPU_GPU_PAGE_SIZE; 2189 saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2148,7 +2193,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
2148 if (tmp) { 2193 if (tmp) {
2149 /* bo and tmp overlap, invalid addr */ 2194 /* bo and tmp overlap, invalid addr */
2150 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " 2195 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
2151 "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, 2196 "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
2152 tmp->start, tmp->last + 1); 2197 tmp->start, tmp->last + 1);
2153 return -EINVAL; 2198 return -EINVAL;
2154 } 2199 }
@@ -2157,17 +2202,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
2157 if (!mapping) 2202 if (!mapping)
2158 return -ENOMEM; 2203 return -ENOMEM;
2159 2204
2160 INIT_LIST_HEAD(&mapping->list);
2161 mapping->start = saddr; 2205 mapping->start = saddr;
2162 mapping->last = eaddr; 2206 mapping->last = eaddr;
2163 mapping->offset = offset; 2207 mapping->offset = offset;
2164 mapping->flags = flags; 2208 mapping->flags = flags;
2165 2209
2166 list_add(&mapping->list, &bo_va->invalids); 2210 amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2167 amdgpu_vm_it_insert(mapping, &vm->va);
2168
2169 if (flags & AMDGPU_PTE_PRT)
2170 amdgpu_vm_prt_get(adev);
2171 2211
2172 return 0; 2212 return 0;
2173} 2213}
@@ -2193,7 +2233,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
2193 uint64_t size, uint64_t flags) 2233 uint64_t size, uint64_t flags)
2194{ 2234{
2195 struct amdgpu_bo_va_mapping *mapping; 2235 struct amdgpu_bo_va_mapping *mapping;
2196 struct amdgpu_vm *vm = bo_va->vm; 2236 struct amdgpu_bo *bo = bo_va->base.bo;
2197 uint64_t eaddr; 2237 uint64_t eaddr;
2198 int r; 2238 int r;
2199 2239
@@ -2205,7 +2245,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
2205 /* make sure object fit at this offset */ 2245 /* make sure object fit at this offset */
2206 eaddr = saddr + size - 1; 2246 eaddr = saddr + size - 1;
2207 if (saddr >= eaddr || 2247 if (saddr >= eaddr ||
2208 (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) 2248 (bo && offset + size > amdgpu_bo_size(bo)))
2209 return -EINVAL; 2249 return -EINVAL;
2210 2250
2211 /* Allocate all the needed memory */ 2251 /* Allocate all the needed memory */
@@ -2213,7 +2253,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
2213 if (!mapping) 2253 if (!mapping)
2214 return -ENOMEM; 2254 return -ENOMEM;
2215 2255
2216 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); 2256 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
2217 if (r) { 2257 if (r) {
2218 kfree(mapping); 2258 kfree(mapping);
2219 return r; 2259 return r;
@@ -2227,11 +2267,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
2227 mapping->offset = offset; 2267 mapping->offset = offset;
2228 mapping->flags = flags; 2268 mapping->flags = flags;
2229 2269
2230 list_add(&mapping->list, &bo_va->invalids); 2270 amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2231 amdgpu_vm_it_insert(mapping, &vm->va);
2232
2233 if (flags & AMDGPU_PTE_PRT)
2234 amdgpu_vm_prt_get(adev);
2235 2271
2236 return 0; 2272 return 0;
2237} 2273}
@@ -2253,7 +2289,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2253 uint64_t saddr) 2289 uint64_t saddr)
2254{ 2290{
2255 struct amdgpu_bo_va_mapping *mapping; 2291 struct amdgpu_bo_va_mapping *mapping;
2256 struct amdgpu_vm *vm = bo_va->vm; 2292 struct amdgpu_vm *vm = bo_va->base.vm;
2257 bool valid = true; 2293 bool valid = true;
2258 2294
2259 saddr /= AMDGPU_GPU_PAGE_SIZE; 2295 saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2277,6 +2313,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2277 2313
2278 list_del(&mapping->list); 2314 list_del(&mapping->list);
2279 amdgpu_vm_it_remove(mapping, &vm->va); 2315 amdgpu_vm_it_remove(mapping, &vm->va);
2316 mapping->bo_va = NULL;
2280 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 2317 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2281 2318
2282 if (valid) 2319 if (valid)
@@ -2362,6 +2399,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2362 if (tmp->last > eaddr) 2399 if (tmp->last > eaddr)
2363 tmp->last = eaddr; 2400 tmp->last = eaddr;
2364 2401
2402 tmp->bo_va = NULL;
2365 list_add(&tmp->list, &vm->freed); 2403 list_add(&tmp->list, &vm->freed);
2366 trace_amdgpu_vm_bo_unmap(NULL, tmp); 2404 trace_amdgpu_vm_bo_unmap(NULL, tmp);
2367 } 2405 }
@@ -2388,6 +2426,19 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2388} 2426}
2389 2427
2390/** 2428/**
2429 * amdgpu_vm_bo_lookup_mapping - find mapping by address
2430 *
2431 * @vm: the requested VM
2432 *
2433 * Find a mapping by its address.
2434 */
2435struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2436 uint64_t addr)
2437{
2438 return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2439}
2440
2441/**
2391 * amdgpu_vm_bo_rmv - remove a bo from a specific vm 2442
2392 * 2443 *
2393 * @adev: amdgpu_device pointer 2444 * @adev: amdgpu_device pointer
@@ -2401,17 +2452,18 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2401 struct amdgpu_bo_va *bo_va) 2452 struct amdgpu_bo_va *bo_va)
2402{ 2453{
2403 struct amdgpu_bo_va_mapping *mapping, *next; 2454 struct amdgpu_bo_va_mapping *mapping, *next;
2404 struct amdgpu_vm *vm = bo_va->vm; 2455 struct amdgpu_vm *vm = bo_va->base.vm;
2405 2456
2406 list_del(&bo_va->bo_list); 2457 list_del(&bo_va->base.bo_list);
2407 2458
2408 spin_lock(&vm->status_lock); 2459 spin_lock(&vm->status_lock);
2409 list_del(&bo_va->vm_status); 2460 list_del(&bo_va->base.vm_status);
2410 spin_unlock(&vm->status_lock); 2461 spin_unlock(&vm->status_lock);
2411 2462
2412 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 2463 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2413 list_del(&mapping->list); 2464 list_del(&mapping->list);
2414 amdgpu_vm_it_remove(mapping, &vm->va); 2465 amdgpu_vm_it_remove(mapping, &vm->va);
2466 mapping->bo_va = NULL;
2415 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 2467 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2416 list_add(&mapping->list, &vm->freed); 2468 list_add(&mapping->list, &vm->freed);
2417 } 2469 }
@@ -2436,15 +2488,37 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2436 * Mark @bo as invalid. 2488 * Mark @bo as invalid.
2437 */ 2489 */
2438void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, 2490void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2439 struct amdgpu_bo *bo) 2491 struct amdgpu_bo *bo, bool evicted)
2440{ 2492{
2441 struct amdgpu_bo_va *bo_va; 2493 struct amdgpu_vm_bo_base *bo_base;
2494
2495 list_for_each_entry(bo_base, &bo->va, bo_list) {
2496 struct amdgpu_vm *vm = bo_base->vm;
2497
2498 bo_base->moved = true;
2499 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2500 spin_lock(&bo_base->vm->status_lock);
2501 if (bo->tbo.type == ttm_bo_type_kernel)
2502 list_move(&bo_base->vm_status, &vm->evicted);
2503 else
2504 list_move_tail(&bo_base->vm_status,
2505 &vm->evicted);
2506 spin_unlock(&bo_base->vm->status_lock);
2507 continue;
2508 }
2509
2510 if (bo->tbo.type == ttm_bo_type_kernel) {
2511 spin_lock(&bo_base->vm->status_lock);
2512 if (list_empty(&bo_base->vm_status))
2513 list_add(&bo_base->vm_status, &vm->relocated);
2514 spin_unlock(&bo_base->vm->status_lock);
2515 continue;
2516 }
2442 2517
2443 list_for_each_entry(bo_va, &bo->va, bo_list) { 2518 spin_lock(&bo_base->vm->status_lock);
2444 spin_lock(&bo_va->vm->status_lock); 2519 if (list_empty(&bo_base->vm_status))
2445 if (list_empty(&bo_va->vm_status)) 2520 list_add(&bo_base->vm_status, &vm->moved);
2446 list_add(&bo_va->vm_status, &bo_va->vm->invalidated); 2521 spin_unlock(&bo_base->vm->status_lock);
2447 spin_unlock(&bo_va->vm->status_lock);
2448 } 2522 }
2449} 2523}
2450 2524
@@ -2462,12 +2536,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2462} 2536}
2463 2537
2464/** 2538/**
2465 * amdgpu_vm_adjust_size - adjust vm size and block size 2539 * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
2540 *
2541 * @adev: amdgpu_device pointer
2542 * @fragment_size_default: the default fragment size if it's set auto
2543 */
2544void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default)
2545{
2546 if (amdgpu_vm_fragment_size == -1)
2547 adev->vm_manager.fragment_size = fragment_size_default;
2548 else
2549 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2550}
2551
2552/**
2553 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2466 * 2554 *
2467 * @adev: amdgpu_device pointer 2555 * @adev: amdgpu_device pointer
2468 * @vm_size: the default vm size if it's set auto 2556 * @vm_size: the default vm size if it's set auto
2469 */ 2557 */
2470void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) 2558void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)
2471{ 2559{
2472 /* adjust vm size firstly */ 2560 /* adjust vm size firstly */
2473 if (amdgpu_vm_size == -1) 2561 if (amdgpu_vm_size == -1)
@@ -2482,8 +2570,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
2482 else 2570 else
2483 adev->vm_manager.block_size = amdgpu_vm_block_size; 2571 adev->vm_manager.block_size = amdgpu_vm_block_size;
2484 2572
2485 DRM_INFO("vm size is %llu GB, block size is %u-bit\n", 2573 amdgpu_vm_set_fragment_size(adev, fragment_size_default);
2486 adev->vm_manager.vm_size, adev->vm_manager.block_size); 2574
2575 DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n",
2576 adev->vm_manager.vm_size, adev->vm_manager.block_size,
2577 adev->vm_manager.fragment_size);
2487} 2578}
2488 2579
2489/** 2580/**
@@ -2496,7 +2587,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
2496 * Init @vm fields. 2587 * Init @vm fields.
2497 */ 2588 */
2498int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 2589int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2499 int vm_context) 2590 int vm_context, unsigned int pasid)
2500{ 2591{
2501 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, 2592 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2502 AMDGPU_VM_PTE_COUNT(adev) * 8); 2593 AMDGPU_VM_PTE_COUNT(adev) * 8);
@@ -2507,13 +2598,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2507 u64 flags; 2598 u64 flags;
2508 uint64_t init_pde_value = 0; 2599 uint64_t init_pde_value = 0;
2509 2600
2510 vm->va = RB_ROOT; 2601 vm->va = RB_ROOT_CACHED;
2511 vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); 2602 vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
2512 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 2603 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2513 vm->reserved_vmid[i] = NULL; 2604 vm->reserved_vmid[i] = NULL;
2514 spin_lock_init(&vm->status_lock); 2605 spin_lock_init(&vm->status_lock);
2515 INIT_LIST_HEAD(&vm->invalidated); 2606 INIT_LIST_HEAD(&vm->evicted);
2516 INIT_LIST_HEAD(&vm->cleared); 2607 INIT_LIST_HEAD(&vm->relocated);
2608 INIT_LIST_HEAD(&vm->moved);
2517 INIT_LIST_HEAD(&vm->freed); 2609 INIT_LIST_HEAD(&vm->freed);
2518 2610
2519 /* create scheduler entity for page table updates */ 2611 /* create scheduler entity for page table updates */
@@ -2544,7 +2636,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2544 vm->use_cpu_for_update ? "CPU" : "SDMA"); 2636 vm->use_cpu_for_update ? "CPU" : "SDMA");
2545 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), 2637 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2546 "CPU update of VM recommended only for large BAR system\n"); 2638 "CPU update of VM recommended only for large BAR system\n");
2547 vm->last_dir_update = NULL; 2639 vm->last_update = NULL;
2548 2640
2549 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 2641 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
2550 AMDGPU_GEM_CREATE_VRAM_CLEARED; 2642 AMDGPU_GEM_CREATE_VRAM_CLEARED;
@@ -2557,30 +2649,46 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2557 r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, 2649 r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
2558 AMDGPU_GEM_DOMAIN_VRAM, 2650 AMDGPU_GEM_DOMAIN_VRAM,
2559 flags, 2651 flags,
2560 NULL, NULL, init_pde_value, &vm->root.bo); 2652 NULL, NULL, init_pde_value, &vm->root.base.bo);
2561 if (r) 2653 if (r)
2562 goto error_free_sched_entity; 2654 goto error_free_sched_entity;
2563 2655
2564 r = amdgpu_bo_reserve(vm->root.bo, false); 2656 vm->root.base.vm = vm;
2565 if (r) 2657 list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
2566 goto error_free_root; 2658 INIT_LIST_HEAD(&vm->root.base.vm_status);
2567
2568 vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
2569 2659
2570 if (vm->use_cpu_for_update) { 2660 if (vm->use_cpu_for_update) {
2571 r = amdgpu_bo_kmap(vm->root.bo, NULL); 2661 r = amdgpu_bo_reserve(vm->root.base.bo, false);
2662 if (r)
2663 goto error_free_root;
2664
2665 r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
2666 amdgpu_bo_unreserve(vm->root.base.bo);
2572 if (r) 2667 if (r)
2573 goto error_free_root; 2668 goto error_free_root;
2574 } 2669 }
2575 2670
2576 amdgpu_bo_unreserve(vm->root.bo); 2671 if (pasid) {
2672 unsigned long flags;
2673
2674 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2675 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
2676 GFP_ATOMIC);
2677 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2678 if (r < 0)
2679 goto error_free_root;
2680
2681 vm->pasid = pasid;
2682 }
2683
2684 INIT_KFIFO(vm->faults);
2577 2685
2578 return 0; 2686 return 0;
2579 2687
2580error_free_root: 2688error_free_root:
2581 amdgpu_bo_unref(&vm->root.bo->shadow); 2689 amdgpu_bo_unref(&vm->root.base.bo->shadow);
2582 amdgpu_bo_unref(&vm->root.bo); 2690 amdgpu_bo_unref(&vm->root.base.bo);
2583 vm->root.bo = NULL; 2691 vm->root.base.bo = NULL;
2584 2692
2585error_free_sched_entity: 2693error_free_sched_entity:
2586 amd_sched_entity_fini(&ring->sched, &vm->entity); 2694 amd_sched_entity_fini(&ring->sched, &vm->entity);
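amdgpu_vm_init() can now register the VM under a PASID so the interrupt handler can map a faulting PASID back to its VM; idr_alloc() with the range [pasid, pasid + 1) reserves exactly that ID or fails if it is already taken. A user-space stand-in using a tiny fixed table (the real code uses the kernel IDR under pasid_lock):

#include <stdio.h>

#define MAX_PASID 16
static void *pasid_table[MAX_PASID];   /* stands in for vm_manager.pasid_idr */

/* reserve exactly 'pasid', like idr_alloc(idr, vm, pasid, pasid + 1, ...) */
static int pasid_register(unsigned pasid, void *vm)
{
	if (pasid >= MAX_PASID || pasid_table[pasid])
		return -1;              /* out of range or already in use */
	pasid_table[pasid] = vm;
	return 0;
}

static void *pasid_lookup(unsigned pasid)   /* what the fault handler needs */
{
	return pasid < MAX_PASID ? pasid_table[pasid] : NULL;
}

int main(void)
{
	int vm_a, vm_b;

	pasid_register(3, &vm_a);
	printf("register again: %d\n", pasid_register(3, &vm_b));    /* -1 */
	printf("lookup 3 -> vm_a? %d\n", pasid_lookup(3) == &vm_a);  /* 1  */
	return 0;
}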
@@ -2599,9 +2707,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
2599{ 2707{
2600 unsigned i; 2708 unsigned i;
2601 2709
2602 if (level->bo) { 2710 if (level->base.bo) {
2603 amdgpu_bo_unref(&level->bo->shadow); 2711 list_del(&level->base.bo_list);
2604 amdgpu_bo_unref(&level->bo); 2712 list_del(&level->base.vm_status);
2713 amdgpu_bo_unref(&level->base.bo->shadow);
2714 amdgpu_bo_unref(&level->base.bo);
2605 } 2715 }
2606 2716
2607 if (level->entries) 2717 if (level->entries)
@@ -2624,14 +2734,28 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2624{ 2734{
2625 struct amdgpu_bo_va_mapping *mapping, *tmp; 2735 struct amdgpu_bo_va_mapping *mapping, *tmp;
2626 bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; 2736 bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
2737 u64 fault;
2627 int i; 2738 int i;
2628 2739
2740 /* Clear pending page faults from IH when the VM is destroyed */
2741 while (kfifo_get(&vm->faults, &fault))
2742 amdgpu_ih_clear_fault(adev, fault);
2743
2744 if (vm->pasid) {
2745 unsigned long flags;
2746
2747 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2748 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2749 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2750 }
2751
2629 amd_sched_entity_fini(vm->entity.sched, &vm->entity); 2752 amd_sched_entity_fini(vm->entity.sched, &vm->entity);
2630 2753
2631 if (!RB_EMPTY_ROOT(&vm->va)) { 2754 if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2632 dev_err(adev->dev, "still active bo inside vm\n"); 2755 dev_err(adev->dev, "still active bo inside vm\n");
2633 } 2756 }
2634 rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { 2757 rbtree_postorder_for_each_entry_safe(mapping, tmp,
2758 &vm->va.rb_root, rb) {
2635 list_del(&mapping->list); 2759 list_del(&mapping->list);
2636 amdgpu_vm_it_remove(mapping, &vm->va); 2760 amdgpu_vm_it_remove(mapping, &vm->va);
2637 kfree(mapping); 2761 kfree(mapping);
@@ -2647,7 +2771,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2647 } 2771 }
2648 2772
2649 amdgpu_vm_free_levels(&vm->root); 2773 amdgpu_vm_free_levels(&vm->root);
2650 dma_fence_put(vm->last_dir_update); 2774 dma_fence_put(vm->last_update);
2651 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 2775 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2652 amdgpu_vm_free_reserved_vmid(adev, vm, i); 2776 amdgpu_vm_free_reserved_vmid(adev, vm, i);
2653} 2777}
@@ -2705,6 +2829,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2705 adev->vm_manager.vm_update_mode = 0; 2829 adev->vm_manager.vm_update_mode = 0;
2706#endif 2830#endif
2707 2831
2832 idr_init(&adev->vm_manager.pasid_idr);
2833 spin_lock_init(&adev->vm_manager.pasid_lock);
2708} 2834}
2709 2835
2710/** 2836/**
@@ -2718,6 +2844,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2718{ 2844{
2719 unsigned i, j; 2845 unsigned i, j;
2720 2846
2847 WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
2848 idr_destroy(&adev->vm_manager.pasid_idr);
2849
2721 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 2850 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
2722 struct amdgpu_vm_id_manager *id_mgr = 2851 struct amdgpu_vm_id_manager *id_mgr =
2723 &adev->vm_manager.id_mgr[i]; 2852 &adev->vm_manager.id_mgr[i];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 217ecba8f4cc..0af090667dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -25,6 +25,7 @@
25#define __AMDGPU_VM_H__ 25#define __AMDGPU_VM_H__
26 26
27#include <linux/rbtree.h> 27#include <linux/rbtree.h>
28#include <linux/idr.h>
28 29
29#include "gpu_scheduler.h" 30#include "gpu_scheduler.h"
30#include "amdgpu_sync.h" 31#include "amdgpu_sync.h"
@@ -50,11 +51,6 @@ struct amdgpu_bo_list_entry;
50/* PTBs (Page Table Blocks) need to be aligned to 32K */ 51/* PTBs (Page Table Blocks) need to be aligned to 32K */
51#define AMDGPU_VM_PTB_ALIGN_SIZE 32768 52#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
52 53
53/* LOG2 number of continuous pages for the fragment field */
54#define AMDGPU_LOG2_PAGES_PER_FRAG(adev) \
55 ((adev)->asic_type < CHIP_VEGA10 ? 4 : \
56 (adev)->vm_manager.block_size)
57
58#define AMDGPU_PTE_VALID (1ULL << 0) 54#define AMDGPU_PTE_VALID (1ULL << 0)
59#define AMDGPU_PTE_SYSTEM (1ULL << 1) 55#define AMDGPU_PTE_SYSTEM (1ULL << 1)
60#define AMDGPU_PTE_SNOOPED (1ULL << 2) 56#define AMDGPU_PTE_SNOOPED (1ULL << 2)
@@ -99,37 +95,57 @@ struct amdgpu_bo_list_entry;
99#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) 95#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
100#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) 96#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
101 97
98/* base structure for tracking BO usage in a VM */
99struct amdgpu_vm_bo_base {
100 /* constant after initialization */
101 struct amdgpu_vm *vm;
102 struct amdgpu_bo *bo;
103
104 /* protected by bo being reserved */
105 struct list_head bo_list;
106
107 /* protected by spinlock */
108 struct list_head vm_status;
109
110 /* protected by the BO being reserved */
111 bool moved;
112};
102 113
103struct amdgpu_vm_pt { 114struct amdgpu_vm_pt {
104 struct amdgpu_bo *bo; 115 struct amdgpu_vm_bo_base base;
105 uint64_t addr; 116 uint64_t addr;
106 bool huge_page;
107 117
108 /* array of page tables, one for each directory entry */ 118 /* array of page tables, one for each directory entry */
109 struct amdgpu_vm_pt *entries; 119 struct amdgpu_vm_pt *entries;
110 unsigned last_entry_used; 120 unsigned last_entry_used;
111}; 121};
112 122
123#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
124#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
125#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL)
126
113struct amdgpu_vm { 127struct amdgpu_vm {
114 /* tree of virtual addresses mapped */ 128 /* tree of virtual addresses mapped */
115 struct rb_root va; 129 struct rb_root_cached va;
116 130
117 /* protecting invalidated */ 131 /* protecting invalidated */
118 spinlock_t status_lock; 132 spinlock_t status_lock;
119 133
120 /* BOs moved, but not yet updated in the PT */ 134 /* BOs that need to be validated */
121 struct list_head invalidated; 135 struct list_head evicted;
136
137 /* PT BOs which relocated and their parent need an update */
138 struct list_head relocated;
122 139
123 /* BOs cleared in the PT because of a move */ 140 /* BOs moved, but not yet updated in the PT */
124 struct list_head cleared; 141 struct list_head moved;
125 142
126 /* BO mappings freed, but not yet updated in the PT */ 143 /* BO mappings freed, but not yet updated in the PT */
127 struct list_head freed; 144 struct list_head freed;
128 145
129 /* contains the page directory */ 146 /* contains the page directory */
130 struct amdgpu_vm_pt root; 147 struct amdgpu_vm_pt root;
131 struct dma_fence *last_dir_update; 148 struct dma_fence *last_update;
132 uint64_t last_eviction_counter;
133 149
134 /* protecting freed */ 150 /* protecting freed */
135 spinlock_t freed_lock; 151 spinlock_t freed_lock;
@@ -137,18 +153,20 @@ struct amdgpu_vm {
137 /* Scheduler entity for page table updates */ 153 /* Scheduler entity for page table updates */
138 struct amd_sched_entity entity; 154 struct amd_sched_entity entity;
139 155
140 /* client id */ 156 /* client id and PASID (TODO: replace client_id with PASID) */
141 u64 client_id; 157 u64 client_id;
158 unsigned int pasid;
142 /* dedicated to vm */ 159 /* dedicated to vm */
143 struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; 160 struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
144 /* each VM will map on CSA */
145 struct amdgpu_bo_va *csa_bo_va;
146 161
147 /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ 162 /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
148 bool use_cpu_for_update; 163 bool use_cpu_for_update;
149 164
150 /* Flag to indicate ATS support from PTE for GFX9 */ 165 /* Flag to indicate ATS support from PTE for GFX9 */
151 bool pte_support_ats; 166 bool pte_support_ats;
167
168 /* Up to 128 pending page faults */
169 DECLARE_KFIFO(faults, u64, 128);
152}; 170};
153 171
154struct amdgpu_vm_id { 172struct amdgpu_vm_id {
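The new AMDGPU_VM_FAULT* macros pack a 16-bit PASID and a page-aligned fault address into one u64, so the 128-entry kfifo of pending faults stays compact. A self-contained check of the round trip, with the shift and address mask copied from the header hunk above and example values that are purely illustrative:

#include <stdint.h>
#include <stdio.h>

#define VM_FAULT(pasid, addr)  (((uint64_t)(pasid) << 48) | (addr))
#define VM_FAULT_PASID(fault)  ((uint64_t)(fault) >> 48)
#define VM_FAULT_ADDR(fault)   ((uint64_t)(fault) & 0xfffffffff000ULL)

int main(void)
{
	uint64_t fault = VM_FAULT(0x1234, 0xab00cd000ULL);

	printf("pasid = 0x%llx\n", (unsigned long long)VM_FAULT_PASID(fault)); /* 0x1234 */
	printf("addr  = 0x%llx\n", (unsigned long long)VM_FAULT_ADDR(fault));  /* 0xab00cd000 */
	return 0;
}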
@@ -191,6 +209,7 @@ struct amdgpu_vm_manager {
191 uint32_t num_level; 209 uint32_t num_level;
192 uint64_t vm_size; 210 uint64_t vm_size;
193 uint32_t block_size; 211 uint32_t block_size;
212 uint32_t fragment_size;
194 /* vram base address for page table entry */ 213 /* vram base address for page table entry */
195 u64 vram_base_offset; 214 u64 vram_base_offset;
196 /* vm pte handling */ 215 /* vm pte handling */
@@ -210,21 +229,28 @@ struct amdgpu_vm_manager {
210 * BIT1[= 0] Compute updated by SDMA [= 1] by CPU 229 * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
211 */ 230 */
212 int vm_update_mode; 231 int vm_update_mode;
232
233 /* PASID to VM mapping, will be used in interrupt context to
234 * look up VM of a page fault
235 */
236 struct idr pasid_idr;
237 spinlock_t pasid_lock;
213}; 238};
214 239
240int amdgpu_vm_alloc_pasid(unsigned int bits);
241void amdgpu_vm_free_pasid(unsigned int pasid);
215void amdgpu_vm_manager_init(struct amdgpu_device *adev); 242void amdgpu_vm_manager_init(struct amdgpu_device *adev);
216void amdgpu_vm_manager_fini(struct amdgpu_device *adev); 243void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
217int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 244int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
218 int vm_context); 245 int vm_context, unsigned int pasid);
219void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); 246void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
220void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, 247void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
221 struct list_head *validated, 248 struct list_head *validated,
222 struct amdgpu_bo_list_entry *entry); 249 struct amdgpu_bo_list_entry *entry);
250bool amdgpu_vm_ready(struct amdgpu_vm *vm);
223int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, 251int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
224 int (*callback)(void *p, struct amdgpu_bo *bo), 252 int (*callback)(void *p, struct amdgpu_bo *bo),
225 void *param); 253 void *param);
226void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
227 struct amdgpu_vm *vm);
228int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, 254int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
229 struct amdgpu_vm *vm, 255 struct amdgpu_vm *vm,
230 uint64_t saddr, uint64_t size); 256 uint64_t saddr, uint64_t size);
@@ -240,13 +266,13 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
240int amdgpu_vm_clear_freed(struct amdgpu_device *adev, 266int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
241 struct amdgpu_vm *vm, 267 struct amdgpu_vm *vm,
242 struct dma_fence **fence); 268 struct dma_fence **fence);
243int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, 269int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
244 struct amdgpu_sync *sync); 270 struct amdgpu_vm *vm);
245int amdgpu_vm_bo_update(struct amdgpu_device *adev, 271int amdgpu_vm_bo_update(struct amdgpu_device *adev,
246 struct amdgpu_bo_va *bo_va, 272 struct amdgpu_bo_va *bo_va,
247 bool clear); 273 bool clear);
248void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, 274void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
249 struct amdgpu_bo *bo); 275 struct amdgpu_bo *bo, bool evicted);
250struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, 276struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
251 struct amdgpu_bo *bo); 277 struct amdgpu_bo *bo);
252struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, 278struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
@@ -266,9 +292,14 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
266int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, 292int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
267 struct amdgpu_vm *vm, 293 struct amdgpu_vm *vm,
268 uint64_t saddr, uint64_t size); 294 uint64_t saddr, uint64_t size);
295struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
296 uint64_t addr);
269void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, 297void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
270 struct amdgpu_bo_va *bo_va); 298 struct amdgpu_bo_va *bo_va);
271void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); 299void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
300 uint32_t fragment_size_default);
301void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
302 uint32_t fragment_size_default);
272int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); 303int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
273bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, 304bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
274 struct amdgpu_job *job); 305 struct amdgpu_job *job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index a2c59a08b2bd..26e900627971 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -28,6 +28,8 @@
28struct amdgpu_vram_mgr { 28struct amdgpu_vram_mgr {
29 struct drm_mm mm; 29 struct drm_mm mm;
30 spinlock_t lock; 30 spinlock_t lock;
31 atomic64_t usage;
32 atomic64_t vis_usage;
31}; 33};
32 34
33/** 35/**
@@ -79,6 +81,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
79} 81}
80 82
81/** 83/**
84 * amdgpu_vram_mgr_vis_size - Calculate visible node size
85 *
86 * @adev: amdgpu device structure
87 * @node: MM node structure
88 *
89 * Calculate how many bytes of the MM node are inside visible VRAM
90 */
91static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
92 struct drm_mm_node *node)
93{
94 uint64_t start = node->start << PAGE_SHIFT;
95 uint64_t end = (node->size + node->start) << PAGE_SHIFT;
96
97 if (start >= adev->mc.visible_vram_size)
98 return 0;
99
100 return (end > adev->mc.visible_vram_size ?
101 adev->mc.visible_vram_size : end) - start;
102}
103
104/**
82 * amdgpu_vram_mgr_new - allocate new ranges 105 * amdgpu_vram_mgr_new - allocate new ranges
83 * 106 *
84 * @man: TTM memory type manager 107 * @man: TTM memory type manager
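amdgpu_vram_mgr_vis_size() clips each drm_mm node against the CPU-visible window: a node entirely above visible_vram_size contributes nothing, a node straddling the boundary contributes only the part below it. A stand-alone recomputation with example numbers (256 MiB visible window, 4 KiB pages; the figures are illustrative):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

static uint64_t vis_size(uint64_t start_page, uint64_t num_pages,
			 uint64_t visible_vram_size)
{
	uint64_t start = start_page << PAGE_SHIFT;
	uint64_t end = (start_page + num_pages) << PAGE_SHIFT;

	if (start >= visible_vram_size)
		return 0;                        /* fully outside the window */
	if (end > visible_vram_size)
		end = visible_vram_size;         /* clip to the window */
	return end - start;
}

int main(void)
{
	uint64_t visible = 256ull << 20;         /* 256 MiB of visible VRAM */

	/* fully visible node, node straddling the boundary, node above it */
	printf("%llu\n", (unsigned long long)vis_size(0, 1024, visible));       /* 4 MiB */
	printf("%llu\n", (unsigned long long)vis_size(65024, 1024, visible));   /* 2 MiB */
	printf("%llu\n", (unsigned long long)vis_size(262144, 1024, visible));  /* 0     */
	return 0;
}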
@@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
93 const struct ttm_place *place, 116 const struct ttm_place *place,
94 struct ttm_mem_reg *mem) 117 struct ttm_mem_reg *mem)
95{ 118{
119 struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
96 struct amdgpu_vram_mgr *mgr = man->priv; 120 struct amdgpu_vram_mgr *mgr = man->priv;
97 struct drm_mm *mm = &mgr->mm; 121 struct drm_mm *mm = &mgr->mm;
98 struct drm_mm_node *nodes; 122 struct drm_mm_node *nodes;
99 enum drm_mm_insert_mode mode; 123 enum drm_mm_insert_mode mode;
100 unsigned long lpfn, num_nodes, pages_per_node, pages_left; 124 unsigned long lpfn, num_nodes, pages_per_node, pages_left;
125 uint64_t usage = 0, vis_usage = 0;
101 unsigned i; 126 unsigned i;
102 int r; 127 int r;
103 128
@@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
142 if (unlikely(r)) 167 if (unlikely(r))
143 goto error; 168 goto error;
144 169
170 usage += nodes[i].size << PAGE_SHIFT;
171 vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
172
145 /* Calculate a virtual BO start address to easily check if 173 /* Calculate a virtual BO start address to easily check if
146 * everything is CPU accessible. 174 * everything is CPU accessible.
147 */ 175 */
@@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
155 } 183 }
156 spin_unlock(&mgr->lock); 184 spin_unlock(&mgr->lock);
157 185
186 atomic64_add(usage, &mgr->usage);
187 atomic64_add(vis_usage, &mgr->vis_usage);
188
158 mem->mm_node = nodes; 189 mem->mm_node = nodes;
159 190
160 return 0; 191 return 0;
@@ -181,8 +212,10 @@ error:
181static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, 212static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
182 struct ttm_mem_reg *mem) 213 struct ttm_mem_reg *mem)
183{ 214{
215 struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev);
184 struct amdgpu_vram_mgr *mgr = man->priv; 216 struct amdgpu_vram_mgr *mgr = man->priv;
185 struct drm_mm_node *nodes = mem->mm_node; 217 struct drm_mm_node *nodes = mem->mm_node;
218 uint64_t usage = 0, vis_usage = 0;
186 unsigned pages = mem->num_pages; 219 unsigned pages = mem->num_pages;
187 220
188 if (!mem->mm_node) 221 if (!mem->mm_node)
@@ -192,31 +225,67 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
192 while (pages) { 225 while (pages) {
193 pages -= nodes->size; 226 pages -= nodes->size;
194 drm_mm_remove_node(nodes); 227 drm_mm_remove_node(nodes);
228 usage += nodes->size << PAGE_SHIFT;
229 vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
195 ++nodes; 230 ++nodes;
196 } 231 }
197 spin_unlock(&mgr->lock); 232 spin_unlock(&mgr->lock);
198 233
234 atomic64_sub(usage, &mgr->usage);
235 atomic64_sub(vis_usage, &mgr->vis_usage);
236
199 kfree(mem->mm_node); 237 kfree(mem->mm_node);
200 mem->mm_node = NULL; 238 mem->mm_node = NULL;
201} 239}
202 240
203/** 241/**
242 * amdgpu_vram_mgr_usage - how many bytes are used in this domain
243 *
244 * @man: TTM memory type manager
245 *
246 * Returns how many bytes are used in this domain.
247 */
248uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man)
249{
250 struct amdgpu_vram_mgr *mgr = man->priv;
251
252 return atomic64_read(&mgr->usage);
253}
254
255/**
256 * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
257 *
258 * @man: TTM memory type manager
259 *
260 * Returns how many bytes are used in the visible part of VRAM
261 */
262uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man)
263{
264 struct amdgpu_vram_mgr *mgr = man->priv;
265
266 return atomic64_read(&mgr->vis_usage);
267}
268
269/**
204 * amdgpu_vram_mgr_debug - dump VRAM table 270 * amdgpu_vram_mgr_debug - dump VRAM table
205 * 271 *
206 * @man: TTM memory type manager 272 * @man: TTM memory type manager
207 * @prefix: text prefix 273 * @printer: DRM printer to use
208 * 274 *
209 * Dump the table content using printk. 275 * Dump the table content with the given DRM printer.
210 */ 276 */
211static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, 277static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man,
212 const char *prefix) 278 struct drm_printer *printer)
213{ 279{
214 struct amdgpu_vram_mgr *mgr = man->priv; 280 struct amdgpu_vram_mgr *mgr = man->priv;
215 struct drm_printer p = drm_debug_printer(prefix);
216 281
217 spin_lock(&mgr->lock); 282 spin_lock(&mgr->lock);
218 drm_mm_print(&mgr->mm, &p); 283 drm_mm_print(&mgr->mm, printer);
219 spin_unlock(&mgr->lock); 284 spin_unlock(&mgr->lock);
285
286 drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
287 man->size, amdgpu_vram_mgr_usage(man) >> 20,
288 amdgpu_vram_mgr_vis_usage(man) >> 20);
220} 289}
221 290
222const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { 291const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = {
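The vram_mgr hunks above track VRAM consumption with two per-manager atomic64 counters and clamp each drm_mm node against the CPU-visible aperture before adding it to the visible total. A minimal standalone sketch of that clamp arithmetic; the struct, helper name and the 256 MiB aperture size are illustrative assumptions, not the driver's API:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Hypothetical stand-in for a drm_mm_node: start/size are in pages. */
struct mm_node { uint64_t start; uint64_t size; };

static uint64_t vis_size(uint64_t visible_vram_size, const struct mm_node *node)
{
	uint64_t start = node->start << PAGE_SHIFT;
	uint64_t end   = (node->start + node->size) << PAGE_SHIFT;

	if (start >= visible_vram_size)
		return 0;                    /* node lies entirely above the aperture */
	if (end > visible_vram_size)
		end = visible_vram_size;     /* clamp the tail to the aperture */
	return end - start;
}

int main(void)
{
	const uint64_t visible = 256ULL << 20;                /* assume a 256 MiB aperture */
	struct mm_node n = { .start = 65000, .size = 1024 };  /* straddles the boundary */

	printf("visible bytes of node: %llu\n",
	       (unsigned long long)vis_size(visible, &n));
	return 0;
}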
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index d69aa2e179bb..69500a8b4e2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1343,8 +1343,11 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
1343 idx = 0x80; 1343 idx = 0x80;
1344 1344
1345 str = CSTR(idx); 1345 str = CSTR(idx);
1346 if (*str != '\0') 1346 if (*str != '\0') {
1347 pr_info("ATOM BIOS: %s\n", str); 1347 pr_info("ATOM BIOS: %s\n", str);
1348 strlcpy(ctx->vbios_version, str, sizeof(ctx->vbios_version));
1349 }
1350
1348 1351
1349 return ctx; 1352 return ctx;
1350} 1353}
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
index ddd8045accf3..a39170991afe 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -140,6 +140,7 @@ struct atom_context {
140 int io_mode; 140 int io_mode;
141 uint32_t *scratch; 141 uint32_t *scratch;
142 int scratch_size_bytes; 142 int scratch_size_bytes;
143 char vbios_version[20];
143}; 144};
144 145
145extern int amdgpu_atom_debug; 146extern int amdgpu_atom_debug;
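The atom.c and atom.h hunks above capture the ATOM BIOS banner into a fixed 20-byte vbios_version field with strlcpy, which always NUL-terminates and silently truncates longer strings. A hedged userspace equivalent (snprintf stands in for the kernel's strlcpy; the struct is an illustrative subset of atom_context):

#include <stdio.h>

struct atom_ctx_sketch {                     /* illustrative subset only */
	char vbios_version[20];
};

int main(void)
{
	struct atom_ctx_sketch ctx;
	const char *str = "113-D0001010-X99";   /* example banner string */

	if (*str != '\0') {
		printf("ATOM BIOS: %s\n", str);
		/* snprintf, like strlcpy, bounds the copy and NUL-terminates. */
		snprintf(ctx.vbios_version, sizeof(ctx.vbios_version), "%s", str);
	}
	printf("stored version: %s\n", ctx.vbios_version);
	return 0;
}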
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index cb508a211b2f..68ce1bdaf2fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -307,7 +307,6 @@ static int ci_set_power_limit(struct amdgpu_device *adev, u32 n);
307static int ci_set_overdrive_target_tdp(struct amdgpu_device *adev, 307static int ci_set_overdrive_target_tdp(struct amdgpu_device *adev,
308 u32 target_tdp); 308 u32 target_tdp);
309static int ci_update_uvd_dpm(struct amdgpu_device *adev, bool gate); 309static int ci_update_uvd_dpm(struct amdgpu_device *adev, bool gate);
310static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev);
311static void ci_dpm_set_irq_funcs(struct amdgpu_device *adev); 310static void ci_dpm_set_irq_funcs(struct amdgpu_device *adev);
312 311
313static PPSMC_Result amdgpu_ci_send_msg_to_smc_with_parameter(struct amdgpu_device *adev, 312static PPSMC_Result amdgpu_ci_send_msg_to_smc_with_parameter(struct amdgpu_device *adev,
@@ -883,8 +882,9 @@ static int ci_power_control_set_level(struct amdgpu_device *adev)
883 return ret; 882 return ret;
884} 883}
885 884
886static void ci_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate) 885static void ci_dpm_powergate_uvd(void *handle, bool gate)
887{ 886{
887 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
888 struct ci_power_info *pi = ci_get_pi(adev); 888 struct ci_power_info *pi = ci_get_pi(adev);
889 889
890 pi->uvd_power_gated = gate; 890 pi->uvd_power_gated = gate;
@@ -901,8 +901,9 @@ static void ci_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate)
901 } 901 }
902} 902}
903 903
904static bool ci_dpm_vblank_too_short(struct amdgpu_device *adev) 904static bool ci_dpm_vblank_too_short(void *handle)
905{ 905{
906 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
906 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); 907 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
907 u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; 908 u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
908 909
@@ -1210,11 +1211,12 @@ static int ci_fan_ctrl_stop_smc_fan_control(struct amdgpu_device *adev)
1210 } 1211 }
1211} 1212}
1212 1213
1213static int ci_dpm_get_fan_speed_percent(struct amdgpu_device *adev, 1214static int ci_dpm_get_fan_speed_percent(void *handle,
1214 u32 *speed) 1215 u32 *speed)
1215{ 1216{
1216 u32 duty, duty100; 1217 u32 duty, duty100;
1217 u64 tmp64; 1218 u64 tmp64;
1219 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1218 1220
1219 if (adev->pm.no_fan) 1221 if (adev->pm.no_fan)
1220 return -ENOENT; 1222 return -ENOENT;
@@ -1237,12 +1239,13 @@ static int ci_dpm_get_fan_speed_percent(struct amdgpu_device *adev,
1237 return 0; 1239 return 0;
1238} 1240}
1239 1241
1240static int ci_dpm_set_fan_speed_percent(struct amdgpu_device *adev, 1242static int ci_dpm_set_fan_speed_percent(void *handle,
1241 u32 speed) 1243 u32 speed)
1242{ 1244{
1243 u32 tmp; 1245 u32 tmp;
1244 u32 duty, duty100; 1246 u32 duty, duty100;
1245 u64 tmp64; 1247 u64 tmp64;
1248 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1246 struct ci_power_info *pi = ci_get_pi(adev); 1249 struct ci_power_info *pi = ci_get_pi(adev);
1247 1250
1248 if (adev->pm.no_fan) 1251 if (adev->pm.no_fan)
@@ -1271,8 +1274,10 @@ static int ci_dpm_set_fan_speed_percent(struct amdgpu_device *adev,
1271 return 0; 1274 return 0;
1272} 1275}
1273 1276
1274static void ci_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode) 1277static void ci_dpm_set_fan_control_mode(void *handle, u32 mode)
1275{ 1278{
1279 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1280
1276 switch (mode) { 1281 switch (mode) {
1277 case AMD_FAN_CTRL_NONE: 1282 case AMD_FAN_CTRL_NONE:
1278 if (adev->pm.dpm.fan.ucode_fan_control) 1283 if (adev->pm.dpm.fan.ucode_fan_control)
@@ -1292,8 +1297,9 @@ static void ci_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode)
1292 } 1297 }
1293} 1298}
1294 1299
1295static u32 ci_dpm_get_fan_control_mode(struct amdgpu_device *adev) 1300static u32 ci_dpm_get_fan_control_mode(void *handle)
1296{ 1301{
1302 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1297 struct ci_power_info *pi = ci_get_pi(adev); 1303 struct ci_power_info *pi = ci_get_pi(adev);
1298 1304
1299 if (pi->fan_is_controlled_by_smc) 1305 if (pi->fan_is_controlled_by_smc)
@@ -4378,9 +4384,10 @@ static u32 ci_get_lowest_enabled_level(struct amdgpu_device *adev,
4378} 4384}
4379 4385
4380 4386
4381static int ci_dpm_force_performance_level(struct amdgpu_device *adev, 4387static int ci_dpm_force_performance_level(void *handle,
4382 enum amd_dpm_forced_level level) 4388 enum amd_dpm_forced_level level)
4383{ 4389{
4390 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4384 struct ci_power_info *pi = ci_get_pi(adev); 4391 struct ci_power_info *pi = ci_get_pi(adev);
4385 u32 tmp, levels, i; 4392 u32 tmp, levels, i;
4386 int ret; 4393 int ret;
@@ -5291,8 +5298,9 @@ static void ci_update_requested_ps(struct amdgpu_device *adev,
5291 adev->pm.dpm.requested_ps = &pi->requested_rps; 5298 adev->pm.dpm.requested_ps = &pi->requested_rps;
5292} 5299}
5293 5300
5294static int ci_dpm_pre_set_power_state(struct amdgpu_device *adev) 5301static int ci_dpm_pre_set_power_state(void *handle)
5295{ 5302{
5303 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5296 struct ci_power_info *pi = ci_get_pi(adev); 5304 struct ci_power_info *pi = ci_get_pi(adev);
5297 struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; 5305 struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps;
5298 struct amdgpu_ps *new_ps = &requested_ps; 5306 struct amdgpu_ps *new_ps = &requested_ps;
@@ -5304,8 +5312,9 @@ static int ci_dpm_pre_set_power_state(struct amdgpu_device *adev)
5304 return 0; 5312 return 0;
5305} 5313}
5306 5314
5307static void ci_dpm_post_set_power_state(struct amdgpu_device *adev) 5315static void ci_dpm_post_set_power_state(void *handle)
5308{ 5316{
5317 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5309 struct ci_power_info *pi = ci_get_pi(adev); 5318 struct ci_power_info *pi = ci_get_pi(adev);
5310 struct amdgpu_ps *new_ps = &pi->requested_rps; 5319 struct amdgpu_ps *new_ps = &pi->requested_rps;
5311 5320
@@ -5479,8 +5488,9 @@ static void ci_dpm_disable(struct amdgpu_device *adev)
5479 ci_update_current_ps(adev, boot_ps); 5488 ci_update_current_ps(adev, boot_ps);
5480} 5489}
5481 5490
5482static int ci_dpm_set_power_state(struct amdgpu_device *adev) 5491static int ci_dpm_set_power_state(void *handle)
5483{ 5492{
5493 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5484 struct ci_power_info *pi = ci_get_pi(adev); 5494 struct ci_power_info *pi = ci_get_pi(adev);
5485 struct amdgpu_ps *new_ps = &pi->requested_rps; 5495 struct amdgpu_ps *new_ps = &pi->requested_rps;
5486 struct amdgpu_ps *old_ps = &pi->current_rps; 5496 struct amdgpu_ps *old_ps = &pi->current_rps;
@@ -5551,8 +5561,10 @@ static void ci_dpm_reset_asic(struct amdgpu_device *adev)
5551} 5561}
5552#endif 5562#endif
5553 5563
5554static void ci_dpm_display_configuration_changed(struct amdgpu_device *adev) 5564static void ci_dpm_display_configuration_changed(void *handle)
5555{ 5565{
5566 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5567
5556 ci_program_display_gap(adev); 5568 ci_program_display_gap(adev);
5557} 5569}
5558 5570
@@ -6105,9 +6117,10 @@ static int ci_dpm_init(struct amdgpu_device *adev)
6105} 6117}
6106 6118
6107static void 6119static void
6108ci_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, 6120ci_dpm_debugfs_print_current_performance_level(void *handle,
6109 struct seq_file *m) 6121 struct seq_file *m)
6110{ 6122{
6123 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6111 struct ci_power_info *pi = ci_get_pi(adev); 6124 struct ci_power_info *pi = ci_get_pi(adev);
6112 struct amdgpu_ps *rps = &pi->current_rps; 6125 struct amdgpu_ps *rps = &pi->current_rps;
6113 u32 sclk = ci_get_average_sclk_freq(adev); 6126 u32 sclk = ci_get_average_sclk_freq(adev);
@@ -6131,12 +6144,13 @@ ci_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev,
6131 seq_printf(m, "GPU load: %u %%\n", activity_percent); 6144 seq_printf(m, "GPU load: %u %%\n", activity_percent);
6132} 6145}
6133 6146
6134static void ci_dpm_print_power_state(struct amdgpu_device *adev, 6147static void ci_dpm_print_power_state(void *handle, void *current_ps)
6135 struct amdgpu_ps *rps)
6136{ 6148{
6149 struct amdgpu_ps *rps = (struct amdgpu_ps *)current_ps;
6137 struct ci_ps *ps = ci_get_ps(rps); 6150 struct ci_ps *ps = ci_get_ps(rps);
6138 struct ci_pl *pl; 6151 struct ci_pl *pl;
6139 int i; 6152 int i;
6153 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6140 6154
6141 amdgpu_dpm_print_class_info(rps->class, rps->class2); 6155 amdgpu_dpm_print_class_info(rps->class, rps->class2);
6142 amdgpu_dpm_print_cap_info(rps->caps); 6156 amdgpu_dpm_print_cap_info(rps->caps);
@@ -6158,20 +6172,23 @@ static inline bool ci_are_power_levels_equal(const struct ci_pl *ci_cpl1,
6158 (ci_cpl1->pcie_lane == ci_cpl2->pcie_lane)); 6172 (ci_cpl1->pcie_lane == ci_cpl2->pcie_lane));
6159} 6173}
6160 6174
6161static int ci_check_state_equal(struct amdgpu_device *adev, 6175static int ci_check_state_equal(void *handle,
6162 struct amdgpu_ps *cps, 6176 void *current_ps,
6163 struct amdgpu_ps *rps, 6177 void *request_ps,
6164 bool *equal) 6178 bool *equal)
6165{ 6179{
6166 struct ci_ps *ci_cps; 6180 struct ci_ps *ci_cps;
6167 struct ci_ps *ci_rps; 6181 struct ci_ps *ci_rps;
6168 int i; 6182 int i;
6183 struct amdgpu_ps *cps = (struct amdgpu_ps *)current_ps;
6184 struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps;
6185 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6169 6186
6170 if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) 6187 if (adev == NULL || cps == NULL || rps == NULL || equal == NULL)
6171 return -EINVAL; 6188 return -EINVAL;
6172 6189
6173 ci_cps = ci_get_ps(cps); 6190 ci_cps = ci_get_ps((struct amdgpu_ps *)cps);
6174 ci_rps = ci_get_ps(rps); 6191 ci_rps = ci_get_ps((struct amdgpu_ps *)rps);
6175 6192
6176 if (ci_cps == NULL) { 6193 if (ci_cps == NULL) {
6177 *equal = false; 6194 *equal = false;
@@ -6199,8 +6216,9 @@ static int ci_check_state_equal(struct amdgpu_device *adev,
6199 return 0; 6216 return 0;
6200} 6217}
6201 6218
6202static u32 ci_dpm_get_sclk(struct amdgpu_device *adev, bool low) 6219static u32 ci_dpm_get_sclk(void *handle, bool low)
6203{ 6220{
6221 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6204 struct ci_power_info *pi = ci_get_pi(adev); 6222 struct ci_power_info *pi = ci_get_pi(adev);
6205 struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); 6223 struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps);
6206 6224
@@ -6210,8 +6228,9 @@ static u32 ci_dpm_get_sclk(struct amdgpu_device *adev, bool low)
6210 return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; 6228 return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk;
6211} 6229}
6212 6230
6213static u32 ci_dpm_get_mclk(struct amdgpu_device *adev, bool low) 6231static u32 ci_dpm_get_mclk(void *handle, bool low)
6214{ 6232{
6233 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6215 struct ci_power_info *pi = ci_get_pi(adev); 6234 struct ci_power_info *pi = ci_get_pi(adev);
6216 struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps); 6235 struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps);
6217 6236
@@ -6222,10 +6241,11 @@ static u32 ci_dpm_get_mclk(struct amdgpu_device *adev, bool low)
6222} 6241}
6223 6242
6224/* get temperature in millidegrees */ 6243/* get temperature in millidegrees */
6225static int ci_dpm_get_temp(struct amdgpu_device *adev) 6244static int ci_dpm_get_temp(void *handle)
6226{ 6245{
6227 u32 temp; 6246 u32 temp;
6228 int actual_temp = 0; 6247 int actual_temp = 0;
6248 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6229 6249
6230 temp = (RREG32_SMC(ixCG_MULT_THERMAL_STATUS) & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> 6250 temp = (RREG32_SMC(ixCG_MULT_THERMAL_STATUS) & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
6231 CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; 6251 CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
@@ -6261,7 +6281,6 @@ static int ci_dpm_early_init(void *handle)
6261{ 6281{
6262 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6282 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6263 6283
6264 ci_dpm_set_dpm_funcs(adev);
6265 ci_dpm_set_irq_funcs(adev); 6284 ci_dpm_set_irq_funcs(adev);
6266 6285
6267 return 0; 6286 return 0;
@@ -6551,9 +6570,10 @@ static int ci_dpm_set_powergating_state(void *handle,
6551 return 0; 6570 return 0;
6552} 6571}
6553 6572
6554static int ci_dpm_print_clock_levels(struct amdgpu_device *adev, 6573static int ci_dpm_print_clock_levels(void *handle,
6555 enum pp_clock_type type, char *buf) 6574 enum pp_clock_type type, char *buf)
6556{ 6575{
6576 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6557 struct ci_power_info *pi = ci_get_pi(adev); 6577 struct ci_power_info *pi = ci_get_pi(adev);
6558 struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table; 6578 struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table;
6559 struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table; 6579 struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table;
@@ -6618,9 +6638,10 @@ static int ci_dpm_print_clock_levels(struct amdgpu_device *adev,
6618 return size; 6638 return size;
6619} 6639}
6620 6640
6621static int ci_dpm_force_clock_level(struct amdgpu_device *adev, 6641static int ci_dpm_force_clock_level(void *handle,
6622 enum pp_clock_type type, uint32_t mask) 6642 enum pp_clock_type type, uint32_t mask)
6623{ 6643{
6644 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6624 struct ci_power_info *pi = ci_get_pi(adev); 6645 struct ci_power_info *pi = ci_get_pi(adev);
6625 6646
6626 if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO | 6647 if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO |
@@ -6664,8 +6685,9 @@ static int ci_dpm_force_clock_level(struct amdgpu_device *adev,
6664 return 0; 6685 return 0;
6665} 6686}
6666 6687
6667static int ci_dpm_get_sclk_od(struct amdgpu_device *adev) 6688static int ci_dpm_get_sclk_od(void *handle)
6668{ 6689{
6690 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6669 struct ci_power_info *pi = ci_get_pi(adev); 6691 struct ci_power_info *pi = ci_get_pi(adev);
6670 struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table); 6692 struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table);
6671 struct ci_single_dpm_table *golden_sclk_table = 6693 struct ci_single_dpm_table *golden_sclk_table =
@@ -6680,8 +6702,9 @@ static int ci_dpm_get_sclk_od(struct amdgpu_device *adev)
6680 return value; 6702 return value;
6681} 6703}
6682 6704
6683static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value) 6705static int ci_dpm_set_sclk_od(void *handle, uint32_t value)
6684{ 6706{
6707 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6685 struct ci_power_info *pi = ci_get_pi(adev); 6708 struct ci_power_info *pi = ci_get_pi(adev);
6686 struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps); 6709 struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
6687 struct ci_single_dpm_table *golden_sclk_table = 6710 struct ci_single_dpm_table *golden_sclk_table =
@@ -6698,8 +6721,9 @@ static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value)
6698 return 0; 6721 return 0;
6699} 6722}
6700 6723
6701static int ci_dpm_get_mclk_od(struct amdgpu_device *adev) 6724static int ci_dpm_get_mclk_od(void *handle)
6702{ 6725{
6726 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6703 struct ci_power_info *pi = ci_get_pi(adev); 6727 struct ci_power_info *pi = ci_get_pi(adev);
6704 struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table); 6728 struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table);
6705 struct ci_single_dpm_table *golden_mclk_table = 6729 struct ci_single_dpm_table *golden_mclk_table =
@@ -6714,8 +6738,9 @@ static int ci_dpm_get_mclk_od(struct amdgpu_device *adev)
6714 return value; 6738 return value;
6715} 6739}
6716 6740
6717static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value) 6741static int ci_dpm_set_mclk_od(void *handle, uint32_t value)
6718{ 6742{
6743 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6719 struct ci_power_info *pi = ci_get_pi(adev); 6744 struct ci_power_info *pi = ci_get_pi(adev);
6720 struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps); 6745 struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
6721 struct ci_single_dpm_table *golden_mclk_table = 6746 struct ci_single_dpm_table *golden_mclk_table =
@@ -6732,9 +6757,10 @@ static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value)
6732 return 0; 6757 return 0;
6733} 6758}
6734 6759
6735static int ci_dpm_get_power_profile_state(struct amdgpu_device *adev, 6760static int ci_dpm_get_power_profile_state(void *handle,
6736 struct amd_pp_profile *query) 6761 struct amd_pp_profile *query)
6737{ 6762{
6763 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6738 struct ci_power_info *pi = ci_get_pi(adev); 6764 struct ci_power_info *pi = ci_get_pi(adev);
6739 6765
6740 if (!pi || !query) 6766 if (!pi || !query)
@@ -6851,9 +6877,10 @@ static int ci_set_power_profile_state(struct amdgpu_device *adev,
6851 return result; 6877 return result;
6852} 6878}
6853 6879
6854static int ci_dpm_set_power_profile_state(struct amdgpu_device *adev, 6880static int ci_dpm_set_power_profile_state(void *handle,
6855 struct amd_pp_profile *request) 6881 struct amd_pp_profile *request)
6856{ 6882{
6883 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6857 struct ci_power_info *pi = ci_get_pi(adev); 6884 struct ci_power_info *pi = ci_get_pi(adev);
6858 int ret = -1; 6885 int ret = -1;
6859 6886
@@ -6906,9 +6933,10 @@ static int ci_dpm_set_power_profile_state(struct amdgpu_device *adev,
6906 return 0; 6933 return 0;
6907} 6934}
6908 6935
6909static int ci_dpm_reset_power_profile_state(struct amdgpu_device *adev, 6936static int ci_dpm_reset_power_profile_state(void *handle,
6910 struct amd_pp_profile *request) 6937 struct amd_pp_profile *request)
6911{ 6938{
6939 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6912 struct ci_power_info *pi = ci_get_pi(adev); 6940 struct ci_power_info *pi = ci_get_pi(adev);
6913 6941
6914 if (!pi || !request) 6942 if (!pi || !request)
@@ -6927,9 +6955,10 @@ static int ci_dpm_reset_power_profile_state(struct amdgpu_device *adev,
6927 return -EINVAL; 6955 return -EINVAL;
6928} 6956}
6929 6957
6930static int ci_dpm_switch_power_profile(struct amdgpu_device *adev, 6958static int ci_dpm_switch_power_profile(void *handle,
6931 enum amd_pp_profile_type type) 6959 enum amd_pp_profile_type type)
6932{ 6960{
6961 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6933 struct ci_power_info *pi = ci_get_pi(adev); 6962 struct ci_power_info *pi = ci_get_pi(adev);
6934 struct amd_pp_profile request = {0}; 6963 struct amd_pp_profile request = {0};
6935 6964
@@ -6944,11 +6973,12 @@ static int ci_dpm_switch_power_profile(struct amdgpu_device *adev,
6944 return 0; 6973 return 0;
6945} 6974}
6946 6975
6947static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx, 6976static int ci_dpm_read_sensor(void *handle, int idx,
6948 void *value, int *size) 6977 void *value, int *size)
6949{ 6978{
6950 u32 activity_percent = 50; 6979 u32 activity_percent = 50;
6951 int ret; 6980 int ret;
6981 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6952 6982
6953 /* size must be at least 4 bytes for all sensors */ 6983 /* size must be at least 4 bytes for all sensors */
6954 if (*size < 4) 6984 if (*size < 4)
@@ -7003,7 +7033,7 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = {
7003 .set_powergating_state = ci_dpm_set_powergating_state, 7033 .set_powergating_state = ci_dpm_set_powergating_state,
7004}; 7034};
7005 7035
7006static const struct amdgpu_dpm_funcs ci_dpm_funcs = { 7036const struct amd_pm_funcs ci_dpm_funcs = {
7007 .get_temperature = &ci_dpm_get_temp, 7037 .get_temperature = &ci_dpm_get_temp,
7008 .pre_set_power_state = &ci_dpm_pre_set_power_state, 7038 .pre_set_power_state = &ci_dpm_pre_set_power_state,
7009 .set_power_state = &ci_dpm_set_power_state, 7039 .set_power_state = &ci_dpm_set_power_state,
@@ -7035,12 +7065,6 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
7035 .read_sensor = ci_dpm_read_sensor, 7065 .read_sensor = ci_dpm_read_sensor,
7036}; 7066};
7037 7067
7038static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
7039{
7040 if (adev->pm.funcs == NULL)
7041 adev->pm.funcs = &ci_dpm_funcs;
7042}
7043
7044static const struct amdgpu_irq_src_funcs ci_dpm_irq_funcs = { 7068static const struct amdgpu_irq_src_funcs ci_dpm_irq_funcs = {
7045 .set = ci_dpm_set_interrupt_state, 7069 .set = ci_dpm_set_interrupt_state,
7046 .process = ci_dpm_process_interrupt, 7070 .process = ci_dpm_process_interrupt,
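Every ci_dpm_* callback above now takes an opaque void *handle instead of struct amdgpu_device *, and the table is exported as amd_pm_funcs so the same function-table shape can be shared with the powerplay path. A minimal sketch of that opaque-handle vtable pattern; the struct and field names are simplified stand-ins, not the real amd_pm_funcs layout:

#include <stdio.h>

/* Simplified stand-in for the device the handle really points at. */
struct dev_sketch { unsigned int sclk_khz; };

/* Function table whose entries only see an opaque handle. */
struct pm_funcs_sketch {
	unsigned int (*get_sclk)(void *handle, int low);
};

static unsigned int sketch_get_sclk(void *handle, int low)
{
	/* First statement of each converted callback: recover the device. */
	struct dev_sketch *dev = (struct dev_sketch *)handle;

	return low ? dev->sclk_khz / 2 : dev->sclk_khz;
}

static const struct pm_funcs_sketch funcs = { .get_sclk = sketch_get_sclk };

int main(void)
{
	struct dev_sketch dev = { .sclk_khz = 1000000 };

	printf("sclk: %u kHz\n", funcs.get_sclk(&dev, 0));
	return 0;
}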
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
index b1c8e7b446ea..c7b4349f6319 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
@@ -26,5 +26,6 @@
26 26
27extern const struct amd_ip_funcs ci_dpm_ip_funcs; 27extern const struct amd_ip_funcs ci_dpm_ip_funcs;
28extern const struct amd_ip_funcs kv_dpm_ip_funcs; 28extern const struct amd_ip_funcs kv_dpm_ip_funcs;
29 29extern const struct amd_pm_funcs ci_dpm_funcs;
30extern const struct amd_pm_funcs kv_dpm_funcs;
30#endif 31#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index b8918432c572..07d3d895da10 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -228,6 +228,19 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
228 * [127:96] - reserved 228 * [127:96] - reserved
229 */ 229 */
230 230
231/**
232 * cik_ih_prescreen_iv - prescreen an interrupt vector
233 *
234 * @adev: amdgpu_device pointer
235 *
236 * Returns true if the interrupt vector should be further processed.
237 */
238static bool cik_ih_prescreen_iv(struct amdgpu_device *adev)
239{
240 /* Process all interrupts */
241 return true;
242}
243
231 /** 244 /**
232 * cik_ih_decode_iv - decode an interrupt vector 245 * cik_ih_decode_iv - decode an interrupt vector
233 * 246 *
@@ -433,6 +446,7 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = {
433 446
434static const struct amdgpu_ih_funcs cik_ih_funcs = { 447static const struct amdgpu_ih_funcs cik_ih_funcs = {
435 .get_wptr = cik_ih_get_wptr, 448 .get_wptr = cik_ih_get_wptr,
449 .prescreen_iv = cik_ih_prescreen_iv,
436 .decode_iv = cik_ih_decode_iv, 450 .decode_iv = cik_ih_decode_iv,
437 .set_rptr = cik_ih_set_rptr 451 .set_rptr = cik_ih_set_rptr
438}; 452};
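cik_ih (and cz_ih below) gain a prescreen_iv entry in amdgpu_ih_funcs; on these parts it simply accepts every vector, but the hook is there to let an IH block drop an interrupt before it is decoded. A rough sketch of where such a hook sits in a ring-processing loop; the loop structure and names are assumptions, not the driver's IH processing code:

#include <stdbool.h>
#include <stdio.h>

struct ih_sketch { unsigned int rptr, wptr; };

struct ih_funcs_sketch {
	bool (*prescreen_iv)(struct ih_sketch *ih);  /* false: skip this entry   */
	void (*decode_iv)(struct ih_sketch *ih);     /* consumes one ring entry  */
};

static bool accept_all(struct ih_sketch *ih)
{
	(void)ih;
	return true;                                 /* process all interrupts   */
}

static void decode_one(struct ih_sketch *ih)
{
	printf("decoded entry at %u\n", ih->rptr);
	ih->rptr += 4;                               /* advance past the entry   */
}

static void process_ring(struct ih_sketch *ih, const struct ih_funcs_sketch *f)
{
	while (ih->rptr != ih->wptr) {
		if (!f->prescreen_iv(ih)) {          /* filtered: just consume it */
			ih->rptr += 4;
			continue;
		}
		f->decode_iv(ih);
	}
}

int main(void)
{
	struct ih_sketch ih = { .rptr = 0, .wptr = 12 };
	const struct ih_funcs_sketch f = {
		.prescreen_iv = accept_all,
		.decode_iv = decode_one,
	};

	process_ring(&ih, &f);
	return 0;
}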
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index f508f4d01e4a..60cecd117705 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1387,8 +1387,13 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
1387} 1387}
1388 1388
1389static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { 1389static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
1390 .copy_pte_num_dw = 7,
1390 .copy_pte = cik_sdma_vm_copy_pte, 1391 .copy_pte = cik_sdma_vm_copy_pte,
1392
1391 .write_pte = cik_sdma_vm_write_pte, 1393 .write_pte = cik_sdma_vm_write_pte,
1394
1395 .set_max_nums_pte_pde = 0x1fffff >> 3,
1396 .set_pte_pde_num_dw = 10,
1392 .set_pte_pde = cik_sdma_vm_set_pte_pde, 1397 .set_pte_pde = cik_sdma_vm_set_pte_pde,
1393}; 1398};
1394 1399
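The new copy_pte_num_dw, set_pte_pde_num_dw and set_max_nums_pte_pde fields expose per-engine sizing constants so common VM code can budget command-buffer space instead of hard-coding packet sizes. A sketch of the kind of dword budgeting this enables; the formula and helper names are illustrative assumptions, not the amdgpu_vm update path:

#include <stdio.h>

/* Per-engine sizing knobs, mirroring the new cik_sdma values. */
struct pte_funcs_sketch {
	unsigned int copy_pte_num_dw;      /* dwords per copy-based PTE update */
	unsigned int set_pte_pde_num_dw;   /* dwords per SET_PTE_PDE packet    */
	unsigned int set_max_nums_pte_pde; /* max PTEs one packet may cover    */
};

static unsigned int ib_dwords_for_ptes(const struct pte_funcs_sketch *f,
				       unsigned int num_ptes)
{
	/* Ceil-divide the PTE count into packets, then multiply by packet size. */
	unsigned int packets =
		(num_ptes + f->set_max_nums_pte_pde - 1) / f->set_max_nums_pte_pde;

	return packets * f->set_pte_pde_num_dw;
}

int main(void)
{
	const struct pte_funcs_sketch cik = {
		.copy_pte_num_dw = 7,
		.set_pte_pde_num_dw = 10,
		.set_max_nums_pte_pde = 0x1fffff >> 3,   /* 262143 PTEs per packet */
	};

	printf("IB dwords for 1M PTEs: %u\n", ib_dwords_for_ptes(&cik, 1 << 20));
	return 0;
}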
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 0c1209cdd1cb..b6cdf4afaf46 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -208,6 +208,19 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
208} 208}
209 209
210/** 210/**
211 * cz_ih_prescreen_iv - prescreen an interrupt vector
212 *
213 * @adev: amdgpu_device pointer
214 *
215 * Returns true if the interrupt vector should be further processed.
216 */
217static bool cz_ih_prescreen_iv(struct amdgpu_device *adev)
218{
219 /* Process all interrupts */
220 return true;
221}
222
223/**
211 * cz_ih_decode_iv - decode an interrupt vector 224 * cz_ih_decode_iv - decode an interrupt vector
212 * 225 *
213 * @adev: amdgpu_device pointer 226 * @adev: amdgpu_device pointer
@@ -414,6 +427,7 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = {
414 427
415static const struct amdgpu_ih_funcs cz_ih_funcs = { 428static const struct amdgpu_ih_funcs cz_ih_funcs = {
416 .get_wptr = cz_ih_get_wptr, 429 .get_wptr = cz_ih_get_wptr,
430 .prescreen_iv = cz_ih_prescreen_iv,
417 .decode_iv = cz_ih_decode_iv, 431 .decode_iv = cz_ih_decode_iv,
418 .set_rptr = cz_ih_set_rptr 432 .set_rptr = cz_ih_set_rptr
419}; 433};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index d228f5a99044..dbbe986f90f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -636,7 +636,194 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
636 NUM_BANKS(ADDR_SURF_2_BANK); 636 NUM_BANKS(ADDR_SURF_2_BANK);
637 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 637 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
638 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); 638 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
639 } else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) { 639 } else if (adev->asic_type == CHIP_OLAND) {
640 tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
641 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
642 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
643 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
644 NUM_BANKS(ADDR_SURF_16_BANK) |
645 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
648 tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
649 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
650 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
651 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
652 NUM_BANKS(ADDR_SURF_16_BANK) |
653 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
654 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
655 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
656 tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
657 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
658 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
659 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
660 NUM_BANKS(ADDR_SURF_16_BANK) |
661 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
662 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
663 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
664 tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
665 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
666 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
667 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
668 NUM_BANKS(ADDR_SURF_16_BANK) |
669 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
672 tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
673 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
674 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
675 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
676 NUM_BANKS(ADDR_SURF_16_BANK) |
677 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
678 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
679 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
680 tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
681 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
682 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
683 TILE_SPLIT(split_equal_to_row_size) |
684 NUM_BANKS(ADDR_SURF_16_BANK) |
685 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
686 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
687 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
688 tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
689 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
690 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
691 TILE_SPLIT(split_equal_to_row_size) |
692 NUM_BANKS(ADDR_SURF_16_BANK) |
693 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
694 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
695 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
696 tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
697 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
698 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
699 TILE_SPLIT(split_equal_to_row_size) |
700 NUM_BANKS(ADDR_SURF_16_BANK) |
701 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
702 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
703 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
704 tilemode[8] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
705 ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
706 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
707 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
708 NUM_BANKS(ADDR_SURF_16_BANK) |
709 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
710 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
711 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
712 tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
713 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
714 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
715 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
716 NUM_BANKS(ADDR_SURF_16_BANK) |
717 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
718 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
719 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
720 tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
721 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
722 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
723 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
724 NUM_BANKS(ADDR_SURF_16_BANK) |
725 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
728 tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
729 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
730 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
732 NUM_BANKS(ADDR_SURF_16_BANK) |
733 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
736 tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
737 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
738 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
739 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
740 NUM_BANKS(ADDR_SURF_16_BANK) |
741 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
742 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
743 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
744 tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
745 ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
746 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
747 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
748 NUM_BANKS(ADDR_SURF_16_BANK) |
749 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
750 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
751 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
752 tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
753 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
754 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
755 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
756 NUM_BANKS(ADDR_SURF_16_BANK) |
757 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
758 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
759 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
760 tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
761 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
762 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
764 NUM_BANKS(ADDR_SURF_16_BANK) |
765 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
768 tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
769 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
770 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
771 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
772 NUM_BANKS(ADDR_SURF_16_BANK) |
773 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
776 tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
777 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
778 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
779 TILE_SPLIT(split_equal_to_row_size) |
780 NUM_BANKS(ADDR_SURF_16_BANK) |
781 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
782 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
783 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
784 tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
785 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
786 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
788 NUM_BANKS(ADDR_SURF_16_BANK) |
789 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
790 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
791 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
792 tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
793 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
794 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
795 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
796 NUM_BANKS(ADDR_SURF_16_BANK) |
797 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
798 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
799 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
800 tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
801 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
802 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
803 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
804 NUM_BANKS(ADDR_SURF_16_BANK) |
805 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
806 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
807 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
808 tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
809 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
810 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
812 NUM_BANKS(ADDR_SURF_16_BANK) |
813 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
816 tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
817 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
818 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
820 NUM_BANKS(ADDR_SURF_8_BANK) |
821 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
822 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
823 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1);
824 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
825 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
826 } else if (adev->asic_type == CHIP_HAINAN) {
640 tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 827 tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
641 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 828 ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
642 PIPE_CONFIG(ADDR_SURF_P2) | 829 PIPE_CONFIG(ADDR_SURF_P2) |
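The new OLAND branch above builds each GB_TILE_MODEn value by OR-ing field macros (MICRO_TILE_MODE, ARRAY_MODE, PIPE_CONFIG, and so on) into one 32-bit word, then writes the whole table out register by register. The sketch below shows only that composition pattern; the shift values are made-up placeholders and do not reflect the real SI register layout:

#include <stdint.h>
#include <stdio.h>

/* Placeholder shifts -- illustrative only, NOT the hardware encoding. */
#define SK_MICRO_TILE_MODE(x)  ((uint32_t)(x) << 0)
#define SK_ARRAY_MODE(x)       ((uint32_t)(x) << 4)
#define SK_PIPE_CONFIG(x)      ((uint32_t)(x) << 8)
#define SK_TILE_SPLIT(x)       ((uint32_t)(x) << 12)
#define SK_NUM_BANKS(x)        ((uint32_t)(x) << 16)

int main(void)
{
	uint32_t tilemode[32] = { 0 };

	/* Same shape as the driver table: one OR-chain per tile mode entry. */
	tilemode[0] = SK_MICRO_TILE_MODE(1) |
		      SK_ARRAY_MODE(4)      |
		      SK_PIPE_CONFIG(12)    |
		      SK_TILE_SPLIT(0)      |
		      SK_NUM_BANKS(3);

	/* The driver then loops over the table and writes GB_TILE_MODE0 + i. */
	printf("GB_TILE_MODE0 = 0x%08x\n", tilemode[0]);
	return 0;
}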
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 53a4af7596c1..00868764a0dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1921,6 +1921,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
1921 ELEMENT_SIZE, 1); 1921 ELEMENT_SIZE, 1);
1922 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 1922 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
1923 INDEX_STRIDE, 3); 1923 INDEX_STRIDE, 3);
1924 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
1924 1925
1925 mutex_lock(&adev->srbm_mutex); 1926 mutex_lock(&adev->srbm_mutex);
1926 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 1927 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
@@ -1934,7 +1935,6 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
1934 WREG32(mmSH_MEM_APE1_BASE, 1); 1935 WREG32(mmSH_MEM_APE1_BASE, 1);
1935 WREG32(mmSH_MEM_APE1_LIMIT, 0); 1936 WREG32(mmSH_MEM_APE1_LIMIT, 0);
1936 WREG32(mmSH_MEM_BASES, sh_mem_base); 1937 WREG32(mmSH_MEM_BASES, sh_mem_base);
1937 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
1938 } 1938 }
1939 cik_srbm_select(adev, 0, 0, 0, 0); 1939 cik_srbm_select(adev, 0, 0, 0, 0);
1940 mutex_unlock(&adev->srbm_mutex); 1940 mutex_unlock(&adev->srbm_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0710b0b2e4b6..dfc10b1baea0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -125,24 +125,39 @@ MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 125MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126 126
127MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 127MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
128MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 129MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
130MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
129MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 131MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
132MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
130MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 133MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
134MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
131MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); 135MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
136MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
132MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); 137MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133 138
134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 139MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
140MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
135MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 141MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
142MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
136MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); 143MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
144MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
137MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); 145MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
146MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
138MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); 147MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
148MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
139MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 149MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 150
141MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); 151MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
152MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
142MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); 153MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
154MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
143MODULE_FIRMWARE("amdgpu/polaris12_me.bin"); 155MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
156MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
144MODULE_FIRMWARE("amdgpu/polaris12_mec.bin"); 157MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
158MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
145MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); 159MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
160MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
146MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); 161MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
147 162
148static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 163static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
@@ -918,8 +933,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
918 BUG(); 933 BUG();
919 } 934 }
920 935
921 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 936 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
922 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 937 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
938 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
939 if (err == -ENOENT) {
940 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
941 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
942 }
943 } else {
944 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
945 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
946 }
923 if (err) 947 if (err)
924 goto out; 948 goto out;
925 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 949 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
@@ -929,8 +953,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
929 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 953 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
930 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 954 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
931 955
932 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 956 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
933 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 957 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
958 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
959 if (err == -ENOENT) {
960 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
961 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
962 }
963 } else {
964 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
965 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
966 }
934 if (err) 967 if (err)
935 goto out; 968 goto out;
936 err = amdgpu_ucode_validate(adev->gfx.me_fw); 969 err = amdgpu_ucode_validate(adev->gfx.me_fw);
@@ -941,8 +974,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
941 974
942 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 975 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
943 976
944 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 977 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
945 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 978 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
979 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
980 if (err == -ENOENT) {
981 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
982 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
983 }
984 } else {
985 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
986 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
987 }
946 if (err) 988 if (err)
947 goto out; 989 goto out;
948 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 990 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
@@ -1012,8 +1054,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
1012 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 1054 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1013 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1055 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1014 1056
1015 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1057 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1016 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1058 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1059 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1060 if (err == -ENOENT) {
1061 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1062 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1063 }
1064 } else {
1065 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1066 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1067 }
1017 if (err) 1068 if (err)
1018 goto out; 1069 goto out;
1019 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1070 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
@@ -1025,8 +1076,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
1025 1076
1026 if ((adev->asic_type != CHIP_STONEY) && 1077 if ((adev->asic_type != CHIP_STONEY) &&
1027 (adev->asic_type != CHIP_TOPAZ)) { 1078 (adev->asic_type != CHIP_TOPAZ)) {
1028 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1079 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1029 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1080 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1081 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1082 if (err == -ENOENT) {
1083 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1084 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1085 }
1086 } else {
1087 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1088 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1089 }
1030 if (!err) { 1090 if (!err) {
1031 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1091 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1032 if (err) 1092 if (err)
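The five request_firmware hunks above follow the same shape for Polaris parts: ask for the versioned "<chip>_<block>_2.bin" image first and fall back to the legacy name only when the file is absent (-ENOENT), so that validation or I/O errors on the new file still fail loudly. A hedged userspace sketch of that try-then-fall-back flow; load_blob is a made-up stand-in for request_firmware:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Made-up loader: pretend only the legacy image exists on this system. */
static int load_blob(const char *name)
{
	if (strstr(name, "_2.bin"))
		return -ENOENT;              /* versioned image not shipped */
	printf("loaded %s\n", name);
	return 0;
}

static int load_with_fallback(const char *chip, const char *block)
{
	char fw_name[64];
	int err;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%s_2.bin", chip, block);
	err = load_blob(fw_name);
	if (err == -ENOENT) {                /* only a missing file triggers the fallback */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%s.bin", chip, block);
		err = load_blob(fw_name);
	}
	return err;
}

int main(void)
{
	return load_with_fallback("polaris10", "pfp") ? 1 : 0;
}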
@@ -2053,6 +2113,7 @@ static int gfx_v8_0_sw_fini(void *handle)
2053 amdgpu_gfx_compute_mqd_sw_fini(adev); 2113 amdgpu_gfx_compute_mqd_sw_fini(adev);
2054 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2114 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2055 amdgpu_gfx_kiq_fini(adev); 2115 amdgpu_gfx_kiq_fini(adev);
2116 amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);
2056 2117
2057 gfx_v8_0_mec_fini(adev); 2118 gfx_v8_0_mec_fini(adev);
2058 gfx_v8_0_rlc_fini(adev); 2119 gfx_v8_0_rlc_fini(adev);
@@ -3707,6 +3768,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3707 ELEMENT_SIZE, 1); 3768 ELEMENT_SIZE, 1);
3708 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3769 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3709 INDEX_STRIDE, 3); 3770 INDEX_STRIDE, 3);
3771 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3772
3710 mutex_lock(&adev->srbm_mutex); 3773 mutex_lock(&adev->srbm_mutex);
3711 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3774 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3712 vi_srbm_select(adev, 0, 0, 0, i); 3775 vi_srbm_select(adev, 0, 0, 0, i);
@@ -3730,7 +3793,6 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3730 3793
3731 WREG32(mmSH_MEM_APE1_BASE, 1); 3794 WREG32(mmSH_MEM_APE1_BASE, 1);
3732 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3795 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3733 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3734 } 3796 }
3735 vi_srbm_select(adev, 0, 0, 0, 0); 3797 vi_srbm_select(adev, 0, 0, 0, 0);
3736 mutex_unlock(&adev->srbm_mutex); 3798 mutex_unlock(&adev->srbm_mutex);
@@ -4576,12 +4638,10 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4576 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4638 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4577 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4639 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4578 mqd->compute_misc_reserved = 0x00000003; 4640 mqd->compute_misc_reserved = 0x00000003;
4579 if (!(adev->flags & AMD_IS_APU)) { 4641 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4580 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4642 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4581 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); 4643 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4582 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4644 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4583 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4584 }
4585 eop_base_addr = ring->eop_gpu_addr >> 8; 4645 eop_base_addr = ring->eop_gpu_addr >> 8;
4586 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4646 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4587 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4647 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -4752,7 +4812,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4752 4812
4753 gfx_v8_0_kiq_setting(ring); 4813 gfx_v8_0_kiq_setting(ring);
4754 4814
4755 if (adev->gfx.in_reset) { /* for GPU_RESET case */ 4815 if (adev->in_sriov_reset) { /* for GPU_RESET case */
4756 /* reset MQD to a clean status */ 4816 /* reset MQD to a clean status */
4757 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4817 if (adev->gfx.mec.mqd_backup[mqd_idx])
4758 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4818 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
@@ -4767,8 +4827,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4767 mutex_unlock(&adev->srbm_mutex); 4827 mutex_unlock(&adev->srbm_mutex);
4768 } else { 4828 } else {
4769 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4829 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4770 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; 4830 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4771 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; 4831 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4772 mutex_lock(&adev->srbm_mutex); 4832 mutex_lock(&adev->srbm_mutex);
4773 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4833 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4774 gfx_v8_0_mqd_init(ring); 4834 gfx_v8_0_mqd_init(ring);
@@ -4789,10 +4849,10 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4789 struct vi_mqd *mqd = ring->mqd_ptr; 4849 struct vi_mqd *mqd = ring->mqd_ptr;
4790 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4850 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4791 4851
4792 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { 4852 if (!adev->in_sriov_reset && !adev->gfx.in_suspend) {
4793 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4853 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4794 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; 4854 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4795 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; 4855 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4796 mutex_lock(&adev->srbm_mutex); 4856 mutex_lock(&adev->srbm_mutex);
4797 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4857 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4798 gfx_v8_0_mqd_init(ring); 4858 gfx_v8_0_mqd_init(ring);
@@ -4801,7 +4861,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4801 4861
4802 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4862 if (adev->gfx.mec.mqd_backup[mqd_idx])
4803 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4863 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4804 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ 4864 } else if (adev->in_sriov_reset) { /* for GPU_RESET case */
4805 /* reset MQD to a clean status */ 4865 /* reset MQD to a clean status */
4806 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4866 if (adev->gfx.mec.mqd_backup[mqd_idx])
4807 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4867 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
@@ -4974,12 +5034,69 @@ static int gfx_v8_0_hw_init(void *handle)
4974 return r; 5034 return r;
4975} 5035}
4976 5036
 5037static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
5038{
5039 struct amdgpu_device *adev = kiq_ring->adev;
5040 uint32_t scratch, tmp = 0;
5041 int r, i;
5042
5043 r = amdgpu_gfx_scratch_get(adev, &scratch);
5044 if (r) {
5045 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5046 return r;
5047 }
5048 WREG32(scratch, 0xCAFEDEAD);
5049
5050 r = amdgpu_ring_alloc(kiq_ring, 10);
5051 if (r) {
5052 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5053 amdgpu_gfx_scratch_free(adev, scratch);
5054 return r;
5055 }
5056
5057 /* unmap queues */
5058 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5059 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5060 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5061 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5062 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5063 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5064 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5065 amdgpu_ring_write(kiq_ring, 0);
5066 amdgpu_ring_write(kiq_ring, 0);
5067 amdgpu_ring_write(kiq_ring, 0);
5068 /* write to scratch for completion */
5069 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5070 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5071 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5072 amdgpu_ring_commit(kiq_ring);
5073
5074 for (i = 0; i < adev->usec_timeout; i++) {
5075 tmp = RREG32(scratch);
5076 if (tmp == 0xDEADBEEF)
5077 break;
5078 DRM_UDELAY(1);
5079 }
5080 if (i >= adev->usec_timeout) {
5081 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5082 r = -EINVAL;
5083 }
5084 amdgpu_gfx_scratch_free(adev, scratch);
5085 return r;
5086}
5087
4977static int gfx_v8_0_hw_fini(void *handle) 5088static int gfx_v8_0_hw_fini(void *handle)
4978{ 5089{
4979 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5090 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5091 int i;
4980 5092
4981 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5093 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4982 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5094 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5095
5096 /* disable KCQ to avoid CPC touch memory not valid anymore */
5097 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5098 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5099
4983 if (amdgpu_sriov_vf(adev)) { 5100 if (amdgpu_sriov_vf(adev)) {
4984 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5101 pr_debug("For SRIOV client, shouldn't do anything.\n");
4985 return 0; 5102 return 0;
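
The new gfx_v8_0_kcq_disable() above follows a common KIQ submission pattern: seed a scratch register with a sentinel, queue an UNMAP_QUEUES packet plus a register write of a completion token, then poll the scratch register with a bounded timeout. Below is a minimal user-space sketch of just that handshake; the register, the "hardware" step and the helper names are simplified stand-ins for illustration, not the amdgpu API.

/*
 * Minimal sketch of the scratch-register handshake used by the new
 * gfx_v8_0_kcq_disable(): seed a scratch register with a sentinel,
 * queue work that overwrites it with a completion token, then poll
 * with a bounded timeout.  Everything here is a simplified stand-in.
 */
#include <stdint.h>
#include <stdio.h>

#define SENTINEL     0xCAFEDEAD
#define COMPLETION   0xDEADBEEF
#define USEC_TIMEOUT 100000

static volatile uint32_t scratch_reg;	/* stands in for an MMIO scratch register */

/* pretend the hardware processed the queued packet and wrote the token */
static void fake_hw_execute(void)
{
	scratch_reg = COMPLETION;
}

static int wait_for_completion_poll(void)
{
	int i;

	scratch_reg = SENTINEL;		/* WREG32(scratch, 0xCAFEDEAD) */
	fake_hw_execute();		/* ring commit + packet execution */

	for (i = 0; i < USEC_TIMEOUT; i++) {
		if (scratch_reg == COMPLETION)
			return 0;
		/* the driver delays 1us per iteration here (DRM_UDELAY(1)) */
	}
	return -1;			/* maps to -EINVAL in the driver */
}

int main(void)
{
	printf("KCQ disable handshake: %s\n",
	       wait_for_completion_poll() ? "timed out" : "completed");
	return 0;
}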
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b39f81dda847..deeaee1457ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -66,38 +66,70 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
66 66
67static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 67static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
68{ 68{
69 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 69 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE),
70 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)}, 70 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
71 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE), 71 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0),
72 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)}, 72 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) },
73 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE), 73 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE),
74 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)}, 74 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
75 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE), 75 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1),
76 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)}, 76 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1) },
77 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE), 77 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE),
78 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)}, 78 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
79 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE), 79 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2),
80 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)}, 80 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2) },
81 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE), 81 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE),
82 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)}, 82 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
83 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE), 83 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3),
84 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)}, 84 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3) },
85 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE), 85 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE),
86 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)}, 86 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE),
87 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE), 87 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4),
88 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)}, 88 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4) },
89 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE), 89 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE),
90 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)}, 90 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE),
91 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE), 91 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5),
92 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)}, 92 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5) },
93 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE), 93 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE),
94 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)}, 94 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE),
95 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE), 95 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6),
96 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)}, 96 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6) },
97 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE), 97 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE),
98 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)}, 98 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE),
99 {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE), 99 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7),
100 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)} 100 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7) },
101 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE),
102 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE),
103 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8),
104 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8) },
105 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE),
106 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE),
107 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9),
108 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9) },
109 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE),
110 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE),
111 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10),
112 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10) },
113 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE),
114 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE),
115 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11),
116 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11) },
117 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE),
118 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE),
119 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12),
120 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)},
121 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE),
122 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE),
123 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13),
124 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13) },
125 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE),
126 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE),
127 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14),
128 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14) },
129 { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE),
130 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE),
131 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15),
132 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15) }
101}; 133};
102 134
103static const u32 golden_settings_gc_9_0[] = 135static const u32 golden_settings_gc_9_0[] =
@@ -352,6 +384,25 @@ err1:
352 return r; 384 return r;
353} 385}
354 386
387
388static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
389{
390 release_firmware(adev->gfx.pfp_fw);
391 adev->gfx.pfp_fw = NULL;
392 release_firmware(adev->gfx.me_fw);
393 adev->gfx.me_fw = NULL;
394 release_firmware(adev->gfx.ce_fw);
395 adev->gfx.ce_fw = NULL;
396 release_firmware(adev->gfx.rlc_fw);
397 adev->gfx.rlc_fw = NULL;
398 release_firmware(adev->gfx.mec_fw);
399 adev->gfx.mec_fw = NULL;
400 release_firmware(adev->gfx.mec2_fw);
401 adev->gfx.mec2_fw = NULL;
402
403 kfree(adev->gfx.rlc.register_list_format);
404}
405
355static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 406static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
356{ 407{
357 const char *chip_name; 408 const char *chip_name;
@@ -1120,30 +1171,22 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1120{ 1171{
1121 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 1172 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1122 int r; 1173 int r;
1123 u32 data; 1174 u32 data, base;
1124 u32 size;
1125 u32 base;
1126 1175
1127 if (!amdgpu_ngg) 1176 if (!amdgpu_ngg)
1128 return 0; 1177 return 0;
1129 1178
1130 /* Program buffer size */ 1179 /* Program buffer size */
1131 data = 0; 1180 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1132 size = adev->gfx.ngg.buf[NGG_PRIM].size / 256; 1181 adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1133 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size); 1182 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1134 1183 adev->gfx.ngg.buf[NGG_POS].size >> 8);
1135 size = adev->gfx.ngg.buf[NGG_POS].size / 256;
1136 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);
1137
1138 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 1184 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1139 1185
1140 data = 0; 1186 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1141 size = adev->gfx.ngg.buf[NGG_CNTL].size / 256; 1187 adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1142 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size); 1188 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1143 1189 adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1144 size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024;
1145 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);
1146
1147 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 1190 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1148 1191
1149 /* Program buffer base address */ 1192 /* Program buffer base address */
@@ -1306,7 +1349,10 @@ static int gfx_v9_0_sw_init(void *handle)
1306 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1349 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1307 ring = &adev->gfx.gfx_ring[i]; 1350 ring = &adev->gfx.gfx_ring[i];
1308 ring->ring_obj = NULL; 1351 ring->ring_obj = NULL;
1309 sprintf(ring->name, "gfx"); 1352 if (!i)
1353 sprintf(ring->name, "gfx");
1354 else
1355 sprintf(ring->name, "gfx_%d", i);
1310 ring->use_doorbell = true; 1356 ring->use_doorbell = true;
1311 ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; 1357 ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
1312 r = amdgpu_ring_init(adev, ring, 1024, 1358 r = amdgpu_ring_init(adev, ring, 1024,
@@ -1346,7 +1392,7 @@ static int gfx_v9_0_sw_init(void *handle)
1346 return r; 1392 return r;
1347 1393
1348 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 1394 /* create MQD for all compute queues as wel as KIQ for SRIOV case */
1349 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd)); 1395 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1350 if (r) 1396 if (r)
1351 return r; 1397 return r;
1352 1398
@@ -1398,9 +1444,11 @@ static int gfx_v9_0_sw_fini(void *handle)
1398 amdgpu_gfx_compute_mqd_sw_fini(adev); 1444 amdgpu_gfx_compute_mqd_sw_fini(adev);
1399 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1445 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1400 amdgpu_gfx_kiq_fini(adev); 1446 amdgpu_gfx_kiq_fini(adev);
1447 amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);
1401 1448
1402 gfx_v9_0_mec_fini(adev); 1449 gfx_v9_0_mec_fini(adev);
1403 gfx_v9_0_ngg_fini(adev); 1450 gfx_v9_0_ngg_fini(adev);
1451 gfx_v9_0_free_microcode(adev);
1404 1452
1405 return 0; 1453 return 0;
1406} 1454}
@@ -1740,11 +1788,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
1740 1788
1741static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 1789static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
1742{ 1790{
1743 u32 tmp = 0; 1791 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
1744
1745 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
1746 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1747 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
1748} 1792}
1749 1793
1750static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 1794static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
@@ -1822,16 +1866,11 @@ static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev
1822 uint32_t default_data = 0; 1866 uint32_t default_data = 0;
1823 1867
1824 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1868 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1825 1869 data = REG_SET_FIELD(data, RLC_PG_CNTL,
1826 if (enable == true) { 1870 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
1827 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; 1871 enable ? 1 : 0);
1828 if (default_data != data) 1872 if (default_data != data)
1829 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1873 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1830 } else {
1831 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
1832 if(default_data != data)
1833 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1834 }
1835} 1874}
1836 1875
1837static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 1876static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
@@ -1841,16 +1880,11 @@ static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *ad
1841 uint32_t default_data = 0; 1880 uint32_t default_data = 0;
1842 1881
1843 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1882 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1844 1883 data = REG_SET_FIELD(data, RLC_PG_CNTL,
1845 if (enable == true) { 1884 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
1846 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; 1885 enable ? 1 : 0);
1847 if(default_data != data) 1886 if(default_data != data)
1848 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1887 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1849 } else {
1850 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
1851 if(default_data != data)
1852 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1853 }
1854} 1888}
1855 1889
1856static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 1890static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
@@ -1860,16 +1894,11 @@ static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
1860 uint32_t default_data = 0; 1894 uint32_t default_data = 0;
1861 1895
1862 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1896 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1863 1897 data = REG_SET_FIELD(data, RLC_PG_CNTL,
1864 if (enable == true) { 1898 CP_PG_DISABLE,
1865 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; 1899 enable ? 0 : 1);
1866 if(default_data != data) 1900 if(default_data != data)
1867 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1901 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1868 } else {
1869 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
1870 if(default_data != data)
1871 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1872 }
1873} 1902}
1874 1903
1875static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 1904static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
@@ -1878,10 +1907,9 @@ static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
1878 uint32_t data, default_data; 1907 uint32_t data, default_data;
1879 1908
1880 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1909 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1881 if (enable == true) 1910 data = REG_SET_FIELD(data, RLC_PG_CNTL,
1882 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 1911 GFX_POWER_GATING_ENABLE,
1883 else 1912 enable ? 1 : 0);
1884 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
1885 if(default_data != data) 1913 if(default_data != data)
1886 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1914 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1887} 1915}
@@ -1892,10 +1920,9 @@ static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
1892 uint32_t data, default_data; 1920 uint32_t data, default_data;
1893 1921
1894 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1922 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1895 if (enable == true) 1923 data = REG_SET_FIELD(data, RLC_PG_CNTL,
1896 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; 1924 GFX_PIPELINE_PG_ENABLE,
1897 else 1925 enable ? 1 : 0);
1898 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
1899 if(default_data != data) 1926 if(default_data != data)
1900 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1927 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1901 1928
@@ -1910,10 +1937,9 @@ static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
1910 uint32_t data, default_data; 1937 uint32_t data, default_data;
1911 1938
1912 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1939 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1913 if (enable == true) 1940 data = REG_SET_FIELD(data, RLC_PG_CNTL,
1914 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; 1941 STATIC_PER_CU_PG_ENABLE,
1915 else 1942 enable ? 1 : 0);
1916 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
1917 if(default_data != data) 1943 if(default_data != data)
1918 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1944 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1919} 1945}
@@ -1924,10 +1950,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad
1924 uint32_t data, default_data; 1950 uint32_t data, default_data;
1925 1951
1926 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 1952 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1927 if (enable == true) 1953 data = REG_SET_FIELD(data, RLC_PG_CNTL,
1928 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; 1954 DYN_PER_CU_PG_ENABLE,
1929 else 1955 enable ? 1 : 0);
1930 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
1931 if(default_data != data) 1956 if(default_data != data)
1932 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 1957 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1933} 1958}
@@ -1967,13 +1992,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
1967 1992
1968void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 1993void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
1969{ 1994{
1970 u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); 1995 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
1971
1972 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1973 WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
1974
1975 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 1996 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
1976
1977 gfx_v9_0_wait_for_rlc_serdes(adev); 1997 gfx_v9_0_wait_for_rlc_serdes(adev);
1978} 1998}
1979 1999
@@ -2045,8 +2065,10 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2045{ 2065{
2046 int r; 2066 int r;
2047 2067
2048 if (amdgpu_sriov_vf(adev)) 2068 if (amdgpu_sriov_vf(adev)) {
2069 gfx_v9_0_init_csb(adev);
2049 return 0; 2070 return 0;
2071 }
2050 2072
2051 gfx_v9_0_rlc_stop(adev); 2073 gfx_v9_0_rlc_stop(adev);
2052 2074
@@ -2157,7 +2179,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2157 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2179 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2158 const struct cs_section_def *sect = NULL; 2180 const struct cs_section_def *sect = NULL;
2159 const struct cs_extent_def *ext = NULL; 2181 const struct cs_extent_def *ext = NULL;
2160 int r, i; 2182 int r, i, tmp;
2161 2183
2162 /* init the CP */ 2184 /* init the CP */
2163 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2185 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
@@ -2165,7 +2187,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2165 2187
2166 gfx_v9_0_cp_gfx_enable(adev, true); 2188 gfx_v9_0_cp_gfx_enable(adev, true);
2167 2189
2168 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4); 2190 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2169 if (r) { 2191 if (r) {
2170 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2192 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2171 return r; 2193 return r;
@@ -2203,6 +2225,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2203 amdgpu_ring_write(ring, 0x8000); 2225 amdgpu_ring_write(ring, 0x8000);
2204 amdgpu_ring_write(ring, 0x8000); 2226 amdgpu_ring_write(ring, 0x8000);
2205 2227
2228 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
2229 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2230 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2231 amdgpu_ring_write(ring, tmp);
2232 amdgpu_ring_write(ring, 0);
2233
2206 amdgpu_ring_commit(ring); 2234 amdgpu_ring_commit(ring);
2207 2235
2208 return 0; 2236 return 0;
@@ -2457,6 +2485,13 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2457 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2485 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2458 mqd->compute_misc_reserved = 0x00000003; 2486 mqd->compute_misc_reserved = 0x00000003;
2459 2487
2488 mqd->dynamic_cu_mask_addr_lo =
2489 lower_32_bits(ring->mqd_gpu_addr
2490 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2491 mqd->dynamic_cu_mask_addr_hi =
2492 upper_32_bits(ring->mqd_gpu_addr
2493 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2494
2460 eop_base_addr = ring->eop_gpu_addr >> 8; 2495 eop_base_addr = ring->eop_gpu_addr >> 8;
2461 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2496 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2462 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2497 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -2480,10 +2515,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2480 DOORBELL_SOURCE, 0); 2515 DOORBELL_SOURCE, 0);
2481 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2516 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2482 DOORBELL_HIT, 0); 2517 DOORBELL_HIT, 0);
2483 } 2518 } else {
2484 else
2485 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2519 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2486 DOORBELL_EN, 0); 2520 DOORBELL_EN, 0);
2521 }
2487 2522
2488 mqd->cp_hqd_pq_doorbell_control = tmp; 2523 mqd->cp_hqd_pq_doorbell_control = tmp;
2489 2524
@@ -2686,10 +2721,10 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
2686 2721
2687 gfx_v9_0_kiq_setting(ring); 2722 gfx_v9_0_kiq_setting(ring);
2688 2723
2689 if (adev->gfx.in_reset) { /* for GPU_RESET case */ 2724 if (adev->in_sriov_reset) { /* for GPU_RESET case */
2690 /* reset MQD to a clean status */ 2725 /* reset MQD to a clean status */
2691 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2726 if (adev->gfx.mec.mqd_backup[mqd_idx])
2692 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 2727 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
2693 2728
2694 /* reset ring buffer */ 2729 /* reset ring buffer */
2695 ring->wptr = 0; 2730 ring->wptr = 0;
@@ -2701,7 +2736,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
2701 soc15_grbm_select(adev, 0, 0, 0, 0); 2736 soc15_grbm_select(adev, 0, 0, 0, 0);
2702 mutex_unlock(&adev->srbm_mutex); 2737 mutex_unlock(&adev->srbm_mutex);
2703 } else { 2738 } else {
2704 memset((void *)mqd, 0, sizeof(*mqd)); 2739 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
2740 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
2741 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
2705 mutex_lock(&adev->srbm_mutex); 2742 mutex_lock(&adev->srbm_mutex);
2706 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 2743 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2707 gfx_v9_0_mqd_init(ring); 2744 gfx_v9_0_mqd_init(ring);
@@ -2710,7 +2747,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
2710 mutex_unlock(&adev->srbm_mutex); 2747 mutex_unlock(&adev->srbm_mutex);
2711 2748
2712 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2749 if (adev->gfx.mec.mqd_backup[mqd_idx])
2713 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 2750 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
2714 } 2751 }
2715 2752
2716 return 0; 2753 return 0;
@@ -2722,8 +2759,10 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
2722 struct v9_mqd *mqd = ring->mqd_ptr; 2759 struct v9_mqd *mqd = ring->mqd_ptr;
2723 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 2760 int mqd_idx = ring - &adev->gfx.compute_ring[0];
2724 2761
2725 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { 2762 if (!adev->in_sriov_reset && !adev->gfx.in_suspend) {
2726 memset((void *)mqd, 0, sizeof(*mqd)); 2763 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
2764 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
2765 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
2727 mutex_lock(&adev->srbm_mutex); 2766 mutex_lock(&adev->srbm_mutex);
2728 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 2767 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2729 gfx_v9_0_mqd_init(ring); 2768 gfx_v9_0_mqd_init(ring);
@@ -2731,11 +2770,11 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
2731 mutex_unlock(&adev->srbm_mutex); 2770 mutex_unlock(&adev->srbm_mutex);
2732 2771
2733 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2772 if (adev->gfx.mec.mqd_backup[mqd_idx])
2734 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 2773 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
2735 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ 2774 } else if (adev->in_sriov_reset) { /* for GPU_RESET case */
2736 /* reset MQD to a clean status */ 2775 /* reset MQD to a clean status */
2737 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2776 if (adev->gfx.mec.mqd_backup[mqd_idx])
2738 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 2777 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
2739 2778
2740 /* reset ring buffer */ 2779 /* reset ring buffer */
2741 ring->wptr = 0; 2780 ring->wptr = 0;
@@ -2876,12 +2915,70 @@ static int gfx_v9_0_hw_init(void *handle)
2876 return r; 2915 return r;
2877} 2916}
2878 2917
2918static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
2919{
2920 struct amdgpu_device *adev = kiq_ring->adev;
2921 uint32_t scratch, tmp = 0;
2922 int r, i;
2923
2924 r = amdgpu_gfx_scratch_get(adev, &scratch);
2925 if (r) {
2926 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
2927 return r;
2928 }
2929 WREG32(scratch, 0xCAFEDEAD);
2930
2931 r = amdgpu_ring_alloc(kiq_ring, 10);
2932 if (r) {
2933 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2934 amdgpu_gfx_scratch_free(adev, scratch);
2935 return r;
2936 }
2937
2938 /* unmap queues */
2939 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
2940 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2941 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
2942 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
2943 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
2944 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
2945 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
2946 amdgpu_ring_write(kiq_ring, 0);
2947 amdgpu_ring_write(kiq_ring, 0);
2948 amdgpu_ring_write(kiq_ring, 0);
2949 /* write to scratch for completion */
2950 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2951 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2952 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
2953 amdgpu_ring_commit(kiq_ring);
2954
2955 for (i = 0; i < adev->usec_timeout; i++) {
2956 tmp = RREG32(scratch);
2957 if (tmp == 0xDEADBEEF)
2958 break;
2959 DRM_UDELAY(1);
2960 }
2961 if (i >= adev->usec_timeout) {
2962 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
2963 r = -EINVAL;
2964 }
2965 amdgpu_gfx_scratch_free(adev, scratch);
2966 return r;
2967}
2968
2969
2879static int gfx_v9_0_hw_fini(void *handle) 2970static int gfx_v9_0_hw_fini(void *handle)
2880{ 2971{
2881 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2972 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2973 int i;
2882 2974
2883 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 2975 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
2884 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 2976 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
2977
2978 /* disable KCQ to avoid CPC touch memory not valid anymore */
2979 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2980 gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
2981
2885 if (amdgpu_sriov_vf(adev)) { 2982 if (amdgpu_sriov_vf(adev)) {
2886 pr_debug("For SRIOV client, shouldn't do anything.\n"); 2983 pr_debug("For SRIOV client, shouldn't do anything.\n");
2887 return 0; 2984 return 0;
@@ -2924,15 +3021,10 @@ static bool gfx_v9_0_is_idle(void *handle)
2924static int gfx_v9_0_wait_for_idle(void *handle) 3021static int gfx_v9_0_wait_for_idle(void *handle)
2925{ 3022{
2926 unsigned i; 3023 unsigned i;
2927 u32 tmp;
2928 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3024 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2929 3025
2930 for (i = 0; i < adev->usec_timeout; i++) { 3026 for (i = 0; i < adev->usec_timeout; i++) {
2931 /* read MC_STATUS */ 3027 if (gfx_v9_0_is_idle(handle))
2932 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
2933 GRBM_STATUS__GUI_ACTIVE_MASK;
2934
2935 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
2936 return 0; 3028 return 0;
2937 udelay(1); 3029 udelay(1);
2938 } 3030 }
@@ -3493,7 +3585,9 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
3493 u32 ref_and_mask, reg_mem_engine; 3585 u32 ref_and_mask, reg_mem_engine;
3494 struct nbio_hdp_flush_reg *nbio_hf_reg; 3586 struct nbio_hdp_flush_reg *nbio_hf_reg;
3495 3587
3496 if (ring->adev->asic_type == CHIP_VEGA10) 3588 if (ring->adev->flags & AMD_IS_APU)
3589 nbio_hf_reg = &nbio_v7_0_hdp_flush_reg;
3590 else
3497 nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; 3591 nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;
3498 3592
3499 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3593 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
@@ -3522,7 +3616,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
3522static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 3616static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
3523{ 3617{
3524 gfx_v9_0_write_data_to_reg(ring, 0, true, 3618 gfx_v9_0_write_data_to_reg(ring, 0, true,
3525 SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1); 3619 SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
3526} 3620}
3527 3621
3528static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 3622static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
@@ -3751,6 +3845,12 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
3751 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 3845 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
3752} 3846}
3753 3847
3848static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
3849{
3850 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
3851 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
3852}
3853
3754static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 3854static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
3755{ 3855{
3756 uint32_t dw2 = 0; 3856 uint32_t dw2 = 0;
@@ -3758,6 +3858,8 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
3758 if (amdgpu_sriov_vf(ring->adev)) 3858 if (amdgpu_sriov_vf(ring->adev))
3759 gfx_v9_0_ring_emit_ce_meta(ring); 3859 gfx_v9_0_ring_emit_ce_meta(ring);
3760 3860
3861 gfx_v9_0_ring_emit_tmz(ring, true);
3862
3761 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 3863 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
3762 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 3864 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
3763 /* set load_global_config & load_global_uconfig */ 3865 /* set load_global_config & load_global_uconfig */
@@ -3808,12 +3910,6 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
3808 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 3910 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
3809} 3911}
3810 3912
3811static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
3812{
3813 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
3814 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
3815}
3816
3817static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 3913static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
3818{ 3914{
3819 struct amdgpu_device *adev = ring->adev; 3915 struct amdgpu_device *adev = ring->adev;
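
Several hunks in this gfx_v9_0.c diff replace an open-coded if/else that ORs or clears a register mask with a single REG_SET_FIELD(..., enable ? 1 : 0). The standalone sketch below shows the shape of that pattern with trimmed-down MASK/SHIFT defines; these macros are illustrative stand-ins, not the kernel's generated register headers.

/*
 * Field-update pattern the gfx_v9_0 power-gating helpers were converted to:
 * clear the field, then OR in the new value, instead of branching on enable.
 */
#include <stdint.h>
#include <stdio.h>

#define RLC_PG_CNTL__GFX_POWER_GATING_ENABLE__SHIFT 0
#define RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK   0x00000001U

#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
#define REG_FIELD_MASK(reg, field)  reg##__##field##_MASK

/* same shape as the kernel helper: mask out the field, then OR in the value */
#define REG_SET_FIELD(orig, reg, field, val)				\
	(((orig) & ~REG_FIELD_MASK(reg, field)) |			\
	 (((uint32_t)(val) << REG_FIELD_SHIFT(reg, field)) &		\
	  REG_FIELD_MASK(reg, field)))

static uint32_t set_gfx_cg_power_gating(uint32_t reg, int enable)
{
	/* one expression replaces the old if/else on enable */
	return REG_SET_FIELD(reg, RLC_PG_CNTL, GFX_POWER_GATING_ENABLE,
			     enable ? 1 : 0);
}

int main(void)
{
	uint32_t reg = 0x0000f0f0;

	printf("enable : 0x%08x\n", set_gfx_cg_power_gating(reg, 1));
	printf("disable: 0x%08x\n", set_gfx_cg_power_gating(reg, 0));
	return 0;
}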
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 408723ef157c..c17996e18086 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -144,8 +144,8 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
144 WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); 144 WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
145 145
146 tmp = mmVM_L2_CNTL3_DEFAULT; 146 tmp = mmVM_L2_CNTL3_DEFAULT;
147 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); 147 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
148 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); 148 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
149 WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); 149 WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
150 150
151 tmp = mmVM_L2_CNTL4_DEFAULT; 151 tmp = mmVM_L2_CNTL4_DEFAULT;
@@ -319,6 +319,12 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
319 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 319 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
320 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 320 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
321 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 321 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
322 if (!value) {
323 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
324 CRASH_ON_NO_RETRY_FAULT, 1);
325 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
326 CRASH_ON_RETRY_FAULT, 1);
327 }
322 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); 328 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
323} 329}
324 330
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 93c45f26b7c8..f4603a7c8ef3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -332,7 +332,24 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
332 adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; 332 adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
333 adev->mc.visible_vram_size = adev->mc.aper_size; 333 adev->mc.visible_vram_size = adev->mc.aper_size;
334 334
335 amdgpu_gart_set_defaults(adev); 335 /* set the gart size */
336 if (amdgpu_gart_size == -1) {
337 switch (adev->asic_type) {
338 case CHIP_HAINAN: /* no MM engines */
339 default:
340 adev->mc.gart_size = 256ULL << 20;
341 break;
342 case CHIP_VERDE: /* UVD, VCE do not support GPUVM */
343 case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */
344 case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
345 case CHIP_OLAND: /* UVD, VCE do not support GPUVM */
346 adev->mc.gart_size = 1024ULL << 20;
347 break;
348 }
349 } else {
350 adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
351 }
352
336 gmc_v6_0_vram_gtt_location(adev, &adev->mc); 353 gmc_v6_0_vram_gtt_location(adev, &adev->mc);
337 354
338 return 0; 355 return 0;
@@ -461,6 +478,7 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
461static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) 478static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
462{ 479{
463 int r, i; 480 int r, i;
481 u32 field;
464 482
465 if (adev->gart.robj == NULL) { 483 if (adev->gart.robj == NULL) {
466 dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); 484 dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -488,10 +506,12 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
488 WREG32(mmVM_L2_CNTL2, 506 WREG32(mmVM_L2_CNTL2,
489 VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK | 507 VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK |
490 VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK); 508 VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK);
509
510 field = adev->vm_manager.fragment_size;
491 WREG32(mmVM_L2_CNTL3, 511 WREG32(mmVM_L2_CNTL3,
492 VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | 512 VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
493 (4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) | 513 (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
494 (4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); 514 (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
495 /* setup context0 */ 515 /* setup context0 */
496 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); 516 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
497 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); 517 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
@@ -811,7 +831,7 @@ static int gmc_v6_0_sw_init(void *handle)
811 if (r) 831 if (r)
812 return r; 832 return r;
813 833
814 amdgpu_vm_adjust_size(adev, 64); 834 amdgpu_vm_adjust_size(adev, 64, 9);
815 adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; 835 adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
816 836
817 adev->mc.mc_mask = 0xffffffffffULL; 837 adev->mc.mc_mask = 0xffffffffffULL;
@@ -881,6 +901,8 @@ static int gmc_v6_0_sw_fini(void *handle)
881 gmc_v6_0_gart_fini(adev); 901 gmc_v6_0_gart_fini(adev);
882 amdgpu_gem_force_release(adev); 902 amdgpu_gem_force_release(adev);
883 amdgpu_bo_fini(adev); 903 amdgpu_bo_fini(adev);
904 release_firmware(adev->mc.fw);
905 adev->mc.fw = NULL;
884 906
885 return 0; 907 return 0;
886} 908}
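
The gmc_v6/v7/v8/v9 mc_init hunks in this series all replace amdgpu_gart_set_defaults() with the same inline policy: a module parameter of -1 selects a per-ASIC default (256MB, or 1GB where UVD/VCE cannot address GPUVM), anything else is taken as a size in megabytes. A small sketch of that selection, with the enum values and helper as illustrative stand-ins rather than the driver's definitions:

#include <stdint.h>
#include <stdio.h>

enum chip { CHIP_HAINAN, CHIP_TAHITI, CHIP_VERDE, CHIP_PITCAIRN, CHIP_OLAND };

static uint64_t pick_gart_size(long gart_size_mb, enum chip asic)
{
	if (gart_size_mb != -1)
		return (uint64_t)gart_size_mb << 20;	/* explicit override, MB -> bytes */

	switch (asic) {
	case CHIP_HAINAN:	/* no MM engines, the small default is enough */
	default:
		return 256ULL << 20;
	case CHIP_VERDE:	/* UVD/VCE cannot use GPUVM, so reserve more GART */
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
		return 1024ULL << 20;
	}
}

int main(void)
{
	printf("auto, Tahiti : %llu MiB\n",
	       (unsigned long long)(pick_gart_size(-1, CHIP_TAHITI) >> 20));
	printf("forced 512MB : %llu MiB\n",
	       (unsigned long long)(pick_gart_size(512, CHIP_HAINAN) >> 20));
	return 0;
}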
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 4a9e84062874..b0528ca9207b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -386,7 +386,27 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
386 if (adev->mc.visible_vram_size > adev->mc.real_vram_size) 386 if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
387 adev->mc.visible_vram_size = adev->mc.real_vram_size; 387 adev->mc.visible_vram_size = adev->mc.real_vram_size;
388 388
389 amdgpu_gart_set_defaults(adev); 389 /* set the gart size */
390 if (amdgpu_gart_size == -1) {
391 switch (adev->asic_type) {
392 case CHIP_TOPAZ: /* no MM engines */
393 default:
394 adev->mc.gart_size = 256ULL << 20;
395 break;
396#ifdef CONFIG_DRM_AMDGPU_CIK
397 case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
398 case CHIP_HAWAII: /* UVD, VCE do not support GPUVM */
399 case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */
400 case CHIP_KABINI: /* UVD, VCE do not support GPUVM */
401 case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
402 adev->mc.gart_size = 1024ULL << 20;
403 break;
404#endif
405 }
406 } else {
407 adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
408 }
409
390 gmc_v7_0_vram_gtt_location(adev, &adev->mc); 410 gmc_v7_0_vram_gtt_location(adev, &adev->mc);
391 411
392 return 0; 412 return 0;
@@ -562,7 +582,7 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
562static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) 582static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
563{ 583{
564 int r, i; 584 int r, i;
565 u32 tmp; 585 u32 tmp, field;
566 586
567 if (adev->gart.robj == NULL) { 587 if (adev->gart.robj == NULL) {
568 dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); 588 dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -592,10 +612,12 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
592 tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); 612 tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
593 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); 613 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
594 WREG32(mmVM_L2_CNTL2, tmp); 614 WREG32(mmVM_L2_CNTL2, tmp);
615
616 field = adev->vm_manager.fragment_size;
595 tmp = RREG32(mmVM_L2_CNTL3); 617 tmp = RREG32(mmVM_L2_CNTL3);
596 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); 618 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1);
597 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); 619 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
598 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); 620 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
599 WREG32(mmVM_L2_CNTL3, tmp); 621 WREG32(mmVM_L2_CNTL3, tmp);
600 /* setup context0 */ 622 /* setup context0 */
601 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); 623 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
@@ -948,7 +970,7 @@ static int gmc_v7_0_sw_init(void *handle)
948 * Currently set to 4GB ((1 << 20) 4k pages). 970 * Currently set to 4GB ((1 << 20) 4k pages).
949 * Max GPUVM size for cayman and SI is 40 bits. 971 * Max GPUVM size for cayman and SI is 40 bits.
950 */ 972 */
951 amdgpu_vm_adjust_size(adev, 64); 973 amdgpu_vm_adjust_size(adev, 64, 9);
952 adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; 974 adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
953 975
954 /* Set the internal MC address mask 976 /* Set the internal MC address mask
@@ -1028,6 +1050,8 @@ static int gmc_v7_0_sw_fini(void *handle)
1028 gmc_v7_0_gart_fini(adev); 1050 gmc_v7_0_gart_fini(adev);
1029 amdgpu_gem_force_release(adev); 1051 amdgpu_gem_force_release(adev);
1030 amdgpu_bo_fini(adev); 1052 amdgpu_bo_fini(adev);
1053 release_firmware(adev->mc.fw);
1054 adev->mc.fw = NULL;
1031 1055
1032 return 0; 1056 return 0;
1033} 1057}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 85c937b5e40b..f368cfe2f585 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -562,7 +562,26 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
562 if (adev->mc.visible_vram_size > adev->mc.real_vram_size) 562 if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
563 adev->mc.visible_vram_size = adev->mc.real_vram_size; 563 adev->mc.visible_vram_size = adev->mc.real_vram_size;
564 564
565 amdgpu_gart_set_defaults(adev); 565 /* set the gart size */
566 if (amdgpu_gart_size == -1) {
567 switch (adev->asic_type) {
568 case CHIP_POLARIS11: /* all engines support GPUVM */
569 case CHIP_POLARIS10: /* all engines support GPUVM */
570 case CHIP_POLARIS12: /* all engines support GPUVM */
571 default:
572 adev->mc.gart_size = 256ULL << 20;
573 break;
574 case CHIP_TONGA: /* UVD, VCE do not support GPUVM */
575 case CHIP_FIJI: /* UVD, VCE do not support GPUVM */
576 case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
577 case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */
578 adev->mc.gart_size = 1024ULL << 20;
579 break;
580 }
581 } else {
582 adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
583 }
584
566 gmc_v8_0_vram_gtt_location(adev, &adev->mc); 585 gmc_v8_0_vram_gtt_location(adev, &adev->mc);
567 586
568 return 0; 587 return 0;
@@ -762,7 +781,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
762static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) 781static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
763{ 782{
764 int r, i; 783 int r, i;
765 u32 tmp; 784 u32 tmp, field;
766 785
767 if (adev->gart.robj == NULL) { 786 if (adev->gart.robj == NULL) {
768 dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); 787 dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
@@ -793,10 +812,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
793 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); 812 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
794 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); 813 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
795 WREG32(mmVM_L2_CNTL2, tmp); 814 WREG32(mmVM_L2_CNTL2, tmp);
815
816 field = adev->vm_manager.fragment_size;
796 tmp = RREG32(mmVM_L2_CNTL3); 817 tmp = RREG32(mmVM_L2_CNTL3);
797 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); 818 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1);
798 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); 819 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field);
799 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); 820 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
800 WREG32(mmVM_L2_CNTL3, tmp); 821 WREG32(mmVM_L2_CNTL3, tmp);
801 /* XXX: set to enable PTE/PDE in system memory */ 822 /* XXX: set to enable PTE/PDE in system memory */
802 tmp = RREG32(mmVM_L2_CNTL4); 823 tmp = RREG32(mmVM_L2_CNTL4);
@@ -1046,7 +1067,7 @@ static int gmc_v8_0_sw_init(void *handle)
1046 * Currently set to 4GB ((1 << 20) 4k pages). 1067 * Currently set to 4GB ((1 << 20) 4k pages).
1047 * Max GPUVM size for cayman and SI is 40 bits. 1068 * Max GPUVM size for cayman and SI is 40 bits.
1048 */ 1069 */
1049 amdgpu_vm_adjust_size(adev, 64); 1070 amdgpu_vm_adjust_size(adev, 64, 9);
1050 adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; 1071 adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
1051 1072
1052 /* Set the internal MC address mask 1073 /* Set the internal MC address mask
@@ -1126,6 +1147,8 @@ static int gmc_v8_0_sw_fini(void *handle)
1126 gmc_v8_0_gart_fini(adev); 1147 gmc_v8_0_gart_fini(adev);
1127 amdgpu_gem_force_release(adev); 1148 amdgpu_gem_force_release(adev);
1128 amdgpu_bo_fini(adev); 1149 amdgpu_bo_fini(adev);
1150 release_firmware(adev->mc.fw);
1151 adev->mc.fw = NULL;
1129 1152
1130 return 0; 1153 return 0;
1131} 1154}
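
The gart_enable hunks above stop hard-coding 4 into BANK_SELECT and L2_CACHE_BIGK_FRAGMENT_SIZE and instead program both from adev->vm_manager.fragment_size, the same value now passed as the third argument of amdgpu_vm_adjust_size(adev, 64, 9). Assuming that value is a power-of-two exponent relative to the 4KB GPU page (so 9 would mean 2MB fragments), the sketch below just prints what a few exponents map to; the interpretation is an assumption for illustration, not taken from the patch itself.

#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SHIFT 12	/* 4KB GPU pages */

/* assumed meaning: fragment bytes = page size << fragment_size */
static uint64_t fragment_bytes(unsigned int fragment_size)
{
	return 1ULL << (GPU_PAGE_SHIFT + fragment_size);
}

int main(void)
{
	unsigned int sizes[] = { 4, 6, 9 };

	for (unsigned int i = 0; i < 3; i++)
		printf("fragment_size %u -> %llu KiB\n", sizes[i],
		       (unsigned long long)(fragment_bytes(sizes[i]) >> 10));
	return 0;
}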
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index c22899a08106..621699331e09 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -32,6 +32,8 @@
32#include "vega10/DC/dce_12_0_offset.h" 32#include "vega10/DC/dce_12_0_offset.h"
33#include "vega10/DC/dce_12_0_sh_mask.h" 33#include "vega10/DC/dce_12_0_sh_mask.h"
34#include "vega10/vega10_enum.h" 34#include "vega10/vega10_enum.h"
35#include "vega10/MMHUB/mmhub_1_0_offset.h"
36#include "vega10/ATHUB/athub_1_0_offset.h"
35 37
36#include "soc15_common.h" 38#include "soc15_common.h"
37 39
@@ -71,13 +73,25 @@ static const u32 golden_settings_vega10_hdp[] =
71 0xf6e, 0x0fffffff, 0x00000000, 73 0xf6e, 0x0fffffff, 0x00000000,
72}; 74};
73 75
76static const u32 golden_settings_mmhub_1_0_0[] =
77{
78 SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_WRCLI2), 0x00000007, 0xfe5fe0fa,
79 SOC15_REG_OFFSET(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0), 0x00000030, 0x55555565
80};
81
82static const u32 golden_settings_athub_1_0_0[] =
83{
84 SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL), 0x0000ff00, 0x00000800,
85 SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL2), 0x00ff00ff, 0x00080008
86};
87
74static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, 88static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
75 struct amdgpu_irq_src *src, 89 struct amdgpu_irq_src *src,
76 unsigned type, 90 unsigned type,
77 enum amdgpu_interrupt_state state) 91 enum amdgpu_interrupt_state state)
78{ 92{
79 struct amdgpu_vmhub *hub; 93 struct amdgpu_vmhub *hub;
80 u32 tmp, reg, bits, i; 94 u32 tmp, reg, bits, i, j;
81 95
82 bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | 96 bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
83 VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | 97 VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
@@ -89,43 +103,26 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
89 103
90 switch (state) { 104 switch (state) {
91 case AMDGPU_IRQ_STATE_DISABLE: 105 case AMDGPU_IRQ_STATE_DISABLE:
92 /* MM HUB */ 106 for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) {
93 hub = &adev->vmhub[AMDGPU_MMHUB]; 107 hub = &adev->vmhub[j];
94 for (i = 0; i< 16; i++) { 108 for (i = 0; i < 16; i++) {
95 reg = hub->vm_context0_cntl + i; 109 reg = hub->vm_context0_cntl + i;
96 tmp = RREG32(reg); 110 tmp = RREG32(reg);
97 tmp &= ~bits; 111 tmp &= ~bits;
98 WREG32(reg, tmp); 112 WREG32(reg, tmp);
99 } 113 }
100
101 /* GFX HUB */
102 hub = &adev->vmhub[AMDGPU_GFXHUB];
103 for (i = 0; i < 16; i++) {
104 reg = hub->vm_context0_cntl + i;
105 tmp = RREG32(reg);
106 tmp &= ~bits;
107 WREG32(reg, tmp);
108 } 114 }
109 break; 115 break;
110 case AMDGPU_IRQ_STATE_ENABLE: 116 case AMDGPU_IRQ_STATE_ENABLE:
111 /* MM HUB */ 117 for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) {
112 hub = &adev->vmhub[AMDGPU_MMHUB]; 118 hub = &adev->vmhub[j];
113 for (i = 0; i< 16; i++) { 119 for (i = 0; i < 16; i++) {
114 reg = hub->vm_context0_cntl + i; 120 reg = hub->vm_context0_cntl + i;
115 tmp = RREG32(reg); 121 tmp = RREG32(reg);
116 tmp |= bits; 122 tmp |= bits;
117 WREG32(reg, tmp); 123 WREG32(reg, tmp);
124 }
118 } 125 }
119
120 /* GFX HUB */
121 hub = &adev->vmhub[AMDGPU_GFXHUB];
122 for (i = 0; i < 16; i++) {
123 reg = hub->vm_context0_cntl + i;
124 tmp = RREG32(reg);
125 tmp |= bits;
126 WREG32(reg, tmp);
127 }
128 break;
129 default: 126 default:
130 break; 127 break;
131 } 128 }
@@ -499,7 +496,21 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
499 if (adev->mc.visible_vram_size > adev->mc.real_vram_size) 496 if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
500 adev->mc.visible_vram_size = adev->mc.real_vram_size; 497 adev->mc.visible_vram_size = adev->mc.real_vram_size;
501 498
502 amdgpu_gart_set_defaults(adev); 499 /* set the gart size */
500 if (amdgpu_gart_size == -1) {
501 switch (adev->asic_type) {
502 case CHIP_VEGA10: /* all engines support GPUVM */
503 default:
504 adev->mc.gart_size = 256ULL << 20;
505 break;
506 case CHIP_RAVEN: /* DCE SG support */
507 adev->mc.gart_size = 1024ULL << 20;
508 break;
509 }
510 } else {
511 adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
512 }
513
503 gmc_v9_0_vram_gtt_location(adev, &adev->mc); 514 gmc_v9_0_vram_gtt_location(adev, &adev->mc);
504 515
505 return 0; 516 return 0;
@@ -541,9 +552,10 @@ static int gmc_v9_0_sw_init(void *handle)
541 adev->vm_manager.vm_size = 1U << 18; 552 adev->vm_manager.vm_size = 1U << 18;
542 adev->vm_manager.block_size = 9; 553 adev->vm_manager.block_size = 9;
543 adev->vm_manager.num_level = 3; 554 adev->vm_manager.num_level = 3;
555 amdgpu_vm_set_fragment_size(adev, 9);
544 } else { 556 } else {
545 /* vm_size is 64GB for legacy 2-level page support*/ 557 /* vm_size is 64GB for legacy 2-level page support */
546 amdgpu_vm_adjust_size(adev, 64); 558 amdgpu_vm_adjust_size(adev, 64, 9);
547 adev->vm_manager.num_level = 1; 559 adev->vm_manager.num_level = 1;
548 } 560 }
549 break; 561 break;
@@ -558,14 +570,16 @@ static int gmc_v9_0_sw_init(void *handle)
558 adev->vm_manager.vm_size = 1U << 18; 570 adev->vm_manager.vm_size = 1U << 18;
559 adev->vm_manager.block_size = 9; 571 adev->vm_manager.block_size = 9;
560 adev->vm_manager.num_level = 3; 572 adev->vm_manager.num_level = 3;
573 amdgpu_vm_set_fragment_size(adev, 9);
561 break; 574 break;
562 default: 575 default:
563 break; 576 break;
564 } 577 }
565 578
566 DRM_INFO("vm size is %llu GB, block size is %u-bit\n", 579 DRM_INFO("vm size is %llu GB, block size is %u-bit,fragment size is %u-bit\n",
567 adev->vm_manager.vm_size, 580 adev->vm_manager.vm_size,
568 adev->vm_manager.block_size); 581 adev->vm_manager.block_size,
582 adev->vm_manager.fragment_size);
569 583
570 /* This interrupt is VMC page fault.*/ 584 /* This interrupt is VMC page fault.*/
571 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, 585 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
@@ -665,8 +679,17 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
665{ 679{
666 switch (adev->asic_type) { 680 switch (adev->asic_type) {
667 case CHIP_VEGA10: 681 case CHIP_VEGA10:
682 amdgpu_program_register_sequence(adev,
683 golden_settings_mmhub_1_0_0,
684 (const u32)ARRAY_SIZE(golden_settings_mmhub_1_0_0));
685 amdgpu_program_register_sequence(adev,
686 golden_settings_athub_1_0_0,
687 (const u32)ARRAY_SIZE(golden_settings_athub_1_0_0));
668 break; 688 break;
669 case CHIP_RAVEN: 689 case CHIP_RAVEN:
690 amdgpu_program_register_sequence(adev,
691 golden_settings_athub_1_0_0,
692 (const u32)ARRAY_SIZE(golden_settings_athub_1_0_0));
670 break; 693 break;
671 default: 694 default:
672 break; 695 break;
@@ -696,12 +719,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
696 if (r) 719 if (r)
697 return r; 720 return r;
698 721
699 /* After HDP is initialized, flush HDP.*/
700 if (adev->flags & AMD_IS_APU)
701 nbio_v7_0_hdp_flush(adev);
702 else
703 nbio_v6_1_hdp_flush(adev);
704
705 switch (adev->asic_type) { 722 switch (adev->asic_type) {
706 case CHIP_RAVEN: 723 case CHIP_RAVEN:
707 mmhub_v1_0_initialize_power_gating(adev); 724 mmhub_v1_0_initialize_power_gating(adev);
@@ -719,13 +736,16 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
719 if (r) 736 if (r)
720 return r; 737 return r;
721 738
722 tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); 739 WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
723 tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
724 WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);
725 740
726 tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); 741 tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
727 WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); 742 WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
728 743
744 /* After HDP is initialized, flush HDP.*/
745 if (adev->flags & AMD_IS_APU)
746 nbio_v7_0_hdp_flush(adev);
747 else
748 nbio_v6_1_hdp_flush(adev);
729 749
730 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) 750 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
731 value = false; 751 value = false;
@@ -734,7 +754,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
734 754
735 gfxhub_v1_0_set_fault_enable_default(adev, value); 755 gfxhub_v1_0_set_fault_enable_default(adev, value);
736 mmhub_v1_0_set_fault_enable_default(adev, value); 756 mmhub_v1_0_set_fault_enable_default(adev, value);
737
738 gmc_v9_0_gart_flush_gpu_tlb(adev, 0); 757 gmc_v9_0_gart_flush_gpu_tlb(adev, 0);
739 758
740 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 759 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -753,17 +772,11 @@ static int gmc_v9_0_hw_init(void *handle)
753 gmc_v9_0_init_golden_registers(adev); 772 gmc_v9_0_init_golden_registers(adev);
754 773
755 if (adev->mode_info.num_crtc) { 774 if (adev->mode_info.num_crtc) {
756 u32 tmp;
757
758 /* Lockout access through VGA aperture*/ 775 /* Lockout access through VGA aperture*/
759 tmp = RREG32_SOC15(DCE, 0, mmVGA_HDP_CONTROL); 776 WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
760 tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
761 WREG32_SOC15(DCE, 0, mmVGA_HDP_CONTROL, tmp);
762 777
763 /* disable VGA render */ 778 /* disable VGA render */
764 tmp = RREG32_SOC15(DCE, 0, mmVGA_RENDER_CONTROL); 779 WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
765 tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
766 WREG32_SOC15(DCE, 0, mmVGA_RENDER_CONTROL, tmp);
767 } 780 }
768 781
769 r = gmc_v9_0_gart_enable(adev); 782 r = gmc_v9_0_gart_enable(adev);
@@ -805,9 +818,7 @@ static int gmc_v9_0_suspend(void *handle)
805{ 818{
806 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 819 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
807 820
808 gmc_v9_0_hw_fini(adev); 821 return gmc_v9_0_hw_fini(adev);
809
810 return 0;
811} 822}
812 823
813static int gmc_v9_0_resume(void *handle) 824static int gmc_v9_0_resume(void *handle)
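The gmc_v9_0 hunks above fold the open-coded RREG32/REG_SET_FIELD/WREG32 sequences into single WREG32_FIELD15() calls. A minimal user-space sketch of the read-modify-write pattern those helpers wrap, using a made-up register, mask and shift rather than the real SOC15 definitions:

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg = 0x10;                 /* stands in for an MMIO register */

static uint32_t reg_read(void)        { return fake_reg; }
static void     reg_write(uint32_t v) { fake_reg = v; }

/* Read-modify-write of one field: clear it with its mask, OR in the new
 * shifted value, write the result back - the pattern WREG32_FIELD15 hides. */
static void reg_set_field(uint32_t mask, unsigned int shift, uint32_t value)
{
        uint32_t tmp = reg_read();

        tmp &= ~mask;
        tmp |= (value << shift) & mask;
        reg_write(tmp);
}

int main(void)
{
        reg_set_field(1u << 3, 3, 1);        /* set a hypothetical 1-bit field */
        printf("reg = 0x%08x\n", fake_reg);  /* prints 0x00000018 */
        return 0;
}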
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index 7a0ea27ac429..65ed6d3a8f05 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -208,6 +208,19 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
208} 208}
209 209
210/** 210/**
211 * iceland_ih_prescreen_iv - prescreen an interrupt vector
212 *
213 * @adev: amdgpu_device pointer
214 *
215 * Returns true if the interrupt vector should be further processed.
216 */
217static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev)
218{
219 /* Process all interrupts */
220 return true;
221}
222
223/**
211 * iceland_ih_decode_iv - decode an interrupt vector 224 * iceland_ih_decode_iv - decode an interrupt vector
212 * 225 *
213 * @adev: amdgpu_device pointer 226 * @adev: amdgpu_device pointer
@@ -412,6 +425,7 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = {
412 425
413static const struct amdgpu_ih_funcs iceland_ih_funcs = { 426static const struct amdgpu_ih_funcs iceland_ih_funcs = {
414 .get_wptr = iceland_ih_get_wptr, 427 .get_wptr = iceland_ih_get_wptr,
428 .prescreen_iv = iceland_ih_prescreen_iv,
415 .decode_iv = iceland_ih_decode_iv, 429 .decode_iv = iceland_ih_decode_iv,
416 .set_rptr = iceland_ih_set_rptr 430 .set_rptr = iceland_ih_set_rptr
417}; 431};
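The new .prescreen_iv hook gives the interrupt handler a filtering stage that runs before decode; on iceland (and tonga/si further down) it simply accepts every vector. A standalone sketch of that callback-table shape, with illustrative struct and field names rather than the amdgpu_ih_funcs ones:

#include <stdbool.h>
#include <stdio.h>

struct ih_entry { unsigned int src_id; };

struct ih_funcs {
        bool (*prescreen_iv)(const struct ih_entry *e);  /* filter before decode */
        void (*decode_iv)(const struct ih_entry *e);
};

static bool accept_all(const struct ih_entry *e) { (void)e; return true; }
static void decode(const struct ih_entry *e)     { printf("decoded src %u\n", e->src_id); }

static const struct ih_funcs funcs = {
        .prescreen_iv = accept_all,
        .decode_iv    = decode,
};

int main(void)
{
        struct ih_entry e = { .src_id = 42 };

        if (funcs.prescreen_iv(&e))   /* only decode what the prescreen stage keeps */
                funcs.decode_iv(&e);
        return 0;
}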
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 3bbf2ccfca89..b57399a462c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -42,7 +42,6 @@
42#define KV_MINIMUM_ENGINE_CLOCK 800 42#define KV_MINIMUM_ENGINE_CLOCK 800
43#define SMC_RAM_END 0x40000 43#define SMC_RAM_END 0x40000
44 44
45static void kv_dpm_set_dpm_funcs(struct amdgpu_device *adev);
46static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev); 45static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev);
47static int kv_enable_nb_dpm(struct amdgpu_device *adev, 46static int kv_enable_nb_dpm(struct amdgpu_device *adev,
48 bool enable); 47 bool enable);
@@ -64,7 +63,7 @@ static int kv_set_thermal_temperature_range(struct amdgpu_device *adev,
64 int min_temp, int max_temp); 63 int min_temp, int max_temp);
65static int kv_init_fps_limits(struct amdgpu_device *adev); 64static int kv_init_fps_limits(struct amdgpu_device *adev);
66 65
67static void kv_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate); 66static void kv_dpm_powergate_uvd(void *handle, bool gate);
68static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate); 67static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate);
69static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate); 68static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate);
70static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate); 69static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate);
@@ -1245,8 +1244,9 @@ static void kv_update_requested_ps(struct amdgpu_device *adev,
1245 adev->pm.dpm.requested_ps = &pi->requested_rps; 1244 adev->pm.dpm.requested_ps = &pi->requested_rps;
1246} 1245}
1247 1246
1248static void kv_dpm_enable_bapm(struct amdgpu_device *adev, bool enable) 1247static void kv_dpm_enable_bapm(void *handle, bool enable)
1249{ 1248{
1249 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1250 struct kv_power_info *pi = kv_get_pi(adev); 1250 struct kv_power_info *pi = kv_get_pi(adev);
1251 int ret; 1251 int ret;
1252 1252
@@ -1672,8 +1672,9 @@ static int kv_update_acp_dpm(struct amdgpu_device *adev, bool gate)
1672 return kv_enable_acp_dpm(adev, !gate); 1672 return kv_enable_acp_dpm(adev, !gate);
1673} 1673}
1674 1674
1675static void kv_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate) 1675static void kv_dpm_powergate_uvd(void *handle, bool gate)
1676{ 1676{
1677 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1677 struct kv_power_info *pi = kv_get_pi(adev); 1678 struct kv_power_info *pi = kv_get_pi(adev);
1678 int ret; 1679 int ret;
1679 1680
@@ -1868,10 +1869,11 @@ static int kv_enable_nb_dpm(struct amdgpu_device *adev,
1868 return ret; 1869 return ret;
1869} 1870}
1870 1871
1871static int kv_dpm_force_performance_level(struct amdgpu_device *adev, 1872static int kv_dpm_force_performance_level(void *handle,
1872 enum amd_dpm_forced_level level) 1873 enum amd_dpm_forced_level level)
1873{ 1874{
1874 int ret; 1875 int ret;
1876 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1875 1877
1876 if (level == AMD_DPM_FORCED_LEVEL_HIGH) { 1878 if (level == AMD_DPM_FORCED_LEVEL_HIGH) {
1877 ret = kv_force_dpm_highest(adev); 1879 ret = kv_force_dpm_highest(adev);
@@ -1892,8 +1894,9 @@ static int kv_dpm_force_performance_level(struct amdgpu_device *adev,
1892 return 0; 1894 return 0;
1893} 1895}
1894 1896
1895static int kv_dpm_pre_set_power_state(struct amdgpu_device *adev) 1897static int kv_dpm_pre_set_power_state(void *handle)
1896{ 1898{
1899 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1897 struct kv_power_info *pi = kv_get_pi(adev); 1900 struct kv_power_info *pi = kv_get_pi(adev);
1898 struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; 1901 struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps;
1899 struct amdgpu_ps *new_ps = &requested_ps; 1902 struct amdgpu_ps *new_ps = &requested_ps;
@@ -1907,8 +1910,9 @@ static int kv_dpm_pre_set_power_state(struct amdgpu_device *adev)
1907 return 0; 1910 return 0;
1908} 1911}
1909 1912
1910static int kv_dpm_set_power_state(struct amdgpu_device *adev) 1913static int kv_dpm_set_power_state(void *handle)
1911{ 1914{
1915 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1912 struct kv_power_info *pi = kv_get_pi(adev); 1916 struct kv_power_info *pi = kv_get_pi(adev);
1913 struct amdgpu_ps *new_ps = &pi->requested_rps; 1917 struct amdgpu_ps *new_ps = &pi->requested_rps;
1914 struct amdgpu_ps *old_ps = &pi->current_rps; 1918 struct amdgpu_ps *old_ps = &pi->current_rps;
@@ -1981,8 +1985,9 @@ static int kv_dpm_set_power_state(struct amdgpu_device *adev)
1981 return 0; 1985 return 0;
1982} 1986}
1983 1987
1984static void kv_dpm_post_set_power_state(struct amdgpu_device *adev) 1988static void kv_dpm_post_set_power_state(void *handle)
1985{ 1989{
1990 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1986 struct kv_power_info *pi = kv_get_pi(adev); 1991 struct kv_power_info *pi = kv_get_pi(adev);
1987 struct amdgpu_ps *new_ps = &pi->requested_rps; 1992 struct amdgpu_ps *new_ps = &pi->requested_rps;
1988 1993
@@ -2848,9 +2853,10 @@ static int kv_dpm_init(struct amdgpu_device *adev)
2848} 2853}
2849 2854
2850static void 2855static void
2851kv_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, 2856kv_dpm_debugfs_print_current_performance_level(void *handle,
2852 struct seq_file *m) 2857 struct seq_file *m)
2853{ 2858{
2859 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2854 struct kv_power_info *pi = kv_get_pi(adev); 2860 struct kv_power_info *pi = kv_get_pi(adev);
2855 u32 current_index = 2861 u32 current_index =
2856 (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) & 2862 (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) &
@@ -2875,11 +2881,12 @@ kv_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev,
2875} 2881}
2876 2882
2877static void 2883static void
2878kv_dpm_print_power_state(struct amdgpu_device *adev, 2884kv_dpm_print_power_state(void *handle, void *request_ps)
2879 struct amdgpu_ps *rps)
2880{ 2885{
2881 int i; 2886 int i;
2887 struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps;
2882 struct kv_ps *ps = kv_get_ps(rps); 2888 struct kv_ps *ps = kv_get_ps(rps);
2889 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2883 2890
2884 amdgpu_dpm_print_class_info(rps->class, rps->class2); 2891 amdgpu_dpm_print_class_info(rps->class, rps->class2);
2885 amdgpu_dpm_print_cap_info(rps->caps); 2892 amdgpu_dpm_print_cap_info(rps->caps);
@@ -2905,13 +2912,14 @@ static void kv_dpm_fini(struct amdgpu_device *adev)
2905 amdgpu_free_extended_power_table(adev); 2912 amdgpu_free_extended_power_table(adev);
2906} 2913}
2907 2914
2908static void kv_dpm_display_configuration_changed(struct amdgpu_device *adev) 2915static void kv_dpm_display_configuration_changed(void *handle)
2909{ 2916{
2910 2917
2911} 2918}
2912 2919
2913static u32 kv_dpm_get_sclk(struct amdgpu_device *adev, bool low) 2920static u32 kv_dpm_get_sclk(void *handle, bool low)
2914{ 2921{
2922 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2915 struct kv_power_info *pi = kv_get_pi(adev); 2923 struct kv_power_info *pi = kv_get_pi(adev);
2916 struct kv_ps *requested_state = kv_get_ps(&pi->requested_rps); 2924 struct kv_ps *requested_state = kv_get_ps(&pi->requested_rps);
2917 2925
@@ -2921,18 +2929,20 @@ static u32 kv_dpm_get_sclk(struct amdgpu_device *adev, bool low)
2921 return requested_state->levels[requested_state->num_levels - 1].sclk; 2929 return requested_state->levels[requested_state->num_levels - 1].sclk;
2922} 2930}
2923 2931
2924static u32 kv_dpm_get_mclk(struct amdgpu_device *adev, bool low) 2932static u32 kv_dpm_get_mclk(void *handle, bool low)
2925{ 2933{
2934 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2926 struct kv_power_info *pi = kv_get_pi(adev); 2935 struct kv_power_info *pi = kv_get_pi(adev);
2927 2936
2928 return pi->sys_info.bootup_uma_clk; 2937 return pi->sys_info.bootup_uma_clk;
2929} 2938}
2930 2939
2931/* get temperature in millidegrees */ 2940/* get temperature in millidegrees */
2932static int kv_dpm_get_temp(struct amdgpu_device *adev) 2941static int kv_dpm_get_temp(void *handle)
2933{ 2942{
2934 u32 temp; 2943 u32 temp;
2935 int actual_temp = 0; 2944 int actual_temp = 0;
2945 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2936 2946
2937 temp = RREG32_SMC(0xC0300E0C); 2947 temp = RREG32_SMC(0xC0300E0C);
2938 2948
@@ -2950,7 +2960,6 @@ static int kv_dpm_early_init(void *handle)
2950{ 2960{
2951 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2961 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2952 2962
2953 kv_dpm_set_dpm_funcs(adev);
2954 kv_dpm_set_irq_funcs(adev); 2963 kv_dpm_set_irq_funcs(adev);
2955 2964
2956 return 0; 2965 return 0;
@@ -3222,14 +3231,17 @@ static inline bool kv_are_power_levels_equal(const struct kv_pl *kv_cpl1,
3222 (kv_cpl1->force_nbp_state == kv_cpl2->force_nbp_state)); 3231 (kv_cpl1->force_nbp_state == kv_cpl2->force_nbp_state));
3223} 3232}
3224 3233
3225static int kv_check_state_equal(struct amdgpu_device *adev, 3234static int kv_check_state_equal(void *handle,
3226 struct amdgpu_ps *cps, 3235 void *current_ps,
3227 struct amdgpu_ps *rps, 3236 void *request_ps,
3228 bool *equal) 3237 bool *equal)
3229{ 3238{
3230 struct kv_ps *kv_cps; 3239 struct kv_ps *kv_cps;
3231 struct kv_ps *kv_rps; 3240 struct kv_ps *kv_rps;
3232 int i; 3241 int i;
3242 struct amdgpu_ps *cps = (struct amdgpu_ps *)current_ps;
3243 struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps;
3244 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3233 3245
3234 if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) 3246 if (adev == NULL || cps == NULL || rps == NULL || equal == NULL)
3235 return -EINVAL; 3247 return -EINVAL;
@@ -3262,9 +3274,10 @@ static int kv_check_state_equal(struct amdgpu_device *adev,
3262 return 0; 3274 return 0;
3263} 3275}
3264 3276
3265static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx, 3277static int kv_dpm_read_sensor(void *handle, int idx,
3266 void *value, int *size) 3278 void *value, int *size)
3267{ 3279{
3280 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3268 struct kv_power_info *pi = kv_get_pi(adev); 3281 struct kv_power_info *pi = kv_get_pi(adev);
3269 uint32_t sclk; 3282 uint32_t sclk;
3270 u32 pl_index = 3283 u32 pl_index =
@@ -3312,7 +3325,7 @@ const struct amd_ip_funcs kv_dpm_ip_funcs = {
3312 .set_powergating_state = kv_dpm_set_powergating_state, 3325 .set_powergating_state = kv_dpm_set_powergating_state,
3313}; 3326};
3314 3327
3315static const struct amdgpu_dpm_funcs kv_dpm_funcs = { 3328const struct amd_pm_funcs kv_dpm_funcs = {
3316 .get_temperature = &kv_dpm_get_temp, 3329 .get_temperature = &kv_dpm_get_temp,
3317 .pre_set_power_state = &kv_dpm_pre_set_power_state, 3330 .pre_set_power_state = &kv_dpm_pre_set_power_state,
3318 .set_power_state = &kv_dpm_set_power_state, 3331 .set_power_state = &kv_dpm_set_power_state,
@@ -3330,12 +3343,6 @@ static const struct amdgpu_dpm_funcs kv_dpm_funcs = {
3330 .read_sensor = &kv_dpm_read_sensor, 3343 .read_sensor = &kv_dpm_read_sensor,
3331}; 3344};
3332 3345
3333static void kv_dpm_set_dpm_funcs(struct amdgpu_device *adev)
3334{
3335 if (adev->pm.funcs == NULL)
3336 adev->pm.funcs = &kv_dpm_funcs;
3337}
3338
3339static const struct amdgpu_irq_src_funcs kv_dpm_irq_funcs = { 3346static const struct amdgpu_irq_src_funcs kv_dpm_irq_funcs = {
3340 .set = kv_dpm_set_interrupt_state, 3347 .set = kv_dpm_set_interrupt_state,
3341 .process = kv_dpm_process_interrupt, 3348 .process = kv_dpm_process_interrupt,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index ad8def3cc343..cc21c4bdec27 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -158,8 +158,8 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
158 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); 158 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
159 159
160 tmp = mmVM_L2_CNTL3_DEFAULT; 160 tmp = mmVM_L2_CNTL3_DEFAULT;
161 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); 161 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
162 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 9); 162 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
163 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); 163 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
164 164
165 tmp = mmVM_L2_CNTL4_DEFAULT; 165 tmp = mmVM_L2_CNTL4_DEFAULT;
@@ -273,7 +273,7 @@ static const struct pctl_data pctl0_data[] = {
273 {0x135, 0x12a810}, 273 {0x135, 0x12a810},
274 {0x149, 0x7a82c} 274 {0x149, 0x7a82c}
275}; 275};
276#define PCTL0_DATA_LEN (sizeof(pctl0_data)/sizeof(pctl0_data[0])) 276#define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
277 277
278#define PCTL0_RENG_EXEC_END_PTR 0x151 278#define PCTL0_RENG_EXEC_END_PTR 0x151
279#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 279#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640
@@ -309,7 +309,7 @@ static const struct pctl_data pctl1_data[] = {
309 {0x1f0, 0x5000a7f6}, 309 {0x1f0, 0x5000a7f6},
310 {0x1f1, 0x5000a7e4} 310 {0x1f1, 0x5000a7e4}
311}; 311};
312#define PCTL1_DATA_LEN (sizeof(pctl1_data)/sizeof(pctl1_data[0])) 312#define PCTL1_DATA_LEN (ARRAY_SIZE(pctl1_data))
313 313
314#define PCTL1_RENG_EXEC_END_PTR 0x1f1 314#define PCTL1_RENG_EXEC_END_PTR 0x1f1
315#define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000 315#define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000
@@ -561,6 +561,13 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
561 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 561 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
562 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, 562 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
563 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); 563 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
564 if (!value) {
565 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
566 CRASH_ON_NO_RETRY_FAULT, 1);
567 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
568 CRASH_ON_RETRY_FAULT, 1);
569 }
570
564 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); 571 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
565} 572}
566 573
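The PCTL*_DATA_LEN macros above switch from the open-coded sizeof division to ARRAY_SIZE(); the kernel version additionally refuses to compile when the argument is a pointer rather than an array. A user-space approximation without that type check:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct pctl_data { unsigned int index, data; };

static const struct pctl_data pctl0_data[] = {
        { 0x135, 0x12a810 },
        { 0x149, 0x07a82c },
};

int main(void)
{
        printf("%zu entries\n", ARRAY_SIZE(pctl0_data));  /* 2, whatever the element size */
        return 0;
}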
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 1e91b9a1c591..67e78576a9eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -24,7 +24,7 @@
24#ifndef __MXGPU_AI_H__ 24#ifndef __MXGPU_AI_H__
25#define __MXGPU_AI_H__ 25#define __MXGPU_AI_H__
26 26
27#define AI_MAILBOX_TIMEDOUT 5000 27#define AI_MAILBOX_TIMEDOUT 12000
28 28
29enum idh_request { 29enum idh_request {
30 IDH_REQ_GPU_INIT_ACCESS = 1, 30 IDH_REQ_GPU_INIT_ACCESS = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
index c791d73d2d54..f13dc6cc158f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
@@ -23,7 +23,7 @@
23#ifndef __MXGPU_VI_H__ 23#ifndef __MXGPU_VI_H__
24#define __MXGPU_VI_H__ 24#define __MXGPU_VI_H__
25 25
26#define VI_MAILBOX_TIMEDOUT 5000 26#define VI_MAILBOX_TIMEDOUT 12000
27#define VI_MAILBOX_RESET_TIME 12 27#define VI_MAILBOX_RESET_TIME 12
28 28
29/* VI mailbox messages request */ 29/* VI mailbox messages request */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index f7cf994b1da2..dea7c909ca5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -35,6 +35,8 @@
35#include "raven1/GC/gc_9_1_offset.h" 35#include "raven1/GC/gc_9_1_offset.h"
36#include "raven1/SDMA0/sdma0_4_1_offset.h" 36#include "raven1/SDMA0/sdma0_4_1_offset.h"
37 37
38MODULE_FIRMWARE("amdgpu/raven_asd.bin");
39
38static int 40static int
39psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) 41psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
40{ 42{
@@ -136,15 +138,13 @@ int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cm
136{ 138{
137 int ret; 139 int ret;
138 uint64_t fw_mem_mc_addr = ucode->mc_addr; 140 uint64_t fw_mem_mc_addr = ucode->mc_addr;
139 struct common_firmware_header *header;
140 141
141 memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); 142 memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
142 header = (struct common_firmware_header *)ucode->fw;
143 143
144 cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; 144 cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
145 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); 145 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
146 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); 146 cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
147 cmd->cmd.cmd_load_ip_fw.fw_size = le32_to_cpu(header->ucode_size_bytes); 147 cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
148 148
149 ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); 149 ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
150 if (ret) 150 if (ret)
@@ -209,7 +209,7 @@ int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
209 return ret; 209 return ret;
210} 210}
211 211
212int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) 212int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type)
213{ 213{
214 int ret = 0; 214 int ret = 0;
215 struct psp_ring *ring; 215 struct psp_ring *ring;
@@ -229,6 +229,19 @@ int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type
229 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), 229 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
230 0x80000000, 0x80000000, false); 230 0x80000000, 0x80000000, false);
231 231
232 return ret;
233}
234
235int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
236{
237 int ret = 0;
238 struct psp_ring *ring = &psp->km_ring;
239 struct amdgpu_device *adev = psp->adev;
240
241 ret = psp_v10_0_ring_stop(psp, ring_type);
242 if (ret)
243 DRM_ERROR("Failed to stop psp ring\n");
244
232 amdgpu_bo_free_kernel(&adev->firmware.rbuf, 245 amdgpu_bo_free_kernel(&adev->firmware.rbuf,
233 &ring->ring_mem_mc_addr, 246 &ring->ring_mem_mc_addr,
234 (void **)&ring->ring_mem); 247 (void **)&ring->ring_mem);
@@ -245,15 +258,20 @@ int psp_v10_0_cmd_submit(struct psp_context *psp,
245 struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; 258 struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
246 struct psp_ring *ring = &psp->km_ring; 259 struct psp_ring *ring = &psp->km_ring;
247 struct amdgpu_device *adev = psp->adev; 260 struct amdgpu_device *adev = psp->adev;
261 uint32_t ring_size_dw = ring->ring_size / 4;
262 uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
248 263
249 /* KM (GPCOM) prepare write pointer */ 264 /* KM (GPCOM) prepare write pointer */
250 psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); 265 psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
251 266
252 /* Update KM RB frame pointer to new frame */ 267 /* Update KM RB frame pointer to new frame */
253 if ((psp_write_ptr_reg % ring->ring_size) == 0) 268 if ((psp_write_ptr_reg % ring_size_dw) == 0)
254 write_frame = ring->ring_mem; 269 write_frame = ring->ring_mem;
255 else 270 else
256 write_frame = ring->ring_mem + (psp_write_ptr_reg / (sizeof(struct psp_gfx_rb_frame) / 4)); 271 write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw);
272
273 /* Initialize KM RB frame */
274 memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
257 275
258 /* Update KM RB frame */ 276 /* Update KM RB frame */
259 write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); 277 write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
@@ -263,8 +281,7 @@ int psp_v10_0_cmd_submit(struct psp_context *psp,
263 write_frame->fence_value = index; 281 write_frame->fence_value = index;
264 282
265 /* Update the write Pointer in DWORDs */ 283 /* Update the write Pointer in DWORDs */
266 psp_write_ptr_reg += sizeof(struct psp_gfx_rb_frame) / 4; 284 psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
267 psp_write_ptr_reg = (psp_write_ptr_reg >= ring->ring_size) ? 0 : psp_write_ptr_reg;
268 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); 285 WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
269 286
270 return 0; 287 return 0;
@@ -390,3 +407,10 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp,
390 407
391 return true; 408 return true;
392} 409}
410
411
412int psp_v10_0_mode1_reset(struct psp_context *psp)
413{
414 DRM_INFO("psp mode 1 reset not supported yet\n");
415 return -EINVAL;
416}
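The psp_v10_0_cmd_submit hunk above keeps the write pointer in dword units end to end: ring size and frame size are converted to dwords once, and the pointer wraps with a modulo instead of a manual compare. A toy version of that arithmetic with assumed sizes (4 KiB ring, 64-byte frames; not the real psp_gfx_rb_frame layout):

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE_BYTES  4096u
#define FRAME_SIZE_BYTES 64u     /* stand-in for sizeof(struct psp_gfx_rb_frame) */

int main(void)
{
        uint32_t ring_size_dw  = RING_SIZE_BYTES / 4;   /* pointer units are dwords */
        uint32_t frame_size_dw = FRAME_SIZE_BYTES / 4;
        uint32_t wptr = 0;

        for (int i = 0; i < 66; i++) {
                uint32_t slot = (wptr % ring_size_dw) / frame_size_dw;  /* frame index */

                wptr = (wptr + frame_size_dw) % ring_size_dw;           /* advance and wrap */
                if (i >= 62)
                        printf("frame %2d -> slot %2u, next wptr %4u dw\n", i, slot, wptr);
        }
        return 0;
}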
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
index e76cde2f01f9..451e8308303f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
@@ -34,6 +34,8 @@ extern int psp_v10_0_ring_init(struct psp_context *psp,
34 enum psp_ring_type ring_type); 34 enum psp_ring_type ring_type);
35extern int psp_v10_0_ring_create(struct psp_context *psp, 35extern int psp_v10_0_ring_create(struct psp_context *psp,
36 enum psp_ring_type ring_type); 36 enum psp_ring_type ring_type);
37extern int psp_v10_0_ring_stop(struct psp_context *psp,
38 enum psp_ring_type ring_type);
37extern int psp_v10_0_ring_destroy(struct psp_context *psp, 39extern int psp_v10_0_ring_destroy(struct psp_context *psp,
38 enum psp_ring_type ring_type); 40 enum psp_ring_type ring_type);
39extern int psp_v10_0_cmd_submit(struct psp_context *psp, 41extern int psp_v10_0_cmd_submit(struct psp_context *psp,
@@ -43,4 +45,6 @@ extern int psp_v10_0_cmd_submit(struct psp_context *psp,
43extern bool psp_v10_0_compare_sram_data(struct psp_context *psp, 45extern bool psp_v10_0_compare_sram_data(struct psp_context *psp,
44 struct amdgpu_firmware_info *ucode, 46 struct amdgpu_firmware_info *ucode,
45 enum AMDGPU_UCODE_ID ucode_type); 47 enum AMDGPU_UCODE_ID ucode_type);
48
49extern int psp_v10_0_mode1_reset(struct psp_context *psp);
46#endif 50#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 2a535a4b8d5b..cee5c396b277 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -319,7 +319,7 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
319 return ret; 319 return ret;
320} 320}
321 321
322int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) 322int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type)
323{ 323{
324 int ret = 0; 324 int ret = 0;
325 struct psp_ring *ring; 325 struct psp_ring *ring;
@@ -339,6 +339,19 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
339 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), 339 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
340 0x80000000, 0x80000000, false); 340 0x80000000, 0x80000000, false);
341 341
342 return ret;
343}
344
345int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
346{
347 int ret = 0;
348 struct psp_ring *ring = &psp->km_ring;
349 struct amdgpu_device *adev = psp->adev;
350
351 ret = psp_v3_1_ring_stop(psp, ring_type);
352 if (ret)
353 DRM_ERROR("Failed to stop psp ring\n");
354
342 amdgpu_bo_free_kernel(&adev->firmware.rbuf, 355 amdgpu_bo_free_kernel(&adev->firmware.rbuf,
343 &ring->ring_mem_mc_addr, 356 &ring->ring_mem_mc_addr,
344 (void **)&ring->ring_mem); 357 (void **)&ring->ring_mem);
@@ -517,3 +530,37 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
517 reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2); 530 reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2);
518 return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; 531 return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false;
519} 532}
533
534int psp_v3_1_mode1_reset(struct psp_context *psp)
535{
536 int ret;
537 uint32_t offset;
538 struct amdgpu_device *adev = psp->adev;
539
540 offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
541
542 ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
543
544 if (ret) {
545 DRM_INFO("psp is not working correctly before mode1 reset!\n");
546 return -EINVAL;
547 }
548
549 /* send the mode 1 reset command */
550 WREG32(offset, 0x70000);
551
552 mdelay(1000);
553
554 offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
555
556 ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
557
558 if (ret) {
559 DRM_INFO("psp mode 1 reset failed!\n");
560 return -EINVAL;
561 }
562
563 DRM_INFO("psp mode1 reset succeeded\n");
564
565 return 0;
566}
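psp_v3_1_mode1_reset drives the reset through the MP0_SMN_C2PMSG mailbox and relies on psp_wait_for() to poll a register against a mask until the expected value shows up or the wait times out. A generic user-space flavour of that kind of helper (the retry budget, fake register and names are illustrative, not the driver's psp_wait_for signature):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Fake status register that "comes ready" after a few reads. */
static uint32_t read_status(void)
{
        static int reads;

        return (++reads > 3) ? 0x80000000u : 0x0u;
}

/* Poll until (reg & mask) == expected or the retry budget runs out;
 * a driver would delay or sleep between reads. */
static bool wait_for(uint32_t expected, uint32_t mask, unsigned int retries)
{
        while (retries--)
                if ((read_status() & mask) == expected)
                        return true;
        return false;
}

int main(void)
{
        puts(wait_for(0x80000000u, 0x80000000u, 10) ? "ready" : "timeout");
        return 0;
}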
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
index 9dcd0b25c4c6..b05dbada7751 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
@@ -41,6 +41,8 @@ extern int psp_v3_1_ring_init(struct psp_context *psp,
41 enum psp_ring_type ring_type); 41 enum psp_ring_type ring_type);
42extern int psp_v3_1_ring_create(struct psp_context *psp, 42extern int psp_v3_1_ring_create(struct psp_context *psp,
43 enum psp_ring_type ring_type); 43 enum psp_ring_type ring_type);
44extern int psp_v3_1_ring_stop(struct psp_context *psp,
45 enum psp_ring_type ring_type);
44extern int psp_v3_1_ring_destroy(struct psp_context *psp, 46extern int psp_v3_1_ring_destroy(struct psp_context *psp,
45 enum psp_ring_type ring_type); 47 enum psp_ring_type ring_type);
46extern int psp_v3_1_cmd_submit(struct psp_context *psp, 48extern int psp_v3_1_cmd_submit(struct psp_context *psp,
@@ -51,4 +53,5 @@ extern bool psp_v3_1_compare_sram_data(struct psp_context *psp,
51 struct amdgpu_firmware_info *ucode, 53 struct amdgpu_firmware_info *ucode,
52 enum AMDGPU_UCODE_ID ucode_type); 54 enum AMDGPU_UCODE_ID ucode_type);
53extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp); 55extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp);
56extern int psp_v3_1_mode1_reset(struct psp_context *psp);
54#endif 57#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index f2d0710258cb..acdee3a4602c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1324,8 +1324,13 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
1324} 1324}
1325 1325
1326static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { 1326static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
1327 .copy_pte_num_dw = 7,
1327 .copy_pte = sdma_v2_4_vm_copy_pte, 1328 .copy_pte = sdma_v2_4_vm_copy_pte,
1329
1328 .write_pte = sdma_v2_4_vm_write_pte, 1330 .write_pte = sdma_v2_4_vm_write_pte,
1331
1332 .set_max_nums_pte_pde = 0x1fffff >> 3,
1333 .set_pte_pde_num_dw = 10,
1329 .set_pte_pde = sdma_v2_4_vm_set_pte_pde, 1334 .set_pte_pde = sdma_v2_4_vm_set_pte_pde,
1330}; 1335};
1331 1336
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index b1de44f22824..72f31cc7df00 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -379,8 +379,10 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
379 struct amdgpu_device *adev = ring->adev; 379 struct amdgpu_device *adev = ring->adev;
380 380
381 if (ring->use_doorbell) { 381 if (ring->use_doorbell) {
382 u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
383
382 /* XXX check if swapping is necessary on BE */ 384 /* XXX check if swapping is necessary on BE */
383 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr) << 2; 385 WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
384 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); 386 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
385 } else { 387 } else {
386 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; 388 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
@@ -641,10 +643,11 @@ static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
641static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) 643static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
642{ 644{
643 struct amdgpu_ring *ring; 645 struct amdgpu_ring *ring;
644 u32 rb_cntl, ib_cntl; 646 u32 rb_cntl, ib_cntl, wptr_poll_cntl;
645 u32 rb_bufsz; 647 u32 rb_bufsz;
646 u32 wb_offset; 648 u32 wb_offset;
647 u32 doorbell; 649 u32 doorbell;
650 u64 wptr_gpu_addr;
648 int i, j, r; 651 int i, j, r;
649 652
650 for (i = 0; i < adev->sdma.num_instances; i++) { 653 for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -707,6 +710,20 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
707 } 710 }
708 WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell); 711 WREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i], doorbell);
709 712
713 /* setup the wptr shadow polling */
714 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
715
716 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO + sdma_offsets[i],
717 lower_32_bits(wptr_gpu_addr));
718 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
719 upper_32_bits(wptr_gpu_addr));
720 wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
721 if (amdgpu_sriov_vf(adev))
722 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
723 else
724 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
725 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl);
726
710 /* enable DMA RB */ 727 /* enable DMA RB */
711 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); 728 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
712 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 729 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
@@ -1713,11 +1730,11 @@ static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
1713} 1730}
1714 1731
1715static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = { 1732static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
1716 .copy_max_bytes = 0x1fffff, 1733 .copy_max_bytes = 0x3fffe0, /* not 0x3fffff due to HW limitation */
1717 .copy_num_dw = 7, 1734 .copy_num_dw = 7,
1718 .emit_copy_buffer = sdma_v3_0_emit_copy_buffer, 1735 .emit_copy_buffer = sdma_v3_0_emit_copy_buffer,
1719 1736
1720 .fill_max_bytes = 0x1fffff, 1737 .fill_max_bytes = 0x3fffe0, /* not 0x3fffff due to HW limitation */
1721 .fill_num_dw = 5, 1738 .fill_num_dw = 5,
1722 .emit_fill_buffer = sdma_v3_0_emit_fill_buffer, 1739 .emit_fill_buffer = sdma_v3_0_emit_fill_buffer,
1723}; 1740};
@@ -1731,8 +1748,14 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
1731} 1748}
1732 1749
1733static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { 1750static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
1751 .copy_pte_num_dw = 7,
1734 .copy_pte = sdma_v3_0_vm_copy_pte, 1752 .copy_pte = sdma_v3_0_vm_copy_pte,
1753
1735 .write_pte = sdma_v3_0_vm_write_pte, 1754 .write_pte = sdma_v3_0_vm_write_pte,
1755
1756 /* not 0x3fffff due to HW limitation */
1757 .set_max_nums_pte_pde = 0x3fffe0 >> 3,
1758 .set_pte_pde_num_dw = 10,
1736 .set_pte_pde = sdma_v3_0_vm_set_pte_pde, 1759 .set_pte_pde = sdma_v3_0_vm_set_pte_pde,
1737}; 1760};
1738 1761
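sdma_v3_0_ring_set_wptr now publishes the shadow write pointer with WRITE_ONCE() before ringing the doorbell, so the compiler cannot tear, elide or refetch that store. The kernel macro is more involved; a plain volatile store captures the idea:

#include <stdint.h>
#include <stdio.h>

/* Rough stand-in for WRITE_ONCE(): one untorn store through a volatile lvalue. */
#define WRITE_ONCE_U32(p, v) (*(volatile uint32_t *)(p) = (v))

static uint32_t shadow_wptr;            /* word the hardware polls / another agent reads */

static void ring_doorbell(uint32_t v)
{
        printf("doorbell <- %u\n", v);
}

int main(void)
{
        uint32_t wptr = 12;

        WRITE_ONCE_U32(&shadow_wptr, wptr << 2);   /* publish the shadow value first */
        ring_doorbell(wptr << 2);                  /* then notify the engine */
        return 0;
}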
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index fd7c72aaafa6..c26d205ff3bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -54,7 +54,7 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
54static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); 54static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev);
55 55
56static const u32 golden_settings_sdma_4[] = { 56static const u32 golden_settings_sdma_4[] = {
57 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, 57 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07,
58 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, 58 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100,
59 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, 59 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100,
60 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, 60 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
@@ -89,7 +89,7 @@ static const u32 golden_settings_sdma_vg10[] = {
89 89
90static const u32 golden_settings_sdma_4_1[] = 90static const u32 golden_settings_sdma_4_1[] =
91{ 91{
92 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, 92 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07,
93 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, 93 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100,
94 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, 94 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100,
95 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, 95 SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000,
@@ -398,7 +398,7 @@ static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
398{ 398{
399 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 399 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
400 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 400 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
401 amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0)); 401 amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE));
402 amdgpu_ring_write(ring, 1); 402 amdgpu_ring_write(ring, 1);
403} 403}
404 404
@@ -1264,6 +1264,11 @@ static int sdma_v4_0_sw_fini(void *handle)
1264 for (i = 0; i < adev->sdma.num_instances; i++) 1264 for (i = 0; i < adev->sdma.num_instances; i++)
1265 amdgpu_ring_fini(&adev->sdma.instance[i].ring); 1265 amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1266 1266
1267 for (i = 0; i < adev->sdma.num_instances; i++) {
1268 release_firmware(adev->sdma.instance[i].fw);
1269 adev->sdma.instance[i].fw = NULL;
1270 }
1271
1267 return 0; 1272 return 0;
1268} 1273}
1269 1274
@@ -1714,8 +1719,13 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
1714} 1719}
1715 1720
1716static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { 1721static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
1722 .copy_pte_num_dw = 7,
1717 .copy_pte = sdma_v4_0_vm_copy_pte, 1723 .copy_pte = sdma_v4_0_vm_copy_pte,
1724
1718 .write_pte = sdma_v4_0_vm_write_pte, 1725 .write_pte = sdma_v4_0_vm_write_pte,
1726
1727 .set_max_nums_pte_pde = 0x400000 >> 3,
1728 .set_pte_pde_num_dw = 10,
1719 .set_pte_pde = sdma_v4_0_vm_set_pte_pde, 1729 .set_pte_pde = sdma_v4_0_vm_set_pte_pde,
1720}; 1730};
1721 1731
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 112969f3301a..3fa2fbf8c9a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -887,8 +887,13 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
887} 887}
888 888
889static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { 889static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
890 .copy_pte_num_dw = 5,
890 .copy_pte = si_dma_vm_copy_pte, 891 .copy_pte = si_dma_vm_copy_pte,
892
891 .write_pte = si_dma_vm_write_pte, 893 .write_pte = si_dma_vm_write_pte,
894
895 .set_max_nums_pte_pde = 0xffff8 >> 3,
896 .set_pte_pde_num_dw = 9,
892 .set_pte_pde = si_dma_vm_set_pte_pde, 897 .set_pte_pde = si_dma_vm_set_pte_pde,
893}; 898};
894 899
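The new copy_pte_num_dw, set_pte_pde_num_dw and set_max_nums_pte_pde fields let the VM code size its indirect buffers per engine instead of assuming one SDMA cost everywhere. A toy calculation with the si_dma values above (the formula is only illustrative, not amdgpu_vm's exact accounting):

#include <stdio.h>

int main(void)
{
        unsigned int set_pte_pde_num_dw   = 9;             /* dwords per SET_PTE_PDE command */
        unsigned int set_max_nums_pte_pde = 0xffff8 >> 3;  /* max PTEs one command may write */
        unsigned long long num_ptes       = 1ULL << 20;    /* update 1M page-table entries   */

        unsigned long long cmds = (num_ptes + set_max_nums_pte_pde - 1) / set_max_nums_pte_pde;
        unsigned long long dws  = cmds * set_pte_pde_num_dw;

        printf("%llu commands, %llu dwords of IB space\n", cmds, dws);
        return 0;
}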
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index d63873f3f574..9b8db6046271 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -1847,7 +1847,6 @@ static int si_calculate_sclk_params(struct amdgpu_device *adev,
1847 1847
1848static void si_thermal_start_smc_fan_control(struct amdgpu_device *adev); 1848static void si_thermal_start_smc_fan_control(struct amdgpu_device *adev);
1849static void si_fan_ctrl_set_default_mode(struct amdgpu_device *adev); 1849static void si_fan_ctrl_set_default_mode(struct amdgpu_device *adev);
1850static void si_dpm_set_dpm_funcs(struct amdgpu_device *adev);
1851static void si_dpm_set_irq_funcs(struct amdgpu_device *adev); 1850static void si_dpm_set_irq_funcs(struct amdgpu_device *adev);
1852 1851
1853static struct si_power_info *si_get_pi(struct amdgpu_device *adev) 1852static struct si_power_info *si_get_pi(struct amdgpu_device *adev)
@@ -3060,9 +3059,9 @@ static int si_get_vce_clock_voltage(struct amdgpu_device *adev,
3060 return ret; 3059 return ret;
3061} 3060}
3062 3061
3063static bool si_dpm_vblank_too_short(struct amdgpu_device *adev) 3062static bool si_dpm_vblank_too_short(void *handle)
3064{ 3063{
3065 3064 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3066 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); 3065 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
3067 /* we never hit the non-gddr5 limit so disable it */ 3066 /* we never hit the non-gddr5 limit so disable it */
3068 u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; 3067 u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0;
@@ -3871,9 +3870,10 @@ static int si_restrict_performance_levels_before_switch(struct amdgpu_device *ad
3871 0 : -EINVAL; 3870 0 : -EINVAL;
3872} 3871}
3873 3872
3874static int si_dpm_force_performance_level(struct amdgpu_device *adev, 3873static int si_dpm_force_performance_level(void *handle,
3875 enum amd_dpm_forced_level level) 3874 enum amd_dpm_forced_level level)
3876{ 3875{
3876 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3877 struct amdgpu_ps *rps = adev->pm.dpm.current_ps; 3877 struct amdgpu_ps *rps = adev->pm.dpm.current_ps;
3878 struct si_ps *ps = si_get_ps(rps); 3878 struct si_ps *ps = si_get_ps(rps);
3879 u32 levels = ps->performance_level_count; 3879 u32 levels = ps->performance_level_count;
@@ -6575,11 +6575,12 @@ static int si_fan_ctrl_stop_smc_fan_control(struct amdgpu_device *adev)
6575 } 6575 }
6576} 6576}
6577 6577
6578static int si_dpm_get_fan_speed_percent(struct amdgpu_device *adev, 6578static int si_dpm_get_fan_speed_percent(void *handle,
6579 u32 *speed) 6579 u32 *speed)
6580{ 6580{
6581 u32 duty, duty100; 6581 u32 duty, duty100;
6582 u64 tmp64; 6582 u64 tmp64;
6583 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6583 6584
6584 if (adev->pm.no_fan) 6585 if (adev->pm.no_fan)
6585 return -ENOENT; 6586 return -ENOENT;
@@ -6600,9 +6601,10 @@ static int si_dpm_get_fan_speed_percent(struct amdgpu_device *adev,
6600 return 0; 6601 return 0;
6601} 6602}
6602 6603
6603static int si_dpm_set_fan_speed_percent(struct amdgpu_device *adev, 6604static int si_dpm_set_fan_speed_percent(void *handle,
6604 u32 speed) 6605 u32 speed)
6605{ 6606{
6607 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6606 struct si_power_info *si_pi = si_get_pi(adev); 6608 struct si_power_info *si_pi = si_get_pi(adev);
6607 u32 tmp; 6609 u32 tmp;
6608 u32 duty, duty100; 6610 u32 duty, duty100;
@@ -6633,8 +6635,10 @@ static int si_dpm_set_fan_speed_percent(struct amdgpu_device *adev,
6633 return 0; 6635 return 0;
6634} 6636}
6635 6637
6636static void si_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode) 6638static void si_dpm_set_fan_control_mode(void *handle, u32 mode)
6637{ 6639{
6640 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6641
6638 if (mode) { 6642 if (mode) {
6639 /* stop auto-manage */ 6643 /* stop auto-manage */
6640 if (adev->pm.dpm.fan.ucode_fan_control) 6644 if (adev->pm.dpm.fan.ucode_fan_control)
@@ -6649,8 +6653,9 @@ static void si_dpm_set_fan_control_mode(struct amdgpu_device *adev, u32 mode)
6649 } 6653 }
6650} 6654}
6651 6655
6652static u32 si_dpm_get_fan_control_mode(struct amdgpu_device *adev) 6656static u32 si_dpm_get_fan_control_mode(void *handle)
6653{ 6657{
6658 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6654 struct si_power_info *si_pi = si_get_pi(adev); 6659 struct si_power_info *si_pi = si_get_pi(adev);
6655 u32 tmp; 6660 u32 tmp;
6656 6661
@@ -6946,8 +6951,9 @@ static void si_dpm_disable(struct amdgpu_device *adev)
6946 ni_update_current_ps(adev, boot_ps); 6951 ni_update_current_ps(adev, boot_ps);
6947} 6952}
6948 6953
6949static int si_dpm_pre_set_power_state(struct amdgpu_device *adev) 6954static int si_dpm_pre_set_power_state(void *handle)
6950{ 6955{
6956 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6951 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); 6957 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
6952 struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps; 6958 struct amdgpu_ps requested_ps = *adev->pm.dpm.requested_ps;
6953 struct amdgpu_ps *new_ps = &requested_ps; 6959 struct amdgpu_ps *new_ps = &requested_ps;
@@ -6984,8 +6990,9 @@ static int si_power_control_set_level(struct amdgpu_device *adev)
6984 return 0; 6990 return 0;
6985} 6991}
6986 6992
6987static int si_dpm_set_power_state(struct amdgpu_device *adev) 6993static int si_dpm_set_power_state(void *handle)
6988{ 6994{
6995 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6989 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); 6996 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
6990 struct amdgpu_ps *new_ps = &eg_pi->requested_rps; 6997 struct amdgpu_ps *new_ps = &eg_pi->requested_rps;
6991 struct amdgpu_ps *old_ps = &eg_pi->current_rps; 6998 struct amdgpu_ps *old_ps = &eg_pi->current_rps;
@@ -7086,8 +7093,9 @@ static int si_dpm_set_power_state(struct amdgpu_device *adev)
7086 return 0; 7093 return 0;
7087} 7094}
7088 7095
7089static void si_dpm_post_set_power_state(struct amdgpu_device *adev) 7096static void si_dpm_post_set_power_state(void *handle)
7090{ 7097{
7098 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7091 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); 7099 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
7092 struct amdgpu_ps *new_ps = &eg_pi->requested_rps; 7100 struct amdgpu_ps *new_ps = &eg_pi->requested_rps;
7093 7101
@@ -7103,8 +7111,10 @@ void si_dpm_reset_asic(struct amdgpu_device *adev)
7103} 7111}
7104#endif 7112#endif
7105 7113
7106static void si_dpm_display_configuration_changed(struct amdgpu_device *adev) 7114static void si_dpm_display_configuration_changed(void *handle)
7107{ 7115{
7116 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7117
7108 si_program_display_gap(adev); 7118 si_program_display_gap(adev);
7109} 7119}
7110 7120
@@ -7486,9 +7496,10 @@ static void si_dpm_fini(struct amdgpu_device *adev)
7486 amdgpu_free_extended_power_table(adev); 7496 amdgpu_free_extended_power_table(adev);
7487} 7497}
7488 7498
7489static void si_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev, 7499static void si_dpm_debugfs_print_current_performance_level(void *handle,
7490 struct seq_file *m) 7500 struct seq_file *m)
7491{ 7501{
7502 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7492 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); 7503 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
7493 struct amdgpu_ps *rps = &eg_pi->current_rps; 7504 struct amdgpu_ps *rps = &eg_pi->current_rps;
7494 struct si_ps *ps = si_get_ps(rps); 7505 struct si_ps *ps = si_get_ps(rps);
@@ -7860,10 +7871,11 @@ static int si_dpm_set_powergating_state(void *handle,
7860} 7871}
7861 7872
7862/* get temperature in millidegrees */ 7873/* get temperature in millidegrees */
7863static int si_dpm_get_temp(struct amdgpu_device *adev) 7874static int si_dpm_get_temp(void *handle)
7864{ 7875{
7865 u32 temp; 7876 u32 temp;
7866 int actual_temp = 0; 7877 int actual_temp = 0;
7878 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7867 7879
7868 temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> 7880 temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
7869 CTF_TEMP_SHIFT; 7881 CTF_TEMP_SHIFT;
@@ -7878,8 +7890,9 @@ static int si_dpm_get_temp(struct amdgpu_device *adev)
7878 return actual_temp; 7890 return actual_temp;
7879} 7891}
7880 7892
7881static u32 si_dpm_get_sclk(struct amdgpu_device *adev, bool low) 7893static u32 si_dpm_get_sclk(void *handle, bool low)
7882{ 7894{
7895 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7883 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); 7896 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
7884 struct si_ps *requested_state = si_get_ps(&eg_pi->requested_rps); 7897 struct si_ps *requested_state = si_get_ps(&eg_pi->requested_rps);
7885 7898
@@ -7889,8 +7902,9 @@ static u32 si_dpm_get_sclk(struct amdgpu_device *adev, bool low)
7889 return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk; 7902 return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk;
7890} 7903}
7891 7904
7892static u32 si_dpm_get_mclk(struct amdgpu_device *adev, bool low) 7905static u32 si_dpm_get_mclk(void *handle, bool low)
7893{ 7906{
7907 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7894 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); 7908 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
7895 struct si_ps *requested_state = si_get_ps(&eg_pi->requested_rps); 7909 struct si_ps *requested_state = si_get_ps(&eg_pi->requested_rps);
7896 7910
@@ -7900,9 +7914,11 @@ static u32 si_dpm_get_mclk(struct amdgpu_device *adev, bool low)
7900 return requested_state->performance_levels[requested_state->performance_level_count - 1].mclk; 7914 return requested_state->performance_levels[requested_state->performance_level_count - 1].mclk;
7901} 7915}
7902 7916
7903static void si_dpm_print_power_state(struct amdgpu_device *adev, 7917static void si_dpm_print_power_state(void *handle,
7904 struct amdgpu_ps *rps) 7918 void *current_ps)
7905{ 7919{
7920 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7921 struct amdgpu_ps *rps = (struct amdgpu_ps *)current_ps;
7906 struct si_ps *ps = si_get_ps(rps); 7922 struct si_ps *ps = si_get_ps(rps);
7907 struct rv7xx_pl *pl; 7923 struct rv7xx_pl *pl;
7908 int i; 7924 int i;
@@ -7927,7 +7943,6 @@ static int si_dpm_early_init(void *handle)
7927 7943
7928 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 7944 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7929 7945
7930 si_dpm_set_dpm_funcs(adev);
7931 si_dpm_set_irq_funcs(adev); 7946 si_dpm_set_irq_funcs(adev);
7932 return 0; 7947 return 0;
7933} 7948}
@@ -7942,20 +7957,23 @@ static inline bool si_are_power_levels_equal(const struct rv7xx_pl *si_cpl1,
7942 (si_cpl1->vddci == si_cpl2->vddci)); 7957 (si_cpl1->vddci == si_cpl2->vddci));
7943} 7958}
7944 7959
7945static int si_check_state_equal(struct amdgpu_device *adev, 7960static int si_check_state_equal(void *handle,
7946 struct amdgpu_ps *cps, 7961 void *current_ps,
7947 struct amdgpu_ps *rps, 7962 void *request_ps,
7948 bool *equal) 7963 bool *equal)
7949{ 7964{
7950 struct si_ps *si_cps; 7965 struct si_ps *si_cps;
7951 struct si_ps *si_rps; 7966 struct si_ps *si_rps;
7952 int i; 7967 int i;
7968 struct amdgpu_ps *cps = (struct amdgpu_ps *)current_ps;
7969 struct amdgpu_ps *rps = (struct amdgpu_ps *)request_ps;
7970 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7953 7971
7954 if (adev == NULL || cps == NULL || rps == NULL || equal == NULL) 7972 if (adev == NULL || cps == NULL || rps == NULL || equal == NULL)
7955 return -EINVAL; 7973 return -EINVAL;
7956 7974
7957 si_cps = si_get_ps(cps); 7975 si_cps = si_get_ps((struct amdgpu_ps *)cps);
7958 si_rps = si_get_ps(rps); 7976 si_rps = si_get_ps((struct amdgpu_ps *)rps);
7959 7977
7960 if (si_cps == NULL) { 7978 if (si_cps == NULL) {
7961 printk("si_cps is NULL\n"); 7979 printk("si_cps is NULL\n");
@@ -7983,9 +8001,10 @@ static int si_check_state_equal(struct amdgpu_device *adev,
7983 return 0; 8001 return 0;
7984} 8002}
7985 8003
7986static int si_dpm_read_sensor(struct amdgpu_device *adev, int idx, 8004static int si_dpm_read_sensor(void *handle, int idx,
7987 void *value, int *size) 8005 void *value, int *size)
7988{ 8006{
8007 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7989 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); 8008 struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
7990 struct amdgpu_ps *rps = &eg_pi->current_rps; 8009 struct amdgpu_ps *rps = &eg_pi->current_rps;
7991 struct si_ps *ps = si_get_ps(rps); 8010 struct si_ps *ps = si_get_ps(rps);
@@ -8041,7 +8060,7 @@ const struct amd_ip_funcs si_dpm_ip_funcs = {
8041 .set_powergating_state = si_dpm_set_powergating_state, 8060 .set_powergating_state = si_dpm_set_powergating_state,
8042}; 8061};
8043 8062
8044static const struct amdgpu_dpm_funcs si_dpm_funcs = { 8063const struct amd_pm_funcs si_dpm_funcs = {
8045 .get_temperature = &si_dpm_get_temp, 8064 .get_temperature = &si_dpm_get_temp,
8046 .pre_set_power_state = &si_dpm_pre_set_power_state, 8065 .pre_set_power_state = &si_dpm_pre_set_power_state,
8047 .set_power_state = &si_dpm_set_power_state, 8066 .set_power_state = &si_dpm_set_power_state,
@@ -8062,12 +8081,6 @@ static const struct amdgpu_dpm_funcs si_dpm_funcs = {
8062 .read_sensor = &si_dpm_read_sensor, 8081 .read_sensor = &si_dpm_read_sensor,
8063}; 8082};
8064 8083
8065static void si_dpm_set_dpm_funcs(struct amdgpu_device *adev)
8066{
8067 if (adev->pm.funcs == NULL)
8068 adev->pm.funcs = &si_dpm_funcs;
8069}
8070
8071static const struct amdgpu_irq_src_funcs si_dpm_irq_funcs = { 8084static const struct amdgpu_irq_src_funcs si_dpm_irq_funcs = {
8072 .set = si_dpm_set_interrupt_state, 8085 .set = si_dpm_set_interrupt_state,
8073 .process = si_dpm_process_interrupt, 8086 .process = si_dpm_process_interrupt,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.h b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
index 51ce21c5f4fb..9fe343de3477 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
@@ -246,6 +246,7 @@ enum si_display_gap
246}; 246};
247 247
248extern const struct amd_ip_funcs si_dpm_ip_funcs; 248extern const struct amd_ip_funcs si_dpm_ip_funcs;
249extern const struct amd_pm_funcs si_dpm_funcs;
249 250
250struct ni_leakage_coeffients 251struct ni_leakage_coeffients
251{ 252{
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index ce25e03a077d..d2c6b80309c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -118,6 +118,19 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev)
118 return (wptr & adev->irq.ih.ptr_mask); 118 return (wptr & adev->irq.ih.ptr_mask);
119} 119}
120 120
121/**
122 * si_ih_prescreen_iv - prescreen an interrupt vector
123 *
124 * @adev: amdgpu_device pointer
125 *
126 * Returns true if the interrupt vector should be further processed.
127 */
128static bool si_ih_prescreen_iv(struct amdgpu_device *adev)
129{
130 /* Process all interrupts */
131 return true;
132}
133
121static void si_ih_decode_iv(struct amdgpu_device *adev, 134static void si_ih_decode_iv(struct amdgpu_device *adev,
122 struct amdgpu_iv_entry *entry) 135 struct amdgpu_iv_entry *entry)
123{ 136{
@@ -288,6 +301,7 @@ static const struct amd_ip_funcs si_ih_ip_funcs = {
288 301
289static const struct amdgpu_ih_funcs si_ih_funcs = { 302static const struct amdgpu_ih_funcs si_ih_funcs = {
290 .get_wptr = si_ih_get_wptr, 303 .get_wptr = si_ih_get_wptr,
304 .prescreen_iv = si_ih_prescreen_iv,
291 .decode_iv = si_ih_decode_iv, 305 .decode_iv = si_ih_decode_iv,
292 .set_rptr = si_ih_set_rptr 306 .set_rptr = si_ih_set_rptr
293}; 307};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index f2c3a49f73a0..245a18aeb389 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -407,18 +407,27 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num,
407 return -EINVAL; 407 return -EINVAL;
408} 408}
409 409
410static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) 410static int soc15_asic_reset(struct amdgpu_device *adev)
411{ 411{
412 u32 i; 412 u32 i;
413 413
414 dev_info(adev->dev, "GPU pci config reset\n"); 414 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
415
416 dev_info(adev->dev, "GPU reset\n");
415 417
416 /* disable BM */ 418 /* disable BM */
417 pci_clear_master(adev->pdev); 419 pci_clear_master(adev->pdev);
418 /* reset */
419 amdgpu_pci_config_reset(adev);
420 420
421 udelay(100); 421 pci_save_state(adev->pdev);
422
423 for (i = 0; i < AMDGPU_MAX_IP_NUM; i++) {
424 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP){
425 adev->ip_blocks[i].version->funcs->soft_reset((void *)adev);
426 break;
427 }
428 }
429
430 pci_restore_state(adev->pdev);
422 431
423 /* wait for asic to come out of reset */ 432 /* wait for asic to come out of reset */
424 for (i = 0; i < adev->usec_timeout; i++) { 433 for (i = 0; i < adev->usec_timeout; i++) {
@@ -430,14 +439,6 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev)
430 udelay(1); 439 udelay(1);
431 } 440 }
432 441
433}
434
435static int soc15_asic_reset(struct amdgpu_device *adev)
436{
437 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
438
439 soc15_gpu_pci_config_reset(adev);
440
441 amdgpu_atombios_scratch_regs_engine_hung(adev, false); 442 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
442 443
443 return 0; 444 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index e79befd80eed..7f408f85fdb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -250,6 +250,7 @@
250#define PACKET3_SET_UCONFIG_REG 0x79 250#define PACKET3_SET_UCONFIG_REG 0x79
251#define PACKET3_SET_UCONFIG_REG_START 0x0000c000 251#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
252#define PACKET3_SET_UCONFIG_REG_END 0x0000c400 252#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
253#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
253#define PACKET3_SCRATCH_RAM_WRITE 0x7D 254#define PACKET3_SCRATCH_RAM_WRITE 0x7D
254#define PACKET3_SCRATCH_RAM_READ 0x7E 255#define PACKET3_SCRATCH_RAM_READ 0x7E
255#define PACKET3_LOAD_CONST_RAM 0x80 256#define PACKET3_LOAD_CONST_RAM 0x80
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 923df2c0e535..5ed00692618e 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -219,6 +219,19 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev)
 }
 
 /**
+ * tonga_ih_prescreen_iv - prescreen an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns true if the interrupt vector should be further processed.
+ */
+static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev)
+{
+        /* Process all interrupts */
+        return true;
+}
+
+/**
  * tonga_ih_decode_iv - decode an interrupt vector
  *
  * @adev: amdgpu_device pointer
@@ -478,6 +491,7 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = {
 
 static const struct amdgpu_ih_funcs tonga_ih_funcs = {
         .get_wptr = tonga_ih_get_wptr,
+        .prescreen_iv = tonga_ih_prescreen_iv,
         .decode_iv = tonga_ih_decode_iv,
         .set_rptr = tonga_ih_set_rptr
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 23a85750edd6..b8ed8faf2003 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1161,7 +1161,7 @@ static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
  */
 static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
 {
-        amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0));
+        amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0));
         amdgpu_ring_write(ring, 1);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 11134d5f7443..75745544600a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1011,10 +1011,6 @@ static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
 {
         DRM_DEBUG("IH: VCE\n");
 
-        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
-                 VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
-                 ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
-
         switch (entry->src_data[0]) {
         case 0:
         case 1:
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 21e7b88401e1..1eb4d79d6e30 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -812,7 +812,7 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
  */
 static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
 {
-        amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0));
+        amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0));
         amdgpu_ring_write(ring, 1);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 56150e8d1ed2..a3b30d84dbb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -219,14 +219,92 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
                          wptr, adev->irq.ih.rptr, tmp);
                 adev->irq.ih.rptr = tmp;
 
-                tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL));
+                tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL));
                 tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
-                WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp);
+                WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp);
         }
         return (wptr & adev->irq.ih.ptr_mask);
 }
 
 /**
+ * vega10_ih_prescreen_iv - prescreen an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns true if the interrupt vector should be further processed.
+ */
+static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
+{
+        u32 ring_index = adev->irq.ih.rptr >> 2;
+        u32 dw0, dw3, dw4, dw5;
+        u16 pasid;
+        u64 addr, key;
+        struct amdgpu_vm *vm;
+        int r;
+
+        dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
+        dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
+        dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
+        dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
+
+        /* Filter retry page faults, let only the first one pass. If
+         * there are too many outstanding faults, ignore them until
+         * some faults get cleared.
+         */
+        switch (dw0 & 0xff) {
+        case AMDGPU_IH_CLIENTID_VMC:
+        case AMDGPU_IH_CLIENTID_UTCL2:
+                break;
+        default:
+                /* Not a VM fault */
+                return true;
+        }
+
+        /* Not a retry fault */
+        if (!(dw5 & 0x80))
+                return true;
+
+        pasid = dw3 & 0xffff;
+        /* No PASID, can't identify faulting process */
+        if (!pasid)
+                return true;
+
+        addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
+        key = AMDGPU_VM_FAULT(pasid, addr);
+        r = amdgpu_ih_add_fault(adev, key);
+
+        /* Hash table is full or the fault is already being processed,
+         * ignore further page faults
+         */
+        if (r != 0)
+                goto ignore_iv;
+
+        /* Track retry faults in per-VM fault FIFO. */
+        spin_lock(&adev->vm_manager.pasid_lock);
+        vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+        spin_unlock(&adev->vm_manager.pasid_lock);
+        if (WARN_ON_ONCE(!vm)) {
+                /* VM not found, process it normally */
+                amdgpu_ih_clear_fault(adev, key);
+                return true;
+        }
+        /* No locking required with single writer and single reader */
+        r = kfifo_put(&vm->faults, key);
+        if (!r) {
+                /* FIFO is full. Ignore it until there is space */
+                amdgpu_ih_clear_fault(adev, key);
+                goto ignore_iv;
+        }
+
+        /* It's the first fault for this address, process it normally */
+        return true;
+
+ignore_iv:
+        adev->irq.ih.rptr += 32;
+        return false;
+}
+
+/**
  * vega10_ih_decode_iv - decode an interrupt vector
  *
  * @adev: amdgpu_device pointer
@@ -310,6 +388,14 @@ static int vega10_ih_sw_init(void *handle)
         adev->irq.ih.use_doorbell = true;
         adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1;
 
+        adev->irq.ih.faults = kmalloc(sizeof(*adev->irq.ih.faults), GFP_KERNEL);
+        if (!adev->irq.ih.faults)
+                return -ENOMEM;
+        INIT_CHASH_TABLE(adev->irq.ih.faults->hash,
+                         AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+        spin_lock_init(&adev->irq.ih.faults->lock);
+        adev->irq.ih.faults->count = 0;
+
         r = amdgpu_irq_init(adev);
 
         return r;
@@ -322,6 +408,9 @@ static int vega10_ih_sw_fini(void *handle)
         amdgpu_irq_fini(adev);
         amdgpu_ih_ring_fini(adev);
 
+        kfree(adev->irq.ih.faults);
+        adev->irq.ih.faults = NULL;
+
         return 0;
 }
327 416
@@ -410,6 +499,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = {
 
 static const struct amdgpu_ih_funcs vega10_ih_funcs = {
         .get_wptr = vega10_ih_get_wptr,
+        .prescreen_iv = vega10_ih_prescreen_iv,
         .decode_iv = vega10_ih_decode_iv,
         .set_rptr = vega10_ih_set_rptr
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 6cac291c96da..9ff69b90df36 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1028,8 +1028,7 @@ static int vi_common_early_init(void *handle)
                 /* rev0 hardware requires workarounds to support PG */
                 adev->pg_flags = 0;
                 if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) {
-                        adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
-                                AMD_PG_SUPPORT_GFX_SMG |
+                        adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG |
                                 AMD_PG_SUPPORT_GFX_PIPELINE |
                                 AMD_PG_SUPPORT_CP |
                                 AMD_PG_SUPPORT_UVD |