Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 154
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 160
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 98
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 179
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 81
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 83
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1577
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 95
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 550
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 41
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 130
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 716
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 127
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 35
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 52
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 112
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 58
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 412
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 45
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 53
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 110
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 79
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 112
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 566
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 290
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 164
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 51
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 43
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 62
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 56
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 38
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 303
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 137
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 87
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 95
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 399
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_encoders.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 307
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_dpm.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 43
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 88
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 89
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 89
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 89
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 26
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/emu_soc.c (renamed from drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h) | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 85
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 105
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 93
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 435
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 139
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 189
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 216
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 181
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 93
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 200
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 56
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.h | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 70
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.h | 24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 55
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 109
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.c | 120
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dma.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.c | 90
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c | 66
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 66
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 148
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2
-rw-r--r-- [-rwxr-xr-x]  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 56
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 159
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.h | 2
122 files changed, 6951 insertions, 4599 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index d6e5b7273853..2ca2b5154d52 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -30,7 +30,6 @@ FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
30ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ 30ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
31 -I$(FULL_AMD_PATH)/include \ 31 -I$(FULL_AMD_PATH)/include \
32 -I$(FULL_AMD_PATH)/amdgpu \ 32 -I$(FULL_AMD_PATH)/amdgpu \
33 -I$(FULL_AMD_PATH)/scheduler \
34 -I$(FULL_AMD_PATH)/powerplay/inc \ 33 -I$(FULL_AMD_PATH)/powerplay/inc \
35 -I$(FULL_AMD_PATH)/acp/include \ 34 -I$(FULL_AMD_PATH)/acp/include \
36 -I$(FULL_AMD_DISPLAY_PATH) \ 35 -I$(FULL_AMD_DISPLAY_PATH) \
@@ -63,7 +62,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
63amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o 62amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
64 63
65amdgpu-y += \ 64amdgpu-y += \
66 vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o 65 vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
67 66
68# add GMC block 67# add GMC block
69amdgpu-y += \ 68amdgpu-y += \
@@ -88,8 +87,7 @@ amdgpu-y += \
88 87
89# add SMC block 88# add SMC block
90amdgpu-y += \ 89amdgpu-y += \
91 amdgpu_dpm.o \ 90 amdgpu_dpm.o
92 amdgpu_powerplay.o
93 91
94# add DCE block 92# add DCE block
95amdgpu-y += \ 93amdgpu-y += \
@@ -130,6 +128,8 @@ amdgpu-y += \
130# add amdkfd interfaces 128# add amdkfd interfaces
131amdgpu-y += \ 129amdgpu-y += \
132 amdgpu_amdkfd.o \ 130 amdgpu_amdkfd.o \
131 amdgpu_amdkfd_fence.o \
132 amdgpu_amdkfd_gpuvm.o \
133 amdgpu_amdkfd_gfx_v8.o 133 amdgpu_amdkfd_gfx_v8.o
134 134
135# add cgs 135# add cgs
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 74edba18b159..c8b605f3dc05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -68,6 +68,7 @@
68#include "amdgpu_vce.h" 68#include "amdgpu_vce.h"
69#include "amdgpu_vcn.h" 69#include "amdgpu_vcn.h"
70#include "amdgpu_mn.h" 70#include "amdgpu_mn.h"
71#include "amdgpu_gmc.h"
71#include "amdgpu_dm.h" 72#include "amdgpu_dm.h"
72#include "amdgpu_virt.h" 73#include "amdgpu_virt.h"
73#include "amdgpu_gart.h" 74#include "amdgpu_gart.h"
@@ -127,6 +128,7 @@ extern int amdgpu_job_hang_limit;
127extern int amdgpu_lbpw; 128extern int amdgpu_lbpw;
128extern int amdgpu_compute_multipipe; 129extern int amdgpu_compute_multipipe;
129extern int amdgpu_gpu_recovery; 130extern int amdgpu_gpu_recovery;
131extern int amdgpu_emu_mode;
130 132
131#ifdef CONFIG_DRM_AMDGPU_SI 133#ifdef CONFIG_DRM_AMDGPU_SI
132extern int amdgpu_si_support; 134extern int amdgpu_si_support;
@@ -179,10 +181,6 @@ extern int amdgpu_cik_support;
179#define CIK_CURSOR_WIDTH 128 181#define CIK_CURSOR_WIDTH 128
180#define CIK_CURSOR_HEIGHT 128 182#define CIK_CURSOR_HEIGHT 128
181 183
182/* GPU RESET flags */
183#define AMDGPU_RESET_INFO_VRAM_LOST (1 << 0)
184#define AMDGPU_RESET_INFO_FULLRESET (1 << 1)
185
186struct amdgpu_device; 184struct amdgpu_device;
187struct amdgpu_ib; 185struct amdgpu_ib;
188struct amdgpu_cs_parser; 186struct amdgpu_cs_parser;
@@ -318,13 +316,6 @@ struct amdgpu_vm_pte_funcs {
318 void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe, 316 void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
319 uint64_t value, unsigned count, 317 uint64_t value, unsigned count,
320 uint32_t incr); 318 uint32_t incr);
321
322 /* maximum nums of PTEs/PDEs in a single operation */
323 uint32_t set_max_nums_pte_pde;
324
325 /* number of dw to reserve per operation */
326 unsigned set_pte_pde_num_dw;
327
328 /* for linear pte/pde updates without addr mapping */ 319 /* for linear pte/pde updates without addr mapping */
329 void (*set_pte_pde)(struct amdgpu_ib *ib, 320 void (*set_pte_pde)(struct amdgpu_ib *ib,
330 uint64_t pe, 321 uint64_t pe,
@@ -332,28 +323,6 @@ struct amdgpu_vm_pte_funcs {
332 uint32_t incr, uint64_t flags); 323 uint32_t incr, uint64_t flags);
333}; 324};
334 325
335/* provided by the gmc block */
336struct amdgpu_gart_funcs {
337 /* flush the vm tlb via mmio */
338 void (*flush_gpu_tlb)(struct amdgpu_device *adev,
339 uint32_t vmid);
340 /* write pte/pde updates using the cpu */
341 int (*set_pte_pde)(struct amdgpu_device *adev,
342 void *cpu_pt_addr, /* cpu addr of page table */
343 uint32_t gpu_page_idx, /* pte/pde to update */
344 uint64_t addr, /* addr to write into pte/pde */
345 uint64_t flags); /* access flags */
346 /* enable/disable PRT support */
347 void (*set_prt)(struct amdgpu_device *adev, bool enable);
348 /* set pte flags based per asic */
349 uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
350 uint32_t flags);
351 /* get the pde for a given mc addr */
352 void (*get_vm_pde)(struct amdgpu_device *adev, int level,
353 u64 *dst, u64 *flags);
354 uint32_t (*get_invalidate_req)(unsigned int vmid);
355};
356
357/* provided by the ih block */ 326/* provided by the ih block */
358struct amdgpu_ih_funcs { 327struct amdgpu_ih_funcs {
359 /* ring read/write ptr handling, called from interrupt context */ 328 /* ring read/write ptr handling, called from interrupt context */
@@ -371,14 +340,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev);
371bool amdgpu_read_bios(struct amdgpu_device *adev); 340bool amdgpu_read_bios(struct amdgpu_device *adev);
372 341
373/* 342/*
374 * Dummy page
375 */
376struct amdgpu_dummy_page {
377 struct page *page;
378 dma_addr_t addr;
379};
380
381/*
382 * Clocks 343 * Clocks
383 */ 344 */
384 345
@@ -418,8 +379,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
418struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, 379struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
419 struct drm_gem_object *gobj, 380 struct drm_gem_object *gobj,
420 int flags); 381 int flags);
421int amdgpu_gem_prime_pin(struct drm_gem_object *obj); 382struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
422void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); 383 struct dma_buf *dma_buf);
423struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); 384struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
424void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); 385void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
425void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); 386void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
@@ -480,7 +441,7 @@ struct amdgpu_sa_bo {
480void amdgpu_gem_force_release(struct amdgpu_device *adev); 441void amdgpu_gem_force_release(struct amdgpu_device *adev);
481int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, 442int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
482 int alignment, u32 initial_domain, 443 int alignment, u32 initial_domain,
483 u64 flags, bool kernel, 444 u64 flags, enum ttm_bo_type type,
484 struct reservation_object *resv, 445 struct reservation_object *resv,
485 struct drm_gem_object **obj); 446 struct drm_gem_object **obj);
486 447
@@ -494,56 +455,6 @@ int amdgpu_fence_slab_init(void);
494void amdgpu_fence_slab_fini(void); 455void amdgpu_fence_slab_fini(void);
495 456
496/* 457/*
497 * VMHUB structures, functions & helpers
498 */
499struct amdgpu_vmhub {
500 uint32_t ctx0_ptb_addr_lo32;
501 uint32_t ctx0_ptb_addr_hi32;
502 uint32_t vm_inv_eng0_req;
503 uint32_t vm_inv_eng0_ack;
504 uint32_t vm_context0_cntl;
505 uint32_t vm_l2_pro_fault_status;
506 uint32_t vm_l2_pro_fault_cntl;
507};
508
509/*
510 * GPU MC structures, functions & helpers
511 */
512struct amdgpu_mc {
513 resource_size_t aper_size;
514 resource_size_t aper_base;
515 resource_size_t agp_base;
516 /* for some chips with <= 32MB we need to lie
517 * about vram size near mc fb location */
518 u64 mc_vram_size;
519 u64 visible_vram_size;
520 u64 gart_size;
521 u64 gart_start;
522 u64 gart_end;
523 u64 vram_start;
524 u64 vram_end;
525 unsigned vram_width;
526 u64 real_vram_size;
527 int vram_mtrr;
528 u64 mc_mask;
529 const struct firmware *fw; /* MC firmware */
530 uint32_t fw_version;
531 struct amdgpu_irq_src vm_fault;
532 uint32_t vram_type;
533 uint32_t srbm_soft_reset;
534 bool prt_warning;
535 uint64_t stolen_size;
536 /* apertures */
537 u64 shared_aperture_start;
538 u64 shared_aperture_end;
539 u64 private_aperture_start;
540 u64 private_aperture_end;
541 /* protects concurrent invalidation */
542 spinlock_t invalidate_lock;
543 bool translate_further;
544};
545
546/*
547 * GPU doorbell structures, functions & helpers 458 * GPU doorbell structures, functions & helpers
548 */ 459 */
549typedef enum _AMDGPU_DOORBELL_ASSIGNMENT 460typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
@@ -979,6 +890,7 @@ struct amdgpu_gfx_funcs {
979 void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields); 890 void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields);
980 void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst); 891 void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
981 void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); 892 void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
893 void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
982}; 894};
983 895
984struct amdgpu_ngg_buf { 896struct amdgpu_ngg_buf {
@@ -1125,8 +1037,9 @@ struct amdgpu_job {
1125 void *owner; 1037 void *owner;
1126 uint64_t fence_ctx; /* the fence_context this job uses */ 1038 uint64_t fence_ctx; /* the fence_context this job uses */
1127 bool vm_needs_flush; 1039 bool vm_needs_flush;
1128 unsigned vmid;
1129 uint64_t vm_pd_addr; 1040 uint64_t vm_pd_addr;
1041 unsigned vmid;
1042 unsigned pasid;
1130 uint32_t gds_base, gds_size; 1043 uint32_t gds_base, gds_size;
1131 uint32_t gws_base, gws_size; 1044 uint32_t gws_base, gws_size;
1132 uint32_t oa_base, oa_size; 1045 uint32_t oa_base, oa_size;
@@ -1169,8 +1082,6 @@ struct amdgpu_wb {
1169int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); 1082int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
1170void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb); 1083void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
1171 1084
1172void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
1173
1174/* 1085/*
1175 * SDMA 1086 * SDMA
1176 */ 1087 */
@@ -1288,6 +1199,11 @@ struct amdgpu_asic_funcs {
1288 void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes); 1199 void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
1289 /* get config memsize register */ 1200 /* get config memsize register */
1290 u32 (*get_config_memsize)(struct amdgpu_device *adev); 1201 u32 (*get_config_memsize)(struct amdgpu_device *adev);
1202 /* flush hdp write queue */
1203 void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
1204 /* invalidate hdp read cache */
1205 void (*invalidate_hdp)(struct amdgpu_device *adev,
1206 struct amdgpu_ring *ring);
1291}; 1207};
1292 1208
1293/* 1209/*
@@ -1431,7 +1347,7 @@ struct amdgpu_nbio_funcs {
1431 u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); 1347 u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
1432 u32 (*get_rev_id)(struct amdgpu_device *adev); 1348 u32 (*get_rev_id)(struct amdgpu_device *adev);
1433 void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); 1349 void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
1434 void (*hdp_flush)(struct amdgpu_device *adev); 1350 void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
1435 u32 (*get_memsize)(struct amdgpu_device *adev); 1351 u32 (*get_memsize)(struct amdgpu_device *adev);
1436 void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, 1352 void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
1437 bool use_doorbell, int doorbell_index); 1353 bool use_doorbell, int doorbell_index);
@@ -1463,6 +1379,7 @@ enum amd_hw_ip_block_type {
1463 ATHUB_HWIP, 1379 ATHUB_HWIP,
1464 NBIO_HWIP, 1380 NBIO_HWIP,
1465 MP0_HWIP, 1381 MP0_HWIP,
1382 MP1_HWIP,
1466 UVD_HWIP, 1383 UVD_HWIP,
1467 VCN_HWIP = UVD_HWIP, 1384 VCN_HWIP = UVD_HWIP,
1468 VCE_HWIP, 1385 VCE_HWIP,
@@ -1472,15 +1389,14 @@ enum amd_hw_ip_block_type {
1472 SMUIO_HWIP, 1389 SMUIO_HWIP,
1473 PWR_HWIP, 1390 PWR_HWIP,
1474 NBIF_HWIP, 1391 NBIF_HWIP,
1392 THM_HWIP,
1475 MAX_HWIP 1393 MAX_HWIP
1476}; 1394};
1477 1395
1478#define HWIP_MAX_INSTANCE 6 1396#define HWIP_MAX_INSTANCE 6
1479 1397
1480struct amd_powerplay { 1398struct amd_powerplay {
1481 struct cgs_device *cgs_device;
1482 void *pp_handle; 1399 void *pp_handle;
1483 const struct amd_ip_funcs *ip_funcs;
1484 const struct amd_pm_funcs *pp_funcs; 1400 const struct amd_pm_funcs *pp_funcs;
1485}; 1401};
1486 1402
@@ -1504,6 +1420,7 @@ struct amdgpu_device {
1504 const struct amdgpu_asic_funcs *asic_funcs; 1420 const struct amdgpu_asic_funcs *asic_funcs;
1505 bool shutdown; 1421 bool shutdown;
1506 bool need_dma32; 1422 bool need_dma32;
1423 bool need_swiotlb;
1507 bool accel_working; 1424 bool accel_working;
1508 struct work_struct reset_work; 1425 struct work_struct reset_work;
1509 struct notifier_block acpi_nb; 1426 struct notifier_block acpi_nb;
@@ -1573,9 +1490,9 @@ struct amdgpu_device {
1573 struct amdgpu_clock clock; 1490 struct amdgpu_clock clock;
1574 1491
1575 /* MC */ 1492 /* MC */
1576 struct amdgpu_mc mc; 1493 struct amdgpu_gmc gmc;
1577 struct amdgpu_gart gart; 1494 struct amdgpu_gart gart;
1578 struct amdgpu_dummy_page dummy_page; 1495 dma_addr_t dummy_page_addr;
1579 struct amdgpu_vm_manager vm_manager; 1496 struct amdgpu_vm_manager vm_manager;
1580 struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; 1497 struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
1581 1498
@@ -1714,6 +1631,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
1714 uint32_t acc_flags); 1631 uint32_t acc_flags);
1715void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, 1632void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
1716 uint32_t acc_flags); 1633 uint32_t acc_flags);
1634void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
1635uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
1636
1717u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); 1637u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
1718void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); 1638void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
1719 1639
@@ -1725,6 +1645,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
1725bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); 1645bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
1726bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); 1646bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
1727 1647
1648int emu_soc_asic_init(struct amdgpu_device *adev);
1649
1728/* 1650/*
1729 * Registers read & write functions. 1651 * Registers read & write functions.
1730 */ 1652 */
@@ -1735,6 +1657,9 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
1735#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) 1657#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
1736#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) 1658#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
1737 1659
1660#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
1661#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
1662
1738#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) 1663#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
1739#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX) 1664#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
1740#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) 1665#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
@@ -1837,13 +1762,17 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
1837#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) 1762#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
1838#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) 1763#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
1839#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) 1764#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
1840#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) 1765#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
1841#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) 1766#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
1842#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags)) 1767#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
1768#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
1769#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
1770#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
1771#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
1772#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
1843#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) 1773#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
1844#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) 1774#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
1845#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) 1775#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
1846#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags))
1847#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) 1776#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
1848#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) 1777#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
1849#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) 1778#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
@@ -1856,11 +1785,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
1856#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) 1785#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
1857#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) 1786#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
1858#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) 1787#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
1859#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
1860#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) 1788#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
1861#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) 1789#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
1862#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) 1790#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
1863#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) 1791#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
1792#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
1864#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) 1793#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
1865#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) 1794#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
1866#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) 1795#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
@@ -1870,7 +1799,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
1870#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) 1799#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
1871#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) 1800#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
1872#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) 1801#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
1873#define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc))
1874#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) 1802#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
1875#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e)) 1803#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
1876#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h)) 1804#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
@@ -1887,26 +1815,24 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
1887#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) 1815#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
1888#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a)) 1816#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
1889#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) 1817#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
1818#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
1890 1819
1891/* Common functions */ 1820/* Common functions */
1892int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 1821int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
1893 struct amdgpu_job* job, bool force); 1822 struct amdgpu_job* job, bool force);
1894void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); 1823void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
1895bool amdgpu_device_need_post(struct amdgpu_device *adev); 1824bool amdgpu_device_need_post(struct amdgpu_device *adev);
1896void amdgpu_update_display_priority(struct amdgpu_device *adev); 1825void amdgpu_display_update_priority(struct amdgpu_device *adev);
1897 1826
1898void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, 1827void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
1899 u64 num_vis_bytes); 1828 u64 num_vis_bytes);
1900void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); 1829void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
1901bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); 1830bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
1902void amdgpu_device_vram_location(struct amdgpu_device *adev, 1831void amdgpu_device_vram_location(struct amdgpu_device *adev,
1903 struct amdgpu_mc *mc, u64 base); 1832 struct amdgpu_gmc *mc, u64 base);
1904void amdgpu_device_gart_location(struct amdgpu_device *adev, 1833void amdgpu_device_gart_location(struct amdgpu_device *adev,
1905 struct amdgpu_mc *mc); 1834 struct amdgpu_gmc *mc);
1906int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev); 1835int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
1907void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
1908int amdgpu_ttm_init(struct amdgpu_device *adev);
1909void amdgpu_ttm_fini(struct amdgpu_device *adev);
1910void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, 1836void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1911 const u32 *registers, 1837 const u32 *registers,
1912 const u32 array_size); 1838 const u32 array_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 57afad79f55d..8fa850a070e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -540,6 +540,9 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
540 size_t size; 540 size_t size;
541 u32 retry = 3; 541 u32 retry = 3;
542 542
543 if (amdgpu_acpi_pcie_notify_device_ready(adev))
544 return -EINVAL;
545
543 /* Get the device handle */ 546 /* Get the device handle */
544 handle = ACPI_HANDLE(&adev->pdev->dev); 547 handle = ACPI_HANDLE(&adev->pdev->dev);
545 if (!handle) 548 if (!handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 1d605e1c1d66..4d36203ffb11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -30,6 +30,8 @@
30const struct kgd2kfd_calls *kgd2kfd; 30const struct kgd2kfd_calls *kgd2kfd;
31bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); 31bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
32 32
33static const unsigned int compute_vmid_bitmap = 0xFF00;
34
33int amdgpu_amdkfd_init(void) 35int amdgpu_amdkfd_init(void)
34{ 36{
35 int ret; 37 int ret;
@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
56#else 58#else
57 ret = -ENOENT; 59 ret = -ENOENT;
58#endif 60#endif
61 amdgpu_amdkfd_gpuvm_init_mem_limits();
59 62
60 return ret; 63 return ret;
61} 64}
@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
78 switch (adev->asic_type) { 81 switch (adev->asic_type) {
79#ifdef CONFIG_DRM_AMDGPU_CIK 82#ifdef CONFIG_DRM_AMDGPU_CIK
80 case CHIP_KAVERI: 83 case CHIP_KAVERI:
84 case CHIP_HAWAII:
81 kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); 85 kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
82 break; 86 break;
83#endif 87#endif
84 case CHIP_CARRIZO: 88 case CHIP_CARRIZO:
89 case CHIP_TONGA:
90 case CHIP_FIJI:
91 case CHIP_POLARIS10:
92 case CHIP_POLARIS11:
85 kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); 93 kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
86 break; 94 break;
87 default: 95 default:
@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
132 int last_valid_bit; 140 int last_valid_bit;
133 if (adev->kfd) { 141 if (adev->kfd) {
134 struct kgd2kfd_shared_resources gpu_resources = { 142 struct kgd2kfd_shared_resources gpu_resources = {
135 .compute_vmid_bitmap = 0xFF00, 143 .compute_vmid_bitmap = compute_vmid_bitmap,
136 .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, 144 .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
137 .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe 145 .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
146 .gpuvm_size = min(adev->vm_manager.max_pfn
147 << AMDGPU_GPU_PAGE_SHIFT,
148 AMDGPU_VA_HOLE_START),
149 .drm_render_minor = adev->ddev->render->index
138 }; 150 };
139 151
140 /* this is going to have a few of the MSBs set that we need to 152 /* this is going to have a few of the MSBs set that we need to
@@ -204,20 +216,14 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
204 void **cpu_ptr) 216 void **cpu_ptr)
205{ 217{
206 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 218 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
207 struct kgd_mem **mem = (struct kgd_mem **) mem_obj; 219 struct amdgpu_bo *bo = NULL;
208 int r; 220 int r;
221 uint64_t gpu_addr_tmp = 0;
222 void *cpu_ptr_tmp = NULL;
209 223
210 BUG_ON(kgd == NULL); 224 r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
211 BUG_ON(gpu_addr == NULL); 225 AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
212 BUG_ON(cpu_ptr == NULL); 226 NULL, &bo);
213
214 *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
215 if ((*mem) == NULL)
216 return -ENOMEM;
217
218 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
219 AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0,
220 &(*mem)->bo);
221 if (r) { 227 if (r) {
222 dev_err(adev->dev, 228 dev_err(adev->dev,
223 "failed to allocate BO for amdkfd (%d)\n", r); 229 "failed to allocate BO for amdkfd (%d)\n", r);
@@ -225,54 +231,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
225 } 231 }
226 232
227 /* map the buffer */ 233 /* map the buffer */
228 r = amdgpu_bo_reserve((*mem)->bo, true); 234 r = amdgpu_bo_reserve(bo, true);
229 if (r) { 235 if (r) {
230 dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); 236 dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
231 goto allocate_mem_reserve_bo_failed; 237 goto allocate_mem_reserve_bo_failed;
232 } 238 }
233 239
234 r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, 240 r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
235 &(*mem)->gpu_addr); 241 &gpu_addr_tmp);
236 if (r) { 242 if (r) {
237 dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); 243 dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
238 goto allocate_mem_pin_bo_failed; 244 goto allocate_mem_pin_bo_failed;
239 } 245 }
240 *gpu_addr = (*mem)->gpu_addr;
241 246
242 r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); 247 r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
243 if (r) { 248 if (r) {
244 dev_err(adev->dev, 249 dev_err(adev->dev,
245 "(%d) failed to map bo to kernel for amdkfd\n", r); 250 "(%d) failed to map bo to kernel for amdkfd\n", r);
246 goto allocate_mem_kmap_bo_failed; 251 goto allocate_mem_kmap_bo_failed;
247 } 252 }
248 *cpu_ptr = (*mem)->cpu_ptr;
249 253
250 amdgpu_bo_unreserve((*mem)->bo); 254 *mem_obj = bo;
255 *gpu_addr = gpu_addr_tmp;
256 *cpu_ptr = cpu_ptr_tmp;
257
258 amdgpu_bo_unreserve(bo);
251 259
252 return 0; 260 return 0;
253 261
254allocate_mem_kmap_bo_failed: 262allocate_mem_kmap_bo_failed:
255 amdgpu_bo_unpin((*mem)->bo); 263 amdgpu_bo_unpin(bo);
256allocate_mem_pin_bo_failed: 264allocate_mem_pin_bo_failed:
257 amdgpu_bo_unreserve((*mem)->bo); 265 amdgpu_bo_unreserve(bo);
258allocate_mem_reserve_bo_failed: 266allocate_mem_reserve_bo_failed:
259 amdgpu_bo_unref(&(*mem)->bo); 267 amdgpu_bo_unref(&bo);
260 268
261 return r; 269 return r;
262} 270}
263 271
264void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) 272void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
265{ 273{
266 struct kgd_mem *mem = (struct kgd_mem *) mem_obj; 274 struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
267
268 BUG_ON(mem == NULL);
269 275
270 amdgpu_bo_reserve(mem->bo, true); 276 amdgpu_bo_reserve(bo, true);
271 amdgpu_bo_kunmap(mem->bo); 277 amdgpu_bo_kunmap(bo);
272 amdgpu_bo_unpin(mem->bo); 278 amdgpu_bo_unpin(bo);
273 amdgpu_bo_unreserve(mem->bo); 279 amdgpu_bo_unreserve(bo);
274 amdgpu_bo_unref(&(mem->bo)); 280 amdgpu_bo_unref(&(bo));
275 kfree(mem);
276} 281}
277 282
278void get_local_mem_info(struct kgd_dev *kgd, 283void get_local_mem_info(struct kgd_dev *kgd,
@@ -281,24 +286,29 @@ void get_local_mem_info(struct kgd_dev *kgd,
281 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 286 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
282 uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : 287 uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
283 ~((1ULL << 32) - 1); 288 ~((1ULL << 32) - 1);
284 resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size; 289 resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size;
285 290
286 memset(mem_info, 0, sizeof(*mem_info)); 291 memset(mem_info, 0, sizeof(*mem_info));
287 if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) { 292 if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) {
288 mem_info->local_mem_size_public = adev->mc.visible_vram_size; 293 mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
289 mem_info->local_mem_size_private = adev->mc.real_vram_size - 294 mem_info->local_mem_size_private = adev->gmc.real_vram_size -
290 adev->mc.visible_vram_size; 295 adev->gmc.visible_vram_size;
291 } else { 296 } else {
292 mem_info->local_mem_size_public = 0; 297 mem_info->local_mem_size_public = 0;
293 mem_info->local_mem_size_private = adev->mc.real_vram_size; 298 mem_info->local_mem_size_private = adev->gmc.real_vram_size;
294 } 299 }
295 mem_info->vram_width = adev->mc.vram_width; 300 mem_info->vram_width = adev->gmc.vram_width;
296 301
297 pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n", 302 pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n",
298 &adev->mc.aper_base, &aper_limit, 303 &adev->gmc.aper_base, &aper_limit,
299 mem_info->local_mem_size_public, 304 mem_info->local_mem_size_public,
300 mem_info->local_mem_size_private); 305 mem_info->local_mem_size_private);
301 306
307 if (amdgpu_emu_mode == 1) {
308 mem_info->mem_clk_max = 100;
309 return;
310 }
311
302 if (amdgpu_sriov_vf(adev)) 312 if (amdgpu_sriov_vf(adev))
303 mem_info->mem_clk_max = adev->clock.default_mclk / 100; 313 mem_info->mem_clk_max = adev->clock.default_mclk / 100;
304 else 314 else
@@ -319,6 +329,9 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
319 struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 329 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
320 330
 321 /* the sclk is in quanta of 10kHz */ 331 /* the sclk is in quanta of 10kHz */
332 if (amdgpu_emu_mode == 1)
333 return 100;
334
322 if (amdgpu_sriov_vf(adev)) 335 if (amdgpu_sriov_vf(adev))
323 return adev->clock.default_sclk / 100; 336 return adev->clock.default_sclk / 100;
324 337
@@ -354,3 +367,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
354 367
355 return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); 368 return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
356} 369}
370
371int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
372 uint32_t vmid, uint64_t gpu_addr,
373 uint32_t *ib_cmd, uint32_t ib_len)
374{
375 struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
376 struct amdgpu_job *job;
377 struct amdgpu_ib *ib;
378 struct amdgpu_ring *ring;
379 struct dma_fence *f = NULL;
380 int ret;
381
382 switch (engine) {
383 case KGD_ENGINE_MEC1:
384 ring = &adev->gfx.compute_ring[0];
385 break;
386 case KGD_ENGINE_SDMA1:
387 ring = &adev->sdma.instance[0].ring;
388 break;
389 case KGD_ENGINE_SDMA2:
390 ring = &adev->sdma.instance[1].ring;
391 break;
392 default:
393 pr_err("Invalid engine in IB submission: %d\n", engine);
394 ret = -EINVAL;
395 goto err;
396 }
397
398 ret = amdgpu_job_alloc(adev, 1, &job, NULL);
399 if (ret)
400 goto err;
401
402 ib = &job->ibs[0];
403 memset(ib, 0, sizeof(struct amdgpu_ib));
404
405 ib->gpu_addr = gpu_addr;
406 ib->ptr = ib_cmd;
407 ib->length_dw = ib_len;
408 /* This works for NO_HWS. TODO: need to handle without knowing VMID */
409 job->vmid = vmid;
410
411 ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
412 if (ret) {
413 DRM_ERROR("amdgpu: failed to schedule IB.\n");
414 goto err_ib_sched;
415 }
416
417 ret = dma_fence_wait(f, false);
418
419err_ib_sched:
420 dma_fence_put(f);
421 amdgpu_job_free(job);
422err:
423 return ret;
424}
425
426bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
427{
428 if (adev->kfd) {
429 if ((1 << vmid) & compute_vmid_bitmap)
430 return true;
431 }
432
433 return false;
434}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2a519f9062ee..c2c2bea731e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,15 +26,71 @@
26#define AMDGPU_AMDKFD_H_INCLUDED 26#define AMDGPU_AMDKFD_H_INCLUDED
27 27
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/mm.h>
29#include <linux/mmu_context.h> 30#include <linux/mmu_context.h>
30#include <kgd_kfd_interface.h> 31#include <kgd_kfd_interface.h>
32#include <drm/ttm/ttm_execbuf_util.h>
33#include "amdgpu_sync.h"
34#include "amdgpu_vm.h"
35
36extern const struct kgd2kfd_calls *kgd2kfd;
31 37
32struct amdgpu_device; 38struct amdgpu_device;
33 39
40struct kfd_bo_va_list {
41 struct list_head bo_list;
42 struct amdgpu_bo_va *bo_va;
43 void *kgd_dev;
44 bool is_mapped;
45 uint64_t va;
46 uint64_t pte_flags;
47};
48
34struct kgd_mem { 49struct kgd_mem {
50 struct mutex lock;
35 struct amdgpu_bo *bo; 51 struct amdgpu_bo *bo;
36 uint64_t gpu_addr; 52 struct list_head bo_va_list;
37 void *cpu_ptr; 53 /* protected by amdkfd_process_info.lock */
54 struct ttm_validate_buffer validate_list;
55 struct ttm_validate_buffer resv_list;
56 uint32_t domain;
57 unsigned int mapped_to_gpu_memory;
58 uint64_t va;
59
60 uint32_t mapping_flags;
61
62 struct amdkfd_process_info *process_info;
63
64 struct amdgpu_sync sync;
65
66 bool aql_queue;
67};
68
69/* KFD Memory Eviction */
70struct amdgpu_amdkfd_fence {
71 struct dma_fence base;
72 struct mm_struct *mm;
73 spinlock_t lock;
74 char timeline_name[TASK_COMM_LEN];
75};
76
77struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
78 struct mm_struct *mm);
79bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
80struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
81
82struct amdkfd_process_info {
83 /* List head of all VMs that belong to a KFD process */
84 struct list_head vm_list_head;
85 /* List head for all KFD BOs that belong to a KFD process. */
86 struct list_head kfd_bo_list;
87 /* Lock to protect kfd_bo_list */
88 struct mutex lock;
89
90 /* Number of VMs */
91 unsigned int n_vms;
92 /* Eviction Fence */
93 struct amdgpu_amdkfd_fence *eviction_fence;
38}; 94};
39 95
40int amdgpu_amdkfd_init(void); 96int amdgpu_amdkfd_init(void);
@@ -48,9 +104,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
48void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); 104void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
49void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); 105void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
50 106
107int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
108 uint32_t vmid, uint64_t gpu_addr,
109 uint32_t *ib_cmd, uint32_t ib_len);
110
51struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); 111struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
52struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); 112struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
53 113
114bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
115
54/* Shared API */ 116/* Shared API */
55int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 117int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
56 void **mem_obj, uint64_t *gpu_addr, 118 void **mem_obj, uint64_t *gpu_addr,
@@ -79,4 +141,36 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
79 valid; \ 141 valid; \
80 }) 142 })
81 143
144/* GPUVM API */
145int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
146 void **process_info,
147 struct dma_fence **ef);
148int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
149 struct file *filp,
150 void **vm, void **process_info,
151 struct dma_fence **ef);
152void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
153 struct amdgpu_vm *vm);
154void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
155uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
156int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
157 struct kgd_dev *kgd, uint64_t va, uint64_t size,
158 void *vm, struct kgd_mem **mem,
159 uint64_t *offset, uint32_t flags);
160int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
161 struct kgd_dev *kgd, struct kgd_mem *mem);
162int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
163 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
164int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
165 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
166int amdgpu_amdkfd_gpuvm_sync_memory(
167 struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
168int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
169 struct kgd_mem *mem, void **kptr, uint64_t *size);
170int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
171 struct dma_fence **ef);
172
173void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
174void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
175
82#endif /* AMDGPU_AMDKFD_H_INCLUDED */ 176#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
new file mode 100644
index 000000000000..2c14025e5e76
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -0,0 +1,179 @@
1/*
2 * Copyright 2016-2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include <linux/dma-fence.h>
24#include <linux/spinlock.h>
25#include <linux/atomic.h>
26#include <linux/stacktrace.h>
27#include <linux/sched.h>
28#include <linux/slab.h>
29#include <linux/sched/mm.h>
30#include "amdgpu_amdkfd.h"
31
32static const struct dma_fence_ops amdkfd_fence_ops;
33static atomic_t fence_seq = ATOMIC_INIT(0);
34
35/* Eviction Fence
36 * Fence helper functions to deal with KFD memory eviction.
37 * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
38 * evicted unless all the user queues for that process are evicted.
39 *
40 * All the BOs in a process share an eviction fence. When process X wants
41 * to map VRAM memory but TTM can't find enough space, TTM will attempt to
42 * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
43 * by calling ttm_bo_driver->eviction_valuable().
44 *
45 * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
46 * to process X. Otherwise, it will return true to indicate BO can be
47 * evicted by TTM.
48 *
49 * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
 50 * the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
 51 * --> amdgpu_copy_buffer(). This sets up a job in the GPU scheduler.
52 *
53 * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
 54 * notify when the BO is free to move. fence_add_callback --> enable_signaling
55 * --> amdgpu_amdkfd_fence.enable_signaling
56 *
57 * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
58 * user queues and signal fence. The work item will also start another delayed
59 * work item to restore BOs
60 */
61
62struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
63 struct mm_struct *mm)
64{
65 struct amdgpu_amdkfd_fence *fence;
66
67 fence = kzalloc(sizeof(*fence), GFP_KERNEL);
68 if (fence == NULL)
69 return NULL;
70
71 /* This reference gets released in amdkfd_fence_release */
72 mmgrab(mm);
73 fence->mm = mm;
74 get_task_comm(fence->timeline_name, current);
75 spin_lock_init(&fence->lock);
76
77 dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
78 context, atomic_inc_return(&fence_seq));
79
80 return fence;
81}
82
83struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
84{
85 struct amdgpu_amdkfd_fence *fence;
86
87 if (!f)
88 return NULL;
89
90 fence = container_of(f, struct amdgpu_amdkfd_fence, base);
91 if (fence && f->ops == &amdkfd_fence_ops)
92 return fence;
93
94 return NULL;
95}
96
97static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
98{
99 return "amdgpu_amdkfd_fence";
100}
101
102static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
103{
104 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
105
106 return fence->timeline_name;
107}
108
109/**
110 * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
111 * a KFD BO and schedules a job to move the BO.
 112 * If the fence is already signaled, return true.
 113 * If the fence is not signaled, schedule a work item to evict the KFD process.
114 */
115static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
116{
117 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
118
119 if (!fence)
120 return false;
121
122 if (dma_fence_is_signaled(f))
123 return true;
124
125 if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
126 return true;
127
128 return false;
129}
130
131/**
 132 * amdkfd_fence_release - callback invoked when the fence can be freed
133 *
134 * @fence: fence
135 *
136 * This function is called when the reference count becomes zero.
137 * Drops the mm_struct reference and RCU schedules freeing up the fence.
138 */
139static void amdkfd_fence_release(struct dma_fence *f)
140{
141 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
142
143 /* Unconditionally signal the fence. The process is getting
144 * terminated.
145 */
146 if (WARN_ON(!fence))
147 return; /* Not an amdgpu_amdkfd_fence */
148
149 mmdrop(fence->mm);
150 kfree_rcu(f, rcu);
151}
152
153/**
 154 * amdkfd_fence_check_mm - Check whether @mm matches the mm of fence @f.
 155 * Returns true if they match, false otherwise.
156 *
157 * @f: [IN] fence
158 * @mm: [IN] mm that needs to be verified
159 */
160bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
161{
162 struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
163
164 if (!fence)
165 return false;
166 else if (fence->mm == mm)
167 return true;
168
169 return false;
170}
171
172static const struct dma_fence_ops amdkfd_fence_ops = {
173 .get_driver_name = amdkfd_fence_get_driver_name,
174 .get_timeline_name = amdkfd_fence_get_timeline_name,
175 .enable_signaling = amdkfd_fence_enable_signaling,
176 .signaled = NULL,
177 .wait = dma_fence_default_wait,
178 .release = amdkfd_fence_release,
179};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index a9e6aea0e5f8..ea54e53172b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
139static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); 139static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
140static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 140static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
141 uint8_t vmid); 141 uint8_t vmid);
142static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
143 142
144static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); 143static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
145static void set_scratch_backing_va(struct kgd_dev *kgd, 144static void set_scratch_backing_va(struct kgd_dev *kgd,
146 uint64_t va, uint32_t vmid); 145 uint64_t va, uint32_t vmid);
146static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
147 uint32_t page_table_base);
148static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
149static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
147 150
148/* Because of REG_GET_FIELD() being used, we put this function in the 151/* Because of REG_GET_FIELD() being used, we put this function in the
149 * asic specific file. 152 * asic specific file.
@@ -196,12 +199,26 @@ static const struct kfd2kgd_calls kfd2kgd = {
196 .address_watch_get_offset = kgd_address_watch_get_offset, 199 .address_watch_get_offset = kgd_address_watch_get_offset,
197 .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, 200 .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
198 .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, 201 .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
199 .write_vmid_invalidate_request = write_vmid_invalidate_request,
200 .get_fw_version = get_fw_version, 202 .get_fw_version = get_fw_version,
201 .set_scratch_backing_va = set_scratch_backing_va, 203 .set_scratch_backing_va = set_scratch_backing_va,
202 .get_tile_config = get_tile_config, 204 .get_tile_config = get_tile_config,
203 .get_cu_info = get_cu_info, 205 .get_cu_info = get_cu_info,
204 .get_vram_usage = amdgpu_amdkfd_get_vram_usage 206 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
207 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
208 .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
209 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
210 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
211 .set_vm_context_page_table_base = set_vm_context_page_table_base,
212 .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
213 .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
214 .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
215 .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
216 .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
217 .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
218 .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
219 .invalidate_tlbs = invalidate_tlbs,
220 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
221 .submit_ib = amdgpu_amdkfd_submit_ib,
205}; 222};
206 223
207struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) 224struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -787,14 +804,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
787 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 804 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
788 805
789 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 806 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
790 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; 807 return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
791}
792
793static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
794{
795 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
796
797 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
798} 808}
799 809
800static void set_scratch_backing_va(struct kgd_dev *kgd, 810static void set_scratch_backing_va(struct kgd_dev *kgd,
@@ -812,8 +822,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
812 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 822 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
813 const union amdgpu_firmware_header *hdr; 823 const union amdgpu_firmware_header *hdr;
814 824
815 BUG_ON(kgd == NULL);
816
817 switch (type) { 825 switch (type) {
818 case KGD_ENGINE_PFP: 826 case KGD_ENGINE_PFP:
819 hdr = (const union amdgpu_firmware_header *) 827 hdr = (const union amdgpu_firmware_header *)
@@ -866,3 +874,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
866 return hdr->common.ucode_version; 874 return hdr->common.ucode_version;
867} 875}
868 876
877static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
878 uint32_t page_table_base)
879{
880 struct amdgpu_device *adev = get_amdgpu_device(kgd);
881
882 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
883 pr_err("trying to set page table base for wrong VMID\n");
884 return;
885 }
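	/* VMIDs 8-15 have their page table base registers in the
	 * VM_CONTEXT8..15 block, hence the (vmid - 8) offset.
	 */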
886 WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
887}
888
889static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
890{
891 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
892 int vmid;
893 unsigned int tmp;
894
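	/* Find the KFD VMID whose ATC mapping is valid and matches this PASID,
	 * then invalidate that VMID's TLB.
	 */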
895 for (vmid = 0; vmid < 16; vmid++) {
896 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
897 continue;
898
899 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
900 if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
901 (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
902 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
903 RREG32(mmVM_INVALIDATE_RESPONSE);
904 break;
905 }
906 }
907
908 return 0;
909}
910
911static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
912{
913 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
914
915 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
916 pr_err("non kfd vmid\n");
917 return 0;
918 }
919
920 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
921 RREG32(mmVM_INVALIDATE_RESPONSE);
922 return 0;
923}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index b127259d7d85..89264c9a5e9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
81 uint32_t queue_id); 81 uint32_t queue_id);
82static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, 82static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
83 unsigned int utimeout); 83 unsigned int utimeout);
84static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
85static int kgd_address_watch_disable(struct kgd_dev *kgd); 84static int kgd_address_watch_disable(struct kgd_dev *kgd);
86static int kgd_address_watch_execute(struct kgd_dev *kgd, 85static int kgd_address_watch_execute(struct kgd_dev *kgd,
87 unsigned int watch_point_id, 86 unsigned int watch_point_id,
@@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
99 uint8_t vmid); 98 uint8_t vmid);
100static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, 99static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
101 uint8_t vmid); 100 uint8_t vmid);
102static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
103static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); 101static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
104static void set_scratch_backing_va(struct kgd_dev *kgd, 102static void set_scratch_backing_va(struct kgd_dev *kgd,
105 uint64_t va, uint32_t vmid); 103 uint64_t va, uint32_t vmid);
104static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
105 uint32_t page_table_base);
106static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
107static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
106 108
107/* Because of REG_GET_FIELD() being used, we put this function in the 109/* Because of REG_GET_FIELD() being used, we put this function in the
108 * asic specific file. 110 * asic specific file.
@@ -157,12 +159,26 @@ static const struct kfd2kgd_calls kfd2kgd = {
157 get_atc_vmid_pasid_mapping_pasid, 159 get_atc_vmid_pasid_mapping_pasid,
158 .get_atc_vmid_pasid_mapping_valid = 160 .get_atc_vmid_pasid_mapping_valid =
159 get_atc_vmid_pasid_mapping_valid, 161 get_atc_vmid_pasid_mapping_valid,
160 .write_vmid_invalidate_request = write_vmid_invalidate_request,
161 .get_fw_version = get_fw_version, 162 .get_fw_version = get_fw_version,
162 .set_scratch_backing_va = set_scratch_backing_va, 163 .set_scratch_backing_va = set_scratch_backing_va,
163 .get_tile_config = get_tile_config, 164 .get_tile_config = get_tile_config,
164 .get_cu_info = get_cu_info, 165 .get_cu_info = get_cu_info,
165 .get_vram_usage = amdgpu_amdkfd_get_vram_usage 166 .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
167 .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
168 .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
169 .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
170 .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
171 .set_vm_context_page_table_base = set_vm_context_page_table_base,
172 .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
173 .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
174 .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
175 .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
176 .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
177 .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
178 .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
179 .invalidate_tlbs = invalidate_tlbs,
180 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
181 .submit_ib = amdgpu_amdkfd_submit_ib,
166}; 182};
167 183
168struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) 184struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -704,14 +720,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
704 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 720 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
705 721
706 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); 722 reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
707 return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; 723 return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
708}
709
710static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
711{
712 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
713
714 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
715} 724}
716 725
717static int kgd_address_watch_disable(struct kgd_dev *kgd) 726static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -775,8 +784,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
775 struct amdgpu_device *adev = (struct amdgpu_device *) kgd; 784 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
776 const union amdgpu_firmware_header *hdr; 785 const union amdgpu_firmware_header *hdr;
777 786
778 BUG_ON(kgd == NULL);
779
780 switch (type) { 787 switch (type) {
781 case KGD_ENGINE_PFP: 788 case KGD_ENGINE_PFP:
782 hdr = (const union amdgpu_firmware_header *) 789 hdr = (const union amdgpu_firmware_header *)
@@ -828,3 +835,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
828 /* Only 12 bit in use*/ 835 /* Only 12 bit in use*/
829 return hdr->common.ucode_version; 836 return hdr->common.ucode_version;
830} 837}
838
839static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
840 uint32_t page_table_base)
841{
842 struct amdgpu_device *adev = get_amdgpu_device(kgd);
843
844 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
845 pr_err("trying to set page table base for wrong VMID\n");
846 return;
847 }
848 WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
849}
850
851static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
852{
853 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
854 int vmid;
855 unsigned int tmp;
856
857 for (vmid = 0; vmid < 16; vmid++) {
858 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
859 continue;
860
861 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
862 if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
863 (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
864 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
865 RREG32(mmVM_INVALIDATE_RESPONSE);
866 break;
867 }
868 }
869
870 return 0;
871}
872
873static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
874{
875 struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
876
877 if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
878 pr_err("non kfd vmid %d\n", vmid);
879 return -EINVAL;
880 }
881
882 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
883 RREG32(mmVM_INVALIDATE_RESPONSE);
884 return 0;
885}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
new file mode 100644
index 000000000000..1d6e1479da38
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -0,0 +1,1577 @@
1/*
2 * Copyright 2014-2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#define pr_fmt(fmt) "kfd2kgd: " fmt
24
25#include <linux/list.h>
26#include <drm/drmP.h>
27#include "amdgpu_object.h"
28#include "amdgpu_vm.h"
29#include "amdgpu_amdkfd.h"
30
31/* Special VM and GART address alignment needed for VI pre-Fiji due to
32 * a HW bug.
33 */
34#define VI_BO_SIZE_ALIGN (0x8000)
35
36/* Impose limit on how much memory KFD can use */
37static struct {
38 uint64_t max_system_mem_limit;
39 int64_t system_mem_used;
40 spinlock_t mem_limit_lock;
41} kfd_mem_limit;
42
43/* Struct used for amdgpu_amdkfd_bo_validate */
44struct amdgpu_vm_parser {
45 uint32_t domain;
46 bool wait;
47};
48
49static const char * const domain_bit_to_string[] = {
50 "CPU",
51 "GTT",
52 "VRAM",
53 "GDS",
54 "GWS",
55 "OA"
56};
57
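/* Map a single-bit AMDGPU_GEM_DOMAIN_* flag to its name; ffs() returns the
 * 1-based index of the lowest set bit.
 */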
58#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
59
60
61
62static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
63{
64 return (struct amdgpu_device *)kgd;
65}
66
67static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
68 struct kgd_mem *mem)
69{
70 struct kfd_bo_va_list *entry;
71
72 list_for_each_entry(entry, &mem->bo_va_list, bo_list)
73 if (entry->bo_va->base.vm == avm)
74 return false;
75
76 return true;
77}
78
 79/* Set memory usage limits. Currently, the limit is:
 80 * System (kernel) memory - 3/8th of system RAM
81 */
82void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
83{
84 struct sysinfo si;
85 uint64_t mem;
86
87 si_meminfo(&si);
88 mem = si.totalram - si.totalhigh;
89 mem *= si.mem_unit;
90
91 spin_lock_init(&kfd_mem_limit.mem_limit_lock);
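	/* (mem >> 1) - (mem >> 3) = mem/2 - mem/8 = 3/8 of system memory */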
92 kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
93 pr_debug("Kernel memory limit %lluM\n",
94 (kfd_mem_limit.max_system_mem_limit >> 20));
95}
96
97static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
98 uint64_t size, u32 domain)
99{
100 size_t acc_size;
101 int ret = 0;
102
103 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
104 sizeof(struct amdgpu_bo));
105
106 spin_lock(&kfd_mem_limit.mem_limit_lock);
107 if (domain == AMDGPU_GEM_DOMAIN_GTT) {
108 if (kfd_mem_limit.system_mem_used + (acc_size + size) >
109 kfd_mem_limit.max_system_mem_limit) {
110 ret = -ENOMEM;
111 goto err_no_mem;
112 }
113 kfd_mem_limit.system_mem_used += (acc_size + size);
114 }
115err_no_mem:
116 spin_unlock(&kfd_mem_limit.mem_limit_lock);
117 return ret;
118}
119
120static void unreserve_system_mem_limit(struct amdgpu_device *adev,
121 uint64_t size, u32 domain)
122{
123 size_t acc_size;
124
125 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
126 sizeof(struct amdgpu_bo));
127
128 spin_lock(&kfd_mem_limit.mem_limit_lock);
129 if (domain == AMDGPU_GEM_DOMAIN_GTT)
130 kfd_mem_limit.system_mem_used -= (acc_size + size);
131 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
132 "kfd system memory accounting unbalanced");
133
134 spin_unlock(&kfd_mem_limit.mem_limit_lock);
135}
136
137void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
138{
139 spin_lock(&kfd_mem_limit.mem_limit_lock);
140
141 if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
142 kfd_mem_limit.system_mem_used -=
143 (bo->tbo.acc_size + amdgpu_bo_size(bo));
144 }
145 WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
146 "kfd system memory accounting unbalanced");
147
148 spin_unlock(&kfd_mem_limit.mem_limit_lock);
149}
150
151
152/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
153 * reservation object.
154 *
155 * @bo: [IN] Remove eviction fence(s) from this BO
156 * @ef: [IN] If ef is specified, then this eviction fence is removed if it
157 * is present in the shared list.
158 * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
159 * from BO's reservation object shared list.
160 * @ef_count: [OUT] Number of fences in ef_list.
161 *
162 * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
163 * called to restore the eviction fences and to avoid a memory leak. This is
164 * useful for shared BOs.
165 * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
166 */
167static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
168 struct amdgpu_amdkfd_fence *ef,
169 struct amdgpu_amdkfd_fence ***ef_list,
170 unsigned int *ef_count)
171{
172 struct reservation_object_list *fobj;
173 struct reservation_object *resv;
174 unsigned int i = 0, j = 0, k = 0, shared_count;
175 unsigned int count = 0;
176 struct amdgpu_amdkfd_fence **fence_list;
177
178 if (!ef && !ef_list)
179 return -EINVAL;
180
181 if (ef_list) {
182 *ef_list = NULL;
183 *ef_count = 0;
184 }
185
186 resv = bo->tbo.resv;
187 fobj = reservation_object_get_list(resv);
188
189 if (!fobj)
190 return 0;
191
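	/* Enter the reservation object's seqcount write section with preemption
	 * disabled so lockless readers of the shared fence list see a consistent
	 * list or retry.
	 */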
192 preempt_disable();
193 write_seqcount_begin(&resv->seq);
194
195	/* Go through all the shared fences in the reservation object. If
196 * ef is specified and it exists in the list, remove it and reduce the
197 * count. If ef is not specified, then get the count of eviction fences
198 * present.
199 */
200 shared_count = fobj->shared_count;
201 for (i = 0; i < shared_count; ++i) {
202 struct dma_fence *f;
203
204 f = rcu_dereference_protected(fobj->shared[i],
205 reservation_object_held(resv));
206
207 if (ef) {
208 if (f->context == ef->base.context) {
209 dma_fence_put(f);
210 fobj->shared_count--;
211 } else {
212 RCU_INIT_POINTER(fobj->shared[j++], f);
213 }
214 } else if (to_amdgpu_amdkfd_fence(f))
215 count++;
216 }
217 write_seqcount_end(&resv->seq);
218 preempt_enable();
219
220 if (ef || !count)
221 return 0;
222
223	/* Allocate memory for 'count' eviction fence pointers. Fill the
224	 * ef_list array and ef_count.
225 */
226 fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
227 GFP_KERNEL);
228 if (!fence_list)
229 return -ENOMEM;
230
231 preempt_disable();
232 write_seqcount_begin(&resv->seq);
233
234 j = 0;
235 for (i = 0; i < shared_count; ++i) {
236 struct dma_fence *f;
237 struct amdgpu_amdkfd_fence *efence;
238
239 f = rcu_dereference_protected(fobj->shared[i],
240 reservation_object_held(resv));
241
242 efence = to_amdgpu_amdkfd_fence(f);
243 if (efence) {
244 fence_list[k++] = efence;
245 fobj->shared_count--;
246 } else {
247 RCU_INIT_POINTER(fobj->shared[j++], f);
248 }
249 }
250
251 write_seqcount_end(&resv->seq);
252 preempt_enable();
253
254 *ef_list = fence_list;
255 *ef_count = k;
256
257 return 0;
258}
259
260/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
261 * reservation object.
262 *
263 * @bo: [IN] Add eviction fences to this BO
264 * @ef_list: [IN] List of eviction fences to be added
265 * @ef_count: [IN] Number of fences in ef_list.
266 *
267 * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
268 * function.
269 */
270static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
271 struct amdgpu_amdkfd_fence **ef_list,
272 unsigned int ef_count)
273{
274 int i;
275
276 if (!ef_list || !ef_count)
277 return;
278
279 for (i = 0; i < ef_count; i++) {
280 amdgpu_bo_fence(bo, &ef_list[i]->base, true);
281 /* Re-adding the fence takes an additional reference. Drop that
282 * reference.
283 */
284 dma_fence_put(&ef_list[i]->base);
285 }
286
287 kfree(ef_list);
288}
289
290static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
291 bool wait)
292{
293 struct ttm_operation_ctx ctx = { false, false };
294 int ret;
295
296 if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
297 "Called with userptr BO"))
298 return -EINVAL;
299
300 amdgpu_ttm_placement_from_domain(bo, domain);
301
302 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
303 if (ret)
304 goto validate_fail;
305 if (wait) {
306 struct amdgpu_amdkfd_fence **ef_list;
307 unsigned int ef_count;
308
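		/* Temporarily drop the eviction fence so ttm_bo_wait() only waits
		 * for the move fences, then add it back.
		 */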
309 ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
310 &ef_count);
311 if (ret)
312 goto validate_fail;
313
314 ttm_bo_wait(&bo->tbo, false, false);
315 amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
316 }
317
318validate_fail:
319 return ret;
320}
321
322static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
323{
324 struct amdgpu_vm_parser *p = param;
325
326 return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
327}
328
329/* vm_validate_pt_pd_bos - Validate page table and directory BOs
330 *
331 * Page directories are not updated here because huge page handling
332 * during page table updates can invalidate page directory entries
333 * again. Page directories are only updated after updating page
334 * tables.
335 */
336static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
337{
338 struct amdgpu_bo *pd = vm->root.base.bo;
339 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
340 struct amdgpu_vm_parser param;
341 uint64_t addr, flags = AMDGPU_PTE_VALID;
342 int ret;
343
344 param.domain = AMDGPU_GEM_DOMAIN_VRAM;
345 param.wait = false;
346
347 ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
348 &param);
349 if (ret) {
350 pr_err("amdgpu: failed to validate PT BOs\n");
351 return ret;
352 }
353
354 ret = amdgpu_amdkfd_validate(&param, pd);
355 if (ret) {
356 pr_err("amdgpu: failed to validate PD\n");
357 return ret;
358 }
359
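	/* Cache the page directory address in PDE format; it is later reported
	 * to KFD via get_process_page_dir().
	 */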
360 addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
361 amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
362 vm->pd_phys_addr = addr;
363
364 if (vm->use_cpu_for_update) {
365 ret = amdgpu_bo_kmap(pd, NULL);
366 if (ret) {
367 pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
368 return ret;
369 }
370 }
371
372 return 0;
373}
374
375static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
376 struct dma_fence *f)
377{
378 int ret = amdgpu_sync_fence(adev, sync, f, false);
379
380 /* Sync objects can't handle multiple GPUs (contexts) updating
381 * sync->last_vm_update. Fortunately we don't need it for
382 * KFD's purposes, so we can just drop that fence.
383 */
384 if (sync->last_vm_update) {
385 dma_fence_put(sync->last_vm_update);
386 sync->last_vm_update = NULL;
387 }
388
389 return ret;
390}
391
392static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
393{
394 struct amdgpu_bo *pd = vm->root.base.bo;
395 struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
396 int ret;
397
398 ret = amdgpu_vm_update_directories(adev, vm);
399 if (ret)
400 return ret;
401
402 return sync_vm_fence(adev, sync, vm->last_update);
403}
404
405/* add_bo_to_vm - Add a BO to a VM
406 *
407 * Everything that needs to be done only once when a BO is first added
408 * to a VM. It can later be mapped and unmapped many times without
409 * repeating these steps.
410 *
411 * 1. Allocate and initialize BO VA entry data structure
412 * 2. Add BO to the VM
413 * 3. Determine ASIC-specific PTE flags
414 * 4. Alloc page tables and directories if needed
415 * 4a. Validate new page tables and directories
416 */
417static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
418 struct amdgpu_vm *vm, bool is_aql,
419 struct kfd_bo_va_list **p_bo_va_entry)
420{
421 int ret;
422 struct kfd_bo_va_list *bo_va_entry;
423 struct amdgpu_bo *pd = vm->root.base.bo;
424 struct amdgpu_bo *bo = mem->bo;
425 uint64_t va = mem->va;
426 struct list_head *list_bo_va = &mem->bo_va_list;
427 unsigned long bo_size = bo->tbo.mem.size;
428
429 if (!va) {
430 pr_err("Invalid VA when adding BO to VM\n");
431 return -EINVAL;
432 }
433
434 if (is_aql)
435 va += bo_size;
436
437 bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
438 if (!bo_va_entry)
439 return -ENOMEM;
440
441 pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
442 va + bo_size, vm);
443
444 /* Add BO to VM internal data structures*/
445 bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
446 if (!bo_va_entry->bo_va) {
447 ret = -EINVAL;
448 pr_err("Failed to add BO object to VM. ret == %d\n",
449 ret);
450 goto err_vmadd;
451 }
452
453 bo_va_entry->va = va;
454 bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
455 mem->mapping_flags);
456 bo_va_entry->kgd_dev = (void *)adev;
457 list_add(&bo_va_entry->bo_list, list_bo_va);
458
459 if (p_bo_va_entry)
460 *p_bo_va_entry = bo_va_entry;
461
462 /* Allocate new page tables if needed and validate
463	 * them. Clearing of new page tables and validation need to wait
464 * on move fences. We don't want that to trigger the eviction
465 * fence, so remove it temporarily.
466 */
467 amdgpu_amdkfd_remove_eviction_fence(pd,
468 vm->process_info->eviction_fence,
469 NULL, NULL);
470
471 ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
472 if (ret) {
473 pr_err("Failed to allocate pts, err=%d\n", ret);
474 goto err_alloc_pts;
475 }
476
477 ret = vm_validate_pt_pd_bos(vm);
478 if (ret) {
479 pr_err("validate_pt_pd_bos() failed\n");
480 goto err_alloc_pts;
481 }
482
483 /* Add the eviction fence back */
484 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
485
486 return 0;
487
488err_alloc_pts:
489 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
490 amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
491 list_del(&bo_va_entry->bo_list);
492err_vmadd:
493 kfree(bo_va_entry);
494 return ret;
495}
496
497static void remove_bo_from_vm(struct amdgpu_device *adev,
498 struct kfd_bo_va_list *entry, unsigned long size)
499{
500 pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
501 entry->va,
502 entry->va + size, entry);
503 amdgpu_vm_bo_rmv(adev, entry->bo_va);
504 list_del(&entry->bo_list);
505 kfree(entry);
506}
507
508static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
509 struct amdkfd_process_info *process_info)
510{
511 struct ttm_validate_buffer *entry = &mem->validate_list;
512 struct amdgpu_bo *bo = mem->bo;
513
514 INIT_LIST_HEAD(&entry->head);
515 entry->shared = true;
516 entry->bo = &bo->tbo;
517 mutex_lock(&process_info->lock);
518 list_add_tail(&entry->head, &process_info->kfd_bo_list);
519 mutex_unlock(&process_info->lock);
520}
521
522/* Reserving a BO and its page table BOs must happen atomically to
523 * avoid deadlocks. Some operations update multiple VMs at once. Track
524 * all the reservation info in a context structure. Optionally a sync
525 * object can track VM updates.
526 */
527struct bo_vm_reservation_context {
528 struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
529 unsigned int n_vms; /* Number of VMs reserved */
530 struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */
531 struct ww_acquire_ctx ticket; /* Reservation ticket */
532 struct list_head list, duplicates; /* BO lists */
533 struct amdgpu_sync *sync; /* Pointer to sync object */
534 bool reserved; /* Whether BOs are reserved */
535};
536
537enum bo_vm_match {
538 BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */
539 BO_VM_MAPPED, /* Match VMs where a BO is mapped */
540 BO_VM_ALL, /* Match all VMs a BO was added to */
541};
542
543/**
544 * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
545 * @mem: KFD BO structure.
546 * @vm: the VM to reserve.
547 * @ctx: the struct that will be used in unreserve_bo_and_vms().
548 */
549static int reserve_bo_and_vm(struct kgd_mem *mem,
550 struct amdgpu_vm *vm,
551 struct bo_vm_reservation_context *ctx)
552{
553 struct amdgpu_bo *bo = mem->bo;
554 int ret;
555
556 WARN_ON(!vm);
557
558 ctx->reserved = false;
559 ctx->n_vms = 1;
560 ctx->sync = &mem->sync;
561
562 INIT_LIST_HEAD(&ctx->list);
563 INIT_LIST_HEAD(&ctx->duplicates);
564
565 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
566 if (!ctx->vm_pd)
567 return -ENOMEM;
568
569 ctx->kfd_bo.robj = bo;
570 ctx->kfd_bo.priority = 0;
571 ctx->kfd_bo.tv.bo = &bo->tbo;
572 ctx->kfd_bo.tv.shared = true;
573 ctx->kfd_bo.user_pages = NULL;
574 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
575
576 amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
577
578 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
579 false, &ctx->duplicates);
580 if (!ret)
581 ctx->reserved = true;
582 else {
583 pr_err("Failed to reserve buffers in ttm\n");
584 kfree(ctx->vm_pd);
585 ctx->vm_pd = NULL;
586 }
587
588 return ret;
589}
590
591/**
592 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
593 * @mem: KFD BO structure.
594 * @vm: the VM to reserve. If NULL, then all VMs associated with the BO
595 * are used. Otherwise, only the given VM is used.
596 * @map_type: the mapping status that will be used to filter the VMs.
597 * @ctx: the struct that will be used in unreserve_bo_and_vms().
598 *
599 * Returns 0 for success, negative for failure.
600 */
601static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
602 struct amdgpu_vm *vm, enum bo_vm_match map_type,
603 struct bo_vm_reservation_context *ctx)
604{
605 struct amdgpu_bo *bo = mem->bo;
606 struct kfd_bo_va_list *entry;
607 unsigned int i;
608 int ret;
609
610 ctx->reserved = false;
611 ctx->n_vms = 0;
612 ctx->vm_pd = NULL;
613 ctx->sync = &mem->sync;
614
615 INIT_LIST_HEAD(&ctx->list);
616 INIT_LIST_HEAD(&ctx->duplicates);
617
618 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
619 if ((vm && vm != entry->bo_va->base.vm) ||
620 (entry->is_mapped != map_type
621 && map_type != BO_VM_ALL))
622 continue;
623
624 ctx->n_vms++;
625 }
626
627 if (ctx->n_vms != 0) {
628 ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
629 GFP_KERNEL);
630 if (!ctx->vm_pd)
631 return -ENOMEM;
632 }
633
634 ctx->kfd_bo.robj = bo;
635 ctx->kfd_bo.priority = 0;
636 ctx->kfd_bo.tv.bo = &bo->tbo;
637 ctx->kfd_bo.tv.shared = true;
638 ctx->kfd_bo.user_pages = NULL;
639 list_add(&ctx->kfd_bo.tv.head, &ctx->list);
640
641 i = 0;
642 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
643 if ((vm && vm != entry->bo_va->base.vm) ||
644 (entry->is_mapped != map_type
645 && map_type != BO_VM_ALL))
646 continue;
647
648 amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
649 &ctx->vm_pd[i]);
650 i++;
651 }
652
653 ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
654 false, &ctx->duplicates);
655 if (!ret)
656 ctx->reserved = true;
657 else
658 pr_err("Failed to reserve buffers in ttm.\n");
659
660 if (ret) {
661 kfree(ctx->vm_pd);
662 ctx->vm_pd = NULL;
663 }
664
665 return ret;
666}
667
668/**
669 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
670 * @ctx: Reservation context to unreserve
671 * @wait: Optionally wait for a sync object representing pending VM updates
672 * @intr: Whether the wait is interruptible
673 *
674 * Also frees any resources allocated in
675 * reserve_bo_and_(cond_)vm(s). Returns the status from
676 * amdgpu_sync_wait.
677 */
678static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
679 bool wait, bool intr)
680{
681 int ret = 0;
682
683 if (wait)
684 ret = amdgpu_sync_wait(ctx->sync, intr);
685
686 if (ctx->reserved)
687 ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
688 kfree(ctx->vm_pd);
689
690 ctx->sync = NULL;
691
692 ctx->reserved = false;
693 ctx->vm_pd = NULL;
694
695 return ret;
696}
697
698static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
699 struct kfd_bo_va_list *entry,
700 struct amdgpu_sync *sync)
701{
702 struct amdgpu_bo_va *bo_va = entry->bo_va;
703 struct amdgpu_vm *vm = bo_va->base.vm;
704 struct amdgpu_bo *pd = vm->root.base.bo;
705
706 /* Remove eviction fence from PD (and thereby from PTs too as
707 * they share the resv. object). Otherwise during PT update
708 * job (see amdgpu_vm_bo_update_mapping), eviction fence would
709 * get added to job->sync object and job execution would
710 * trigger the eviction fence.
711 */
712 amdgpu_amdkfd_remove_eviction_fence(pd,
713 vm->process_info->eviction_fence,
714 NULL, NULL);
715 amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
716
717 amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
718
719 /* Add the eviction fence back */
720 amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
721
722 sync_vm_fence(adev, sync, bo_va->last_pt_update);
723
724 return 0;
725}
726
727static int update_gpuvm_pte(struct amdgpu_device *adev,
728 struct kfd_bo_va_list *entry,
729 struct amdgpu_sync *sync)
730{
731 int ret;
732 struct amdgpu_vm *vm;
733 struct amdgpu_bo_va *bo_va;
734 struct amdgpu_bo *bo;
735
736 bo_va = entry->bo_va;
737 vm = bo_va->base.vm;
738 bo = bo_va->base.bo;
739
740 /* Update the page tables */
741 ret = amdgpu_vm_bo_update(adev, bo_va, false);
742 if (ret) {
743 pr_err("amdgpu_vm_bo_update failed\n");
744 return ret;
745 }
746
747 return sync_vm_fence(adev, sync, bo_va->last_pt_update);
748}
749
750static int map_bo_to_gpuvm(struct amdgpu_device *adev,
751 struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
752{
753 int ret;
754
755 /* Set virtual address for the allocation */
756 ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
757 amdgpu_bo_size(entry->bo_va->base.bo),
758 entry->pte_flags);
759 if (ret) {
760 pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
761 entry->va, ret);
762 return ret;
763 }
764
765 ret = update_gpuvm_pte(adev, entry, sync);
766 if (ret) {
767 pr_err("update_gpuvm_pte() failed\n");
768 goto update_gpuvm_pte_failed;
769 }
770
771 return 0;
772
773update_gpuvm_pte_failed:
774 unmap_bo_from_gpuvm(adev, entry, sync);
775 return ret;
776}
777
778static int process_validate_vms(struct amdkfd_process_info *process_info)
779{
780 struct amdgpu_vm *peer_vm;
781 int ret;
782
783 list_for_each_entry(peer_vm, &process_info->vm_list_head,
784 vm_list_node) {
785 ret = vm_validate_pt_pd_bos(peer_vm);
786 if (ret)
787 return ret;
788 }
789
790 return 0;
791}
792
793static int process_update_pds(struct amdkfd_process_info *process_info,
794 struct amdgpu_sync *sync)
795{
796 struct amdgpu_vm *peer_vm;
797 int ret;
798
799 list_for_each_entry(peer_vm, &process_info->vm_list_head,
800 vm_list_node) {
801 ret = vm_update_pds(peer_vm, sync);
802 if (ret)
803 return ret;
804 }
805
806 return 0;
807}
808
809static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
810 struct dma_fence **ef)
811{
812 struct amdkfd_process_info *info = NULL;
813 int ret;
814
815 if (!*process_info) {
816 info = kzalloc(sizeof(*info), GFP_KERNEL);
817 if (!info)
818 return -ENOMEM;
819
820 mutex_init(&info->lock);
821 INIT_LIST_HEAD(&info->vm_list_head);
822 INIT_LIST_HEAD(&info->kfd_bo_list);
823
824 info->eviction_fence =
825 amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
826 current->mm);
827 if (!info->eviction_fence) {
828 pr_err("Failed to create eviction fence\n");
829 ret = -ENOMEM;
830 goto create_evict_fence_fail;
831 }
832
833 *process_info = info;
834 *ef = dma_fence_get(&info->eviction_fence->base);
835 }
836
837 vm->process_info = *process_info;
838
839 /* Validate page directory and attach eviction fence */
840 ret = amdgpu_bo_reserve(vm->root.base.bo, true);
841 if (ret)
842 goto reserve_pd_fail;
843 ret = vm_validate_pt_pd_bos(vm);
844 if (ret) {
845 pr_err("validate_pt_pd_bos() failed\n");
846 goto validate_pd_fail;
847 }
848 ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
849 if (ret)
850 goto wait_pd_fail;
851 amdgpu_bo_fence(vm->root.base.bo,
852 &vm->process_info->eviction_fence->base, true);
853 amdgpu_bo_unreserve(vm->root.base.bo);
854
855 /* Update process info */
856 mutex_lock(&vm->process_info->lock);
857 list_add_tail(&vm->vm_list_node,
858 &(vm->process_info->vm_list_head));
859 vm->process_info->n_vms++;
860 mutex_unlock(&vm->process_info->lock);
861
862 return 0;
863
864wait_pd_fail:
865validate_pd_fail:
866 amdgpu_bo_unreserve(vm->root.base.bo);
867reserve_pd_fail:
868 vm->process_info = NULL;
869 if (info) {
870 /* Two fence references: one in info and one in *ef */
871 dma_fence_put(&info->eviction_fence->base);
872 dma_fence_put(*ef);
873 *ef = NULL;
874 *process_info = NULL;
875create_evict_fence_fail:
876 mutex_destroy(&info->lock);
877 kfree(info);
878 }
879 return ret;
880}
881
882int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
883 void **process_info,
884 struct dma_fence **ef)
885{
886 struct amdgpu_device *adev = get_amdgpu_device(kgd);
887 struct amdgpu_vm *new_vm;
888 int ret;
889
890 new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
891 if (!new_vm)
892 return -ENOMEM;
893
894 /* Initialize AMDGPU part of the VM */
895 ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
896 if (ret) {
897 pr_err("Failed init vm ret %d\n", ret);
898 goto amdgpu_vm_init_fail;
899 }
900
901 /* Initialize KFD part of the VM and process info */
902 ret = init_kfd_vm(new_vm, process_info, ef);
903 if (ret)
904 goto init_kfd_vm_fail;
905
906 *vm = (void *) new_vm;
907
908 return 0;
909
910init_kfd_vm_fail:
911 amdgpu_vm_fini(adev, new_vm);
912amdgpu_vm_init_fail:
913 kfree(new_vm);
914 return ret;
915}
916
917int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
918 struct file *filp,
919 void **vm, void **process_info,
920 struct dma_fence **ef)
921{
922 struct amdgpu_device *adev = get_amdgpu_device(kgd);
923 struct drm_file *drm_priv = filp->private_data;
924 struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
925 struct amdgpu_vm *avm = &drv_priv->vm;
926 int ret;
927
928 /* Already a compute VM? */
929 if (avm->process_info)
930 return -EINVAL;
931
932 /* Convert VM into a compute VM */
933 ret = amdgpu_vm_make_compute(adev, avm);
934 if (ret)
935 return ret;
936
937 /* Initialize KFD part of the VM and process info */
938 ret = init_kfd_vm(avm, process_info, ef);
939 if (ret)
940 return ret;
941
942 *vm = (void *)avm;
943
944 return 0;
945}
946
947void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
948 struct amdgpu_vm *vm)
949{
950 struct amdkfd_process_info *process_info = vm->process_info;
951 struct amdgpu_bo *pd = vm->root.base.bo;
952
953 if (!process_info)
954 return;
955
956 /* Release eviction fence from PD */
957 amdgpu_bo_reserve(pd, false);
958 amdgpu_bo_fence(pd, NULL, false);
959 amdgpu_bo_unreserve(pd);
960
961 /* Update process info */
962 mutex_lock(&process_info->lock);
963 process_info->n_vms--;
964 list_del(&vm->vm_list_node);
965 mutex_unlock(&process_info->lock);
966
967 /* Release per-process resources when last compute VM is destroyed */
968 if (!process_info->n_vms) {
969 WARN_ON(!list_empty(&process_info->kfd_bo_list));
970
971 dma_fence_put(&process_info->eviction_fence->base);
972 mutex_destroy(&process_info->lock);
973 kfree(process_info);
974 }
975}
976
977void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
978{
979 struct amdgpu_device *adev = get_amdgpu_device(kgd);
980 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
981
982 if (WARN_ON(!kgd || !vm))
983 return;
984
985 pr_debug("Destroying process vm %p\n", vm);
986
987 /* Release the VM context */
988 amdgpu_vm_fini(adev, avm);
989 kfree(vm);
990}
991
992uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
993{
994 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
995
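	/* Return the page directory address as a GPU page frame number */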
996 return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
997}
998
999int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1000 struct kgd_dev *kgd, uint64_t va, uint64_t size,
1001 void *vm, struct kgd_mem **mem,
1002 uint64_t *offset, uint32_t flags)
1003{
1004 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1005 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1006 struct amdgpu_bo *bo;
1007 int byte_align;
1008 u32 alloc_domain;
1009 u64 alloc_flags;
1010 uint32_t mapping_flags;
1011 int ret;
1012
1013 /*
1014	 * Check which domain to allocate the BO in
1015 */
1016 if (flags & ALLOC_MEM_FLAGS_VRAM) {
1017 alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
1018 alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
1019 alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
1020 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
1021 AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
1022 } else if (flags & ALLOC_MEM_FLAGS_GTT) {
1023 alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
1024 alloc_flags = 0;
1025 } else {
1026 return -EINVAL;
1027 }
1028
1029 *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
1030 if (!*mem)
1031 return -ENOMEM;
1032 INIT_LIST_HEAD(&(*mem)->bo_va_list);
1033 mutex_init(&(*mem)->lock);
1034 (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
1035
1036 /* Workaround for AQL queue wraparound bug. Map the same
1037 * memory twice. That means we only actually allocate half
1038 * the memory.
1039 */
1040 if ((*mem)->aql_queue)
1041 size = size >> 1;
1042
1043 /* Workaround for TLB bug on older VI chips */
1044 byte_align = (adev->family == AMDGPU_FAMILY_VI &&
1045 adev->asic_type != CHIP_FIJI &&
1046 adev->asic_type != CHIP_POLARIS10 &&
1047 adev->asic_type != CHIP_POLARIS11) ?
1048 VI_BO_SIZE_ALIGN : 1;
1049
1050 mapping_flags = AMDGPU_VM_PAGE_READABLE;
1051 if (flags & ALLOC_MEM_FLAGS_WRITABLE)
1052 mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
1053 if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
1054 mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
1055 if (flags & ALLOC_MEM_FLAGS_COHERENT)
1056 mapping_flags |= AMDGPU_VM_MTYPE_UC;
1057 else
1058 mapping_flags |= AMDGPU_VM_MTYPE_NC;
1059 (*mem)->mapping_flags = mapping_flags;
1060
1061 amdgpu_sync_create(&(*mem)->sync);
1062
1063 ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
1064 if (ret) {
1065 pr_debug("Insufficient system memory\n");
1066 goto err_reserve_system_mem;
1067 }
1068
1069 pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
1070 va, size, domain_string(alloc_domain));
1071
1072 ret = amdgpu_bo_create(adev, size, byte_align,
1073 alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
1074 if (ret) {
1075 pr_debug("Failed to create BO on domain %s. ret %d\n",
1076 domain_string(alloc_domain), ret);
1077 goto err_bo_create;
1078 }
1079 bo->kfd_bo = *mem;
1080 (*mem)->bo = bo;
1081
1082 (*mem)->va = va;
1083 (*mem)->domain = alloc_domain;
1084 (*mem)->mapped_to_gpu_memory = 0;
1085 (*mem)->process_info = avm->process_info;
1086 add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
1087
1088 if (offset)
1089 *offset = amdgpu_bo_mmap_offset(bo);
1090
1091 return 0;
1092
1093err_bo_create:
1094 unreserve_system_mem_limit(adev, size, alloc_domain);
1095err_reserve_system_mem:
1096 mutex_destroy(&(*mem)->lock);
1097 kfree(*mem);
1098 return ret;
1099}
1100
1101int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
1102 struct kgd_dev *kgd, struct kgd_mem *mem)
1103{
1104 struct amdkfd_process_info *process_info = mem->process_info;
1105 unsigned long bo_size = mem->bo->tbo.mem.size;
1106 struct kfd_bo_va_list *entry, *tmp;
1107 struct bo_vm_reservation_context ctx;
1108 struct ttm_validate_buffer *bo_list_entry;
1109 int ret;
1110
1111 mutex_lock(&mem->lock);
1112
1113 if (mem->mapped_to_gpu_memory > 0) {
1114 pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
1115 mem->va, bo_size);
1116 mutex_unlock(&mem->lock);
1117 return -EBUSY;
1118 }
1119
1120 mutex_unlock(&mem->lock);
1121 /* lock is not needed after this, since mem is unused and will
1122 * be freed anyway
1123 */
1124
1125 /* Make sure restore workers don't access the BO any more */
1126 bo_list_entry = &mem->validate_list;
1127 mutex_lock(&process_info->lock);
1128 list_del(&bo_list_entry->head);
1129 mutex_unlock(&process_info->lock);
1130
1131 ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
1132 if (unlikely(ret))
1133 return ret;
1134
1135 /* The eviction fence should be removed by the last unmap.
1136 * TODO: Log an error condition if the bo still has the eviction fence
1137 * attached
1138 */
1139 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1140 process_info->eviction_fence,
1141 NULL, NULL);
1142 pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
1143 mem->va + bo_size * (1 + mem->aql_queue));
1144
1145 /* Remove from VM internal data structures */
1146 list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
1147 remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
1148 entry, bo_size);
1149
1150 ret = unreserve_bo_and_vms(&ctx, false, false);
1151
1152 /* Free the sync object */
1153 amdgpu_sync_free(&mem->sync);
1154
1155	/* Free the BO */
1156 amdgpu_bo_unref(&mem->bo);
1157 mutex_destroy(&mem->lock);
1158 kfree(mem);
1159
1160 return ret;
1161}
1162
1163int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1164 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
1165{
1166 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1167 struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
1168 int ret;
1169 struct amdgpu_bo *bo;
1170 uint32_t domain;
1171 struct kfd_bo_va_list *entry;
1172 struct bo_vm_reservation_context ctx;
1173 struct kfd_bo_va_list *bo_va_entry = NULL;
1174 struct kfd_bo_va_list *bo_va_entry_aql = NULL;
1175 unsigned long bo_size;
1176
1177 /* Make sure restore is not running concurrently.
1178 */
1179 mutex_lock(&mem->process_info->lock);
1180
1181 mutex_lock(&mem->lock);
1182
1183 bo = mem->bo;
1184
1185 if (!bo) {
1186 pr_err("Invalid BO when mapping memory to GPU\n");
1187 ret = -EINVAL;
1188 goto out;
1189 }
1190
1191 domain = mem->domain;
1192 bo_size = bo->tbo.mem.size;
1193
1194 pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
1195 mem->va,
1196 mem->va + bo_size * (1 + mem->aql_queue),
1197 vm, domain_string(domain));
1198
1199 ret = reserve_bo_and_vm(mem, vm, &ctx);
1200 if (unlikely(ret))
1201 goto out;
1202
1203 if (check_if_add_bo_to_vm(avm, mem)) {
1204 ret = add_bo_to_vm(adev, mem, avm, false,
1205 &bo_va_entry);
1206 if (ret)
1207 goto add_bo_to_vm_failed;
1208 if (mem->aql_queue) {
1209 ret = add_bo_to_vm(adev, mem, avm,
1210 true, &bo_va_entry_aql);
1211 if (ret)
1212 goto add_bo_to_vm_failed_aql;
1213 }
1214 } else {
1215 ret = vm_validate_pt_pd_bos(avm);
1216 if (unlikely(ret))
1217 goto add_bo_to_vm_failed;
1218 }
1219
1220 if (mem->mapped_to_gpu_memory == 0) {
1221 /* Validate BO only once. The eviction fence gets added to BO
1222 * the first time it is mapped. Validate will wait for all
1223 * background evictions to complete.
1224 */
1225 ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
1226 if (ret) {
1227 pr_debug("Validate failed\n");
1228 goto map_bo_to_gpuvm_failed;
1229 }
1230 }
1231
1232 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
1233 if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
1234 pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
1235 entry->va, entry->va + bo_size,
1236 entry);
1237
1238 ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
1239 if (ret) {
1240 pr_err("Failed to map radeon bo to gpuvm\n");
1241 goto map_bo_to_gpuvm_failed;
1242 }
1243
1244 ret = vm_update_pds(vm, ctx.sync);
1245 if (ret) {
1246 pr_err("Failed to update page directories\n");
1247 goto map_bo_to_gpuvm_failed;
1248 }
1249
1250 entry->is_mapped = true;
1251 mem->mapped_to_gpu_memory++;
1252 pr_debug("\t INC mapping count %d\n",
1253 mem->mapped_to_gpu_memory);
1254 }
1255 }
1256
1257 if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
1258 amdgpu_bo_fence(bo,
1259 &avm->process_info->eviction_fence->base,
1260 true);
1261 ret = unreserve_bo_and_vms(&ctx, false, false);
1262
1263 goto out;
1264
1265map_bo_to_gpuvm_failed:
1266 if (bo_va_entry_aql)
1267 remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
1268add_bo_to_vm_failed_aql:
1269 if (bo_va_entry)
1270 remove_bo_from_vm(adev, bo_va_entry, bo_size);
1271add_bo_to_vm_failed:
1272 unreserve_bo_and_vms(&ctx, false, false);
1273out:
1274 mutex_unlock(&mem->process_info->lock);
1275 mutex_unlock(&mem->lock);
1276 return ret;
1277}
1278
1279int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1280 struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
1281{
1282 struct amdgpu_device *adev = get_amdgpu_device(kgd);
1283 struct amdkfd_process_info *process_info =
1284 ((struct amdgpu_vm *)vm)->process_info;
1285 unsigned long bo_size = mem->bo->tbo.mem.size;
1286 struct kfd_bo_va_list *entry;
1287 struct bo_vm_reservation_context ctx;
1288 int ret;
1289
1290 mutex_lock(&mem->lock);
1291
1292 ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
1293 if (unlikely(ret))
1294 goto out;
1295 /* If no VMs were reserved, it means the BO wasn't actually mapped */
1296 if (ctx.n_vms == 0) {
1297 ret = -EINVAL;
1298 goto unreserve_out;
1299 }
1300
1301 ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
1302 if (unlikely(ret))
1303 goto unreserve_out;
1304
1305 pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
1306 mem->va,
1307 mem->va + bo_size * (1 + mem->aql_queue),
1308 vm);
1309
1310 list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
1311 if (entry->bo_va->base.vm == vm && entry->is_mapped) {
1312 pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
1313 entry->va,
1314 entry->va + bo_size,
1315 entry);
1316
1317 ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
1318 if (ret == 0) {
1319 entry->is_mapped = false;
1320 } else {
1321 pr_err("failed to unmap VA 0x%llx\n",
1322 mem->va);
1323 goto unreserve_out;
1324 }
1325
1326 mem->mapped_to_gpu_memory--;
1327 pr_debug("\t DEC mapping count %d\n",
1328 mem->mapped_to_gpu_memory);
1329 }
1330 }
1331
1332 /* If BO is unmapped from all VMs, unfence it. It can be evicted if
1333 * required.
1334 */
1335 if (mem->mapped_to_gpu_memory == 0 &&
1336 !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
1337 amdgpu_amdkfd_remove_eviction_fence(mem->bo,
1338 process_info->eviction_fence,
1339 NULL, NULL);
1340
1341unreserve_out:
1342 unreserve_bo_and_vms(&ctx, false, false);
1343out:
1344 mutex_unlock(&mem->lock);
1345 return ret;
1346}
1347
1348int amdgpu_amdkfd_gpuvm_sync_memory(
1349 struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
1350{
1351 struct amdgpu_sync sync;
1352 int ret;
1353
1354 amdgpu_sync_create(&sync);
1355
1356 mutex_lock(&mem->lock);
1357 amdgpu_sync_clone(&mem->sync, &sync);
1358 mutex_unlock(&mem->lock);
1359
1360 ret = amdgpu_sync_wait(&sync, intr);
1361 amdgpu_sync_free(&sync);
1362 return ret;
1363}
1364
1365int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
1366 struct kgd_mem *mem, void **kptr, uint64_t *size)
1367{
1368 int ret;
1369 struct amdgpu_bo *bo = mem->bo;
1370
1371 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
1372 pr_err("userptr can't be mapped to kernel\n");
1373 return -EINVAL;
1374 }
1375
1376 /* delete kgd_mem from kfd_bo_list to avoid re-validating
1377	 * this BO during restore after an eviction.
1378 */
1379 mutex_lock(&mem->process_info->lock);
1380
1381 ret = amdgpu_bo_reserve(bo, true);
1382 if (ret) {
1383 pr_err("Failed to reserve bo. ret %d\n", ret);
1384 goto bo_reserve_failed;
1385 }
1386
1387 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
1388 if (ret) {
1389 pr_err("Failed to pin bo. ret %d\n", ret);
1390 goto pin_failed;
1391 }
1392
1393 ret = amdgpu_bo_kmap(bo, kptr);
1394 if (ret) {
1395 pr_err("Failed to map bo to kernel. ret %d\n", ret);
1396 goto kmap_failed;
1397 }
1398
1399 amdgpu_amdkfd_remove_eviction_fence(
1400 bo, mem->process_info->eviction_fence, NULL, NULL);
1401 list_del_init(&mem->validate_list.head);
1402
1403 if (size)
1404 *size = amdgpu_bo_size(bo);
1405
1406 amdgpu_bo_unreserve(bo);
1407
1408 mutex_unlock(&mem->process_info->lock);
1409 return 0;
1410
1411kmap_failed:
1412 amdgpu_bo_unpin(bo);
1413pin_failed:
1414 amdgpu_bo_unreserve(bo);
1415bo_reserve_failed:
1416 mutex_unlock(&mem->process_info->lock);
1417
1418 return ret;
1419}
1420
1421/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
1422 * KFD process identified by process_info
1423 *
1424 * @process_info: amdkfd_process_info of the KFD process
1425 *
1426 * After memory eviction, the restore thread calls this function. The function
1427 * should be called while the process is still valid. BO restore involves:
1428 *
1429 * 1. Release old eviction fence and create new one
1430 * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
1431 * 3. Use the second PD list and kfd_bo_list to create a list (ctx.list) of
1432 * BOs that need to be reserved.
1433 * 4. Reserve all the BOs
1434 * 5. Validate PD and PT BOs.
1435 * 6. Validate all KFD BOs using kfd_bo_list, map them and add a new fence
1436 * 7. Add fence to all PD and PT BOs.
1437 * 8. Unreserve all BOs
1438 */
1439int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
1440{
1441 struct amdgpu_bo_list_entry *pd_bo_list;
1442 struct amdkfd_process_info *process_info = info;
1443 struct amdgpu_vm *peer_vm;
1444 struct kgd_mem *mem;
1445 struct bo_vm_reservation_context ctx;
1446 struct amdgpu_amdkfd_fence *new_fence;
1447 int ret = 0, i;
1448 struct list_head duplicate_save;
1449 struct amdgpu_sync sync_obj;
1450
1451 INIT_LIST_HEAD(&duplicate_save);
1452 INIT_LIST_HEAD(&ctx.list);
1453 INIT_LIST_HEAD(&ctx.duplicates);
1454
1455 pd_bo_list = kcalloc(process_info->n_vms,
1456 sizeof(struct amdgpu_bo_list_entry),
1457 GFP_KERNEL);
1458 if (!pd_bo_list)
1459 return -ENOMEM;
1460
1461 i = 0;
1462 mutex_lock(&process_info->lock);
1463 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1464 vm_list_node)
1465 amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
1466
1467 /* Reserve all BOs and page tables/directory. Add all BOs from
1468 * kfd_bo_list to ctx.list
1469 */
1470 list_for_each_entry(mem, &process_info->kfd_bo_list,
1471 validate_list.head) {
1472
1473 list_add_tail(&mem->resv_list.head, &ctx.list);
1474 mem->resv_list.bo = mem->validate_list.bo;
1475 mem->resv_list.shared = mem->validate_list.shared;
1476 }
1477
1478 ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
1479 false, &duplicate_save);
1480 if (ret) {
1481 pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
1482 goto ttm_reserve_fail;
1483 }
1484
1485 amdgpu_sync_create(&sync_obj);
1486
1487 /* Validate PDs and PTs */
1488 ret = process_validate_vms(process_info);
1489 if (ret)
1490 goto validate_map_fail;
1491
1492	/* Wait for PD/PT validation to finish */
1493 /* FIXME: I think this isn't needed */
1494 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1495 vm_list_node) {
1496 struct amdgpu_bo *bo = peer_vm->root.base.bo;
1497
1498 ttm_bo_wait(&bo->tbo, false, false);
1499 }
1500
1501 /* Validate BOs and map them to GPUVM (update VM page tables). */
1502 list_for_each_entry(mem, &process_info->kfd_bo_list,
1503 validate_list.head) {
1504
1505 struct amdgpu_bo *bo = mem->bo;
1506 uint32_t domain = mem->domain;
1507 struct kfd_bo_va_list *bo_va_entry;
1508
1509 ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
1510 if (ret) {
1511 pr_debug("Memory eviction: Validate BOs failed. Try again\n");
1512 goto validate_map_fail;
1513 }
1514
1515 list_for_each_entry(bo_va_entry, &mem->bo_va_list,
1516 bo_list) {
1517 ret = update_gpuvm_pte((struct amdgpu_device *)
1518 bo_va_entry->kgd_dev,
1519 bo_va_entry,
1520 &sync_obj);
1521 if (ret) {
1522 pr_debug("Memory eviction: update PTE failed. Try again\n");
1523 goto validate_map_fail;
1524 }
1525 }
1526 }
1527
1528 /* Update page directories */
1529 ret = process_update_pds(process_info, &sync_obj);
1530 if (ret) {
1531 pr_debug("Memory eviction: update PDs failed. Try again\n");
1532 goto validate_map_fail;
1533 }
1534
1535 amdgpu_sync_wait(&sync_obj, false);
1536
1537	/* Release the old eviction fence and create a new one, because a fence
1538	 * only goes from unsignaled to signaled and cannot be reused.
1539 * Use context and mm from the old fence.
1540 */
1541 new_fence = amdgpu_amdkfd_fence_create(
1542 process_info->eviction_fence->base.context,
1543 process_info->eviction_fence->mm);
1544 if (!new_fence) {
1545 pr_err("Failed to create eviction fence\n");
1546 ret = -ENOMEM;
1547 goto validate_map_fail;
1548 }
1549 dma_fence_put(&process_info->eviction_fence->base);
1550 process_info->eviction_fence = new_fence;
1551 *ef = dma_fence_get(&new_fence->base);
1552
1553	/* Wait for validation to finish and attach the new eviction fence */
1554 list_for_each_entry(mem, &process_info->kfd_bo_list,
1555 validate_list.head)
1556 ttm_bo_wait(&mem->bo->tbo, false, false);
1557 list_for_each_entry(mem, &process_info->kfd_bo_list,
1558 validate_list.head)
1559 amdgpu_bo_fence(mem->bo,
1560 &process_info->eviction_fence->base, true);
1561
1562 /* Attach eviction fence to PD / PT BOs */
1563 list_for_each_entry(peer_vm, &process_info->vm_list_head,
1564 vm_list_node) {
1565 struct amdgpu_bo *bo = peer_vm->root.base.bo;
1566
1567 amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
1568 }
1569
1570validate_map_fail:
1571 ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
1572 amdgpu_sync_free(&sync_obj);
1573ttm_reserve_fail:
1574 mutex_unlock(&process_info->lock);
1575 kfree(pd_bo_list);
1576 return ret;
1577}
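
For orientation, the restore entry point above is meant to be driven from the KFD side once an eviction has completed. The sketch below is a hypothetical caller, not part of this patch: the worker, the example_kfd_process structure and the retry delay are assumptions for illustration only.

	/* Illustrative only: re-validate a process' BOs after an eviction and
	 * take over the new eviction fence returned through *ef. */
	struct example_kfd_process {
		void *kgd_process_info;		/* opaque info for the amdkfd helpers */
		struct dma_fence *ef;		/* current eviction fence */
		struct delayed_work restore_work;
	};

	static void example_restore_bos_worker(struct work_struct *work)
	{
		struct example_kfd_process *p = container_of(work,
				struct example_kfd_process, restore_work.work);
		struct dma_fence *ef = NULL;

		if (amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, &ef)) {
			/* Reservation or validation failed, retry later. */
			schedule_delayed_work(&p->restore_work, msecs_to_jiffies(100));
			return;
		}

		dma_fence_put(p->ef);		/* drop the old fence, if any */
		p->ef = ef;			/* *ef already holds a reference */
	}
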
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index ff8efd0f8fd5..a0f48cb9b8f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -114,6 +114,9 @@ union igp_info {
114 struct atom_integrated_system_info_v1_11 v11; 114 struct atom_integrated_system_info_v1_11 v11;
115}; 115};
116 116
117union umc_info {
118 struct atom_umc_info_v3_1 v31;
119};
117/* 120/*
118 * Return vram width from integrated system info table, if available, 121 * Return vram width from integrated system info table, if available,
119 * or 0 if not. 122 * or 0 if not.
@@ -143,6 +146,94 @@ int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
143 return 0; 146 return 0;
144} 147}
145 148
149static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
150 int atom_mem_type)
151{
152 int vram_type;
153
154 if (adev->flags & AMD_IS_APU) {
155 switch (atom_mem_type) {
156 case Ddr2MemType:
157 case LpDdr2MemType:
158 vram_type = AMDGPU_VRAM_TYPE_DDR2;
159 break;
160 case Ddr3MemType:
161 case LpDdr3MemType:
162 vram_type = AMDGPU_VRAM_TYPE_DDR3;
163 break;
164 case Ddr4MemType:
165 case LpDdr4MemType:
166 vram_type = AMDGPU_VRAM_TYPE_DDR4;
167 break;
168 default:
169 vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
170 break;
171 }
172 } else {
173 switch (atom_mem_type) {
174 case ATOM_DGPU_VRAM_TYPE_GDDR5:
175 vram_type = AMDGPU_VRAM_TYPE_GDDR5;
176 break;
177 case ATOM_DGPU_VRAM_TYPE_HBM:
178 vram_type = AMDGPU_VRAM_TYPE_HBM;
179 break;
180 default:
181 vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
182 break;
183 }
184 }
185
186 return vram_type;
187}
188/*
189 * Return vram type from either integrated system info table
190 * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not
191 */
192int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
193{
194 struct amdgpu_mode_info *mode_info = &adev->mode_info;
195 int index;
196 u16 data_offset, size;
197 union igp_info *igp_info;
198 union umc_info *umc_info;
199 u8 frev, crev;
200 u8 mem_type;
201
202 if (adev->flags & AMD_IS_APU)
203 index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
204 integratedsysteminfo);
205 else
206 index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
207 umc_info);
208 if (amdgpu_atom_parse_data_header(mode_info->atom_context,
209 index, &size,
210 &frev, &crev, &data_offset)) {
211 if (adev->flags & AMD_IS_APU) {
212 igp_info = (union igp_info *)
213 (mode_info->atom_context->bios + data_offset);
214 switch (crev) {
215 case 11:
216 mem_type = igp_info->v11.memorytype;
217 return convert_atom_mem_type_to_vram_type(adev, mem_type);
218 default:
219 return 0;
220 }
221 } else {
222 umc_info = (union umc_info *)
223 (mode_info->atom_context->bios + data_offset);
224 switch (crev) {
225 case 1:
226 mem_type = umc_info->v31.vram_type;
227 return convert_atom_mem_type_to_vram_type(adev, mem_type);
228 default:
229 return 0;
230 }
231 }
232 }
233
234 return 0;
235}
236
146union firmware_info { 237union firmware_info {
147 struct atom_firmware_info_v3_1 v31; 238 struct atom_firmware_info_v3_1 v31;
148}; 239};
@@ -151,10 +242,6 @@ union smu_info {
151 struct atom_smu_info_v3_1 v31; 242 struct atom_smu_info_v3_1 v31;
152}; 243};
153 244
154union umc_info {
155 struct atom_umc_info_v3_1 v31;
156};
157
158int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) 245int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
159{ 246{
160 struct amdgpu_mode_info *mode_info = &adev->mode_info; 247 struct amdgpu_mode_info *mode_info = &adev->mode_info;
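
A usage sketch for the new helper (not part of the patch): a GMC init path could combine it with the existing width query. The adev->gmc field names and the fallback width below are assumptions for illustration.

	/* Hypothetical early-init snippet: 0 from either helper means the
	 * tables did not report the information. */
	static void example_read_vram_info(struct amdgpu_device *adev)
	{
		int width = amdgpu_atomfirmware_get_vram_width(adev);
		int type = amdgpu_atomfirmware_get_vram_type(adev);

		adev->gmc.vram_width = width ? width : 2048;	/* assumed fallback */
		adev->gmc.vram_type = type;	/* 0 == AMDGPU_VRAM_TYPE_UNKNOWN */
	}
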
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 288b97e54347..7689c961c4ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -28,6 +28,7 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
28void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); 28void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
29int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); 29int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
30int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); 30int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
31int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
31int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); 32int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
32 33
33#endif 34#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index c53095b3b0fb..1ae5ae8c45a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -569,6 +569,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
569 { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, 569 { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
570 { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, 570 { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
571 { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, 571 { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
572 { 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX },
572 { 0, 0, 0, 0, 0 }, 573 { 0, 0, 0, 0, 0 },
573}; 574};
574 575
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 63ec1e1bb6aa..02b849be083b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -80,8 +80,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
80 int time; 80 int time;
81 81
82 n = AMDGPU_BENCHMARK_ITERATIONS; 82 n = AMDGPU_BENCHMARK_ITERATIONS;
 83 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, 83 r = amdgpu_bo_create(adev, size, PAGE_SIZE, sdomain, 0,
84 NULL, 0, &sobj); 84 ttm_bo_type_kernel, NULL, &sobj);
85 if (r) { 85 if (r) {
86 goto out_cleanup; 86 goto out_cleanup;
87 } 87 }
@@ -93,8 +93,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
93 if (r) { 93 if (r) {
94 goto out_cleanup; 94 goto out_cleanup;
95 } 95 }
96 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, 96 r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
97 NULL, 0, &dobj); 97 ttm_bo_type_kernel, NULL, &dobj);
98 if (r) { 98 if (r) {
99 goto out_cleanup; 99 goto out_cleanup;
100 } 100 }
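
The benchmark hunks above reflect a reworked amdgpu_bo_create() signature: the old "kernel" bool becomes an explicit ttm_bo_type, and the sg/init_value arguments are dropped. A minimal sketch of the new call pattern (the helper name is illustrative, not from the patch):

	static int example_alloc_vram_bo(struct amdgpu_device *adev,
					 unsigned long size,
					 struct amdgpu_bo **bo)
	{
		/* adev, size, alignment, domain, flags, type, resv, out pointer */
		return amdgpu_bo_create(adev, size, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_VRAM, 0,
					ttm_bo_type_kernel, NULL, bo);
	}

Freeing is unchanged and still goes through amdgpu_bo_unref().
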
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 59089e027f4d..92be7f6de197 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -233,8 +233,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
233 for (i = 0; i < list->num_entries; i++) { 233 for (i = 0; i < list->num_entries; i++) {
234 unsigned priority = list->array[i].priority; 234 unsigned priority = list->array[i].priority;
235 235
236 list_add_tail(&list->array[i].tv.head, 236 if (!list->array[i].robj->parent)
237 &bucket[priority]); 237 list_add_tail(&list->array[i].tv.head,
238 &bucket[priority]);
239
238 list->array[i].user_pages = NULL; 240 list->array[i].user_pages = NULL;
239 } 241 }
240 242
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 4466f3535e2d..71a57b2f7f04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -24,12 +24,10 @@
24#include <linux/list.h> 24#include <linux/list.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/pci.h> 26#include <linux/pci.h>
27#include <linux/acpi.h>
28#include <drm/drmP.h> 27#include <drm/drmP.h>
29#include <linux/firmware.h> 28#include <linux/firmware.h>
30#include <drm/amdgpu_drm.h> 29#include <drm/amdgpu_drm.h>
31#include "amdgpu.h" 30#include "amdgpu.h"
32#include "cgs_linux.h"
33#include "atom.h" 31#include "atom.h"
34#include "amdgpu_ucode.h" 32#include "amdgpu_ucode.h"
35 33
@@ -42,152 +40,6 @@ struct amdgpu_cgs_device {
42 struct amdgpu_device *adev = \ 40 struct amdgpu_device *adev = \
43 ((struct amdgpu_cgs_device *)cgs_device)->adev 41 ((struct amdgpu_cgs_device *)cgs_device)->adev
44 42
45static void *amdgpu_cgs_register_pp_handle(struct cgs_device *cgs_device,
46 int (*call_back_func)(struct amd_pp_init *, void **))
47{
48 CGS_FUNC_ADEV;
49 struct amd_pp_init pp_init;
50 struct amd_powerplay *amd_pp;
51
52 if (call_back_func == NULL)
53 return NULL;
54
55 amd_pp = &(adev->powerplay);
56 pp_init.chip_family = adev->family;
57 pp_init.chip_id = adev->asic_type;
58 pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
59 pp_init.feature_mask = amdgpu_pp_feature_mask;
60 pp_init.device = cgs_device;
61 if (call_back_func(&pp_init, &(amd_pp->pp_handle)))
62 return NULL;
63
64 return adev->powerplay.pp_handle;
65}
66
67static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
68 enum cgs_gpu_mem_type type,
69 uint64_t size, uint64_t align,
70 cgs_handle_t *handle)
71{
72 CGS_FUNC_ADEV;
73 uint16_t flags = 0;
74 int ret = 0;
75 uint32_t domain = 0;
76 struct amdgpu_bo *obj;
77
78 /* fail if the alignment is not a power of 2 */
79 if (((align != 1) && (align & (align - 1)))
80 || size == 0 || align == 0)
81 return -EINVAL;
82
83
84 switch(type) {
85 case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
86 case CGS_GPU_MEM_TYPE__VISIBLE_FB:
87 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
88 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
89 domain = AMDGPU_GEM_DOMAIN_VRAM;
90 break;
91 case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
92 case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
93 flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
94 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
95 domain = AMDGPU_GEM_DOMAIN_VRAM;
96 break;
97 case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
98 domain = AMDGPU_GEM_DOMAIN_GTT;
99 break;
100 case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
101 flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
102 domain = AMDGPU_GEM_DOMAIN_GTT;
103 break;
104 default:
105 return -EINVAL;
106 }
107
108
109 *handle = 0;
110
111 ret = amdgpu_bo_create(adev, size, align, true, domain, flags,
112 NULL, NULL, 0, &obj);
113 if (ret) {
114 DRM_ERROR("(%d) bo create failed\n", ret);
115 return ret;
116 }
117 *handle = (cgs_handle_t)obj;
118
119 return ret;
120}
121
122static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
123{
124 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
125
126 if (obj) {
127 int r = amdgpu_bo_reserve(obj, true);
128 if (likely(r == 0)) {
129 amdgpu_bo_kunmap(obj);
130 amdgpu_bo_unpin(obj);
131 amdgpu_bo_unreserve(obj);
132 }
133 amdgpu_bo_unref(&obj);
134
135 }
136 return 0;
137}
138
139static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
140 uint64_t *mcaddr)
141{
142 int r;
143 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
144
145 WARN_ON_ONCE(obj->placement.num_placement > 1);
146
147 r = amdgpu_bo_reserve(obj, true);
148 if (unlikely(r != 0))
149 return r;
150 r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr);
151 amdgpu_bo_unreserve(obj);
152 return r;
153}
154
155static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
156{
157 int r;
158 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
159 r = amdgpu_bo_reserve(obj, true);
160 if (unlikely(r != 0))
161 return r;
162 r = amdgpu_bo_unpin(obj);
163 amdgpu_bo_unreserve(obj);
164 return r;
165}
166
167static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
168 void **map)
169{
170 int r;
171 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
172 r = amdgpu_bo_reserve(obj, true);
173 if (unlikely(r != 0))
174 return r;
175 r = amdgpu_bo_kmap(obj, map);
176 amdgpu_bo_unreserve(obj);
177 return r;
178}
179
180static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
181{
182 int r;
183 struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
184 r = amdgpu_bo_reserve(obj, true);
185 if (unlikely(r != 0))
186 return r;
187 amdgpu_bo_kunmap(obj);
188 amdgpu_bo_unreserve(obj);
189 return r;
190}
191 43
192static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset) 44static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
193{ 45{
@@ -329,109 +181,6 @@ static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigne
329 adev->mode_info.atom_context, table, args); 181 adev->mode_info.atom_context, table, args);
330} 182}
331 183
332struct cgs_irq_params {
333 unsigned src_id;
334 cgs_irq_source_set_func_t set;
335 cgs_irq_handler_func_t handler;
336 void *private_data;
337};
338
339static int cgs_set_irq_state(struct amdgpu_device *adev,
340 struct amdgpu_irq_src *src,
341 unsigned type,
342 enum amdgpu_interrupt_state state)
343{
344 struct cgs_irq_params *irq_params =
345 (struct cgs_irq_params *)src->data;
346 if (!irq_params)
347 return -EINVAL;
348 if (!irq_params->set)
349 return -EINVAL;
350 return irq_params->set(irq_params->private_data,
351 irq_params->src_id,
352 type,
353 (int)state);
354}
355
356static int cgs_process_irq(struct amdgpu_device *adev,
357 struct amdgpu_irq_src *source,
358 struct amdgpu_iv_entry *entry)
359{
360 struct cgs_irq_params *irq_params =
361 (struct cgs_irq_params *)source->data;
362 if (!irq_params)
363 return -EINVAL;
364 if (!irq_params->handler)
365 return -EINVAL;
366 return irq_params->handler(irq_params->private_data,
367 irq_params->src_id,
368 entry->iv_entry);
369}
370
371static const struct amdgpu_irq_src_funcs cgs_irq_funcs = {
372 .set = cgs_set_irq_state,
373 .process = cgs_process_irq,
374};
375
376static int amdgpu_cgs_add_irq_source(void *cgs_device,
377 unsigned client_id,
378 unsigned src_id,
379 unsigned num_types,
380 cgs_irq_source_set_func_t set,
381 cgs_irq_handler_func_t handler,
382 void *private_data)
383{
384 CGS_FUNC_ADEV;
385 int ret = 0;
386 struct cgs_irq_params *irq_params;
387 struct amdgpu_irq_src *source =
388 kzalloc(sizeof(struct amdgpu_irq_src), GFP_KERNEL);
389 if (!source)
390 return -ENOMEM;
391 irq_params =
392 kzalloc(sizeof(struct cgs_irq_params), GFP_KERNEL);
393 if (!irq_params) {
394 kfree(source);
395 return -ENOMEM;
396 }
397 source->num_types = num_types;
398 source->funcs = &cgs_irq_funcs;
399 irq_params->src_id = src_id;
400 irq_params->set = set;
401 irq_params->handler = handler;
402 irq_params->private_data = private_data;
403 source->data = (void *)irq_params;
404 ret = amdgpu_irq_add_id(adev, client_id, src_id, source);
405 if (ret) {
406 kfree(irq_params);
407 kfree(source);
408 }
409
410 return ret;
411}
412
413static int amdgpu_cgs_irq_get(void *cgs_device, unsigned client_id,
414 unsigned src_id, unsigned type)
415{
416 CGS_FUNC_ADEV;
417
418 if (!adev->irq.client[client_id].sources)
419 return -EINVAL;
420
421 return amdgpu_irq_get(adev, adev->irq.client[client_id].sources[src_id], type);
422}
423
424static int amdgpu_cgs_irq_put(void *cgs_device, unsigned client_id,
425 unsigned src_id, unsigned type)
426{
427 CGS_FUNC_ADEV;
428
429 if (!adev->irq.client[client_id].sources)
430 return -EINVAL;
431
432 return amdgpu_irq_put(adev, adev->irq.client[client_id].sources[src_id], type);
433}
434
435static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, 184static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
436 enum amd_ip_block_type block_type, 185 enum amd_ip_block_type block_type,
437 enum amd_clockgating_state state) 186 enum amd_clockgating_state state)
@@ -801,11 +550,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
801 else 550 else
802 strcpy(fw_name, "amdgpu/vega10_smc.bin"); 551 strcpy(fw_name, "amdgpu/vega10_smc.bin");
803 break; 552 break;
804 case CHIP_CARRIZO: 553 case CHIP_VEGA12:
805 case CHIP_STONEY: 554 strcpy(fw_name, "amdgpu/vega12_smc.bin");
806 case CHIP_RAVEN: 555 break;
807 adev->pm.fw_version = info->version;
808 return 0;
809 default: 556 default:
810 DRM_ERROR("SMC firmware not supported\n"); 557 DRM_ERROR("SMC firmware not supported\n");
811 return -EINVAL; 558 return -EINVAL;
@@ -857,61 +604,6 @@ static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
857 return amdgpu_sriov_vf(adev); 604 return amdgpu_sriov_vf(adev);
858} 605}
859 606
860static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
861 struct cgs_system_info *sys_info)
862{
863 CGS_FUNC_ADEV;
864
865 if (NULL == sys_info)
866 return -ENODEV;
867
868 if (sizeof(struct cgs_system_info) != sys_info->size)
869 return -ENODEV;
870
871 switch (sys_info->info_id) {
872 case CGS_SYSTEM_INFO_ADAPTER_BDF_ID:
873 sys_info->value = adev->pdev->devfn | (adev->pdev->bus->number << 8);
874 break;
875 case CGS_SYSTEM_INFO_PCIE_GEN_INFO:
876 sys_info->value = adev->pm.pcie_gen_mask;
877 break;
878 case CGS_SYSTEM_INFO_PCIE_MLW:
879 sys_info->value = adev->pm.pcie_mlw_mask;
880 break;
881 case CGS_SYSTEM_INFO_PCIE_DEV:
882 sys_info->value = adev->pdev->device;
883 break;
884 case CGS_SYSTEM_INFO_PCIE_REV:
885 sys_info->value = adev->pdev->revision;
886 break;
887 case CGS_SYSTEM_INFO_CG_FLAGS:
888 sys_info->value = adev->cg_flags;
889 break;
890 case CGS_SYSTEM_INFO_PG_FLAGS:
891 sys_info->value = adev->pg_flags;
892 break;
893 case CGS_SYSTEM_INFO_GFX_CU_INFO:
894 sys_info->value = adev->gfx.cu_info.number;
895 break;
896 case CGS_SYSTEM_INFO_GFX_SE_INFO:
897 sys_info->value = adev->gfx.config.max_shader_engines;
898 break;
899 case CGS_SYSTEM_INFO_PCIE_SUB_SYS_ID:
900 sys_info->value = adev->pdev->subsystem_device;
901 break;
902 case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID:
903 sys_info->value = adev->pdev->subsystem_vendor;
904 break;
905 case CGS_SYSTEM_INFO_PCIE_BUS_DEVFN:
906 sys_info->value = adev->pdev->devfn;
907 break;
908 default:
909 return -ENODEV;
910 }
911
912 return 0;
913}
914
915static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, 607static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
916 struct cgs_display_info *info) 608 struct cgs_display_info *info)
917{ 609{
@@ -922,12 +614,9 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
922 return -EINVAL; 614 return -EINVAL;
923 615
924 mode_info = info->mode_info; 616 mode_info = info->mode_info;
925 if (mode_info) { 617 if (mode_info)
926 /* if the displays are off, vblank time is max */ 618 /* if the displays are off, vblank time is max */
927 mode_info->vblank_time_us = 0xffffffff; 619 mode_info->vblank_time_us = 0xffffffff;
928 /* always set the reference clock */
929 mode_info->ref_clock = adev->clock.spll.reference_freq;
930 }
931 620
932 if (!amdgpu_device_has_dc_support(adev)) { 621 if (!amdgpu_device_has_dc_support(adev)) {
933 struct amdgpu_crtc *amdgpu_crtc; 622 struct amdgpu_crtc *amdgpu_crtc;
@@ -953,6 +642,11 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
953 (amdgpu_crtc->v_border * 2); 642 (amdgpu_crtc->v_border * 2);
954 mode_info->vblank_time_us = vblank_lines * line_time_us; 643 mode_info->vblank_time_us = vblank_lines * line_time_us;
955 mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); 644 mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
645 /* we have issues with mclk switching with refresh rates
646 * over 120 hz on the non-DC code.
647 */
648 if (mode_info->refresh_rate > 120)
649 mode_info->vblank_time_us = 0;
956 mode_info = NULL; 650 mode_info = NULL;
957 } 651 }
958 } 652 }
@@ -977,223 +671,7 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena
977 return 0; 671 return 0;
978} 672}
979 673
980/** \brief evaluate acpi namespace object, handle or pathname must be valid
981 * \param cgs_device
982 * \param info input/output arguments for the control method
983 * \return status
984 */
985
986#if defined(CONFIG_ACPI)
987static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
988 struct cgs_acpi_method_info *info)
989{
990 CGS_FUNC_ADEV;
991 acpi_handle handle;
992 struct acpi_object_list input;
993 struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
994 union acpi_object *params, *obj;
995 uint8_t name[5] = {'\0'};
996 struct cgs_acpi_method_argument *argument;
997 uint32_t i, count;
998 acpi_status status;
999 int result;
1000
1001 handle = ACPI_HANDLE(&adev->pdev->dev);
1002 if (!handle)
1003 return -ENODEV;
1004
1005 memset(&input, 0, sizeof(struct acpi_object_list));
1006
1007 /* validate input info */
1008 if (info->size != sizeof(struct cgs_acpi_method_info))
1009 return -EINVAL;
1010
1011 input.count = info->input_count;
1012 if (info->input_count > 0) {
1013 if (info->pinput_argument == NULL)
1014 return -EINVAL;
1015 argument = info->pinput_argument;
1016 for (i = 0; i < info->input_count; i++) {
1017 if (((argument->type == ACPI_TYPE_STRING) ||
1018 (argument->type == ACPI_TYPE_BUFFER)) &&
1019 (argument->pointer == NULL))
1020 return -EINVAL;
1021 argument++;
1022 }
1023 }
1024
1025 if (info->output_count > 0) {
1026 if (info->poutput_argument == NULL)
1027 return -EINVAL;
1028 argument = info->poutput_argument;
1029 for (i = 0; i < info->output_count; i++) {
1030 if (((argument->type == ACPI_TYPE_STRING) ||
1031 (argument->type == ACPI_TYPE_BUFFER))
1032 && (argument->pointer == NULL))
1033 return -EINVAL;
1034 argument++;
1035 }
1036 }
1037
1038 /* The path name passed to acpi_evaluate_object should be null terminated */
1039 if ((info->field & CGS_ACPI_FIELD_METHOD_NAME) != 0) {
1040 strncpy(name, (char *)&(info->name), sizeof(uint32_t));
1041 name[4] = '\0';
1042 }
1043
1044 /* parse input parameters */
1045 if (input.count > 0) {
1046 input.pointer = params =
1047 kzalloc(sizeof(union acpi_object) * input.count, GFP_KERNEL);
1048 if (params == NULL)
1049 return -EINVAL;
1050
1051 argument = info->pinput_argument;
1052
1053 for (i = 0; i < input.count; i++) {
1054 params->type = argument->type;
1055 switch (params->type) {
1056 case ACPI_TYPE_INTEGER:
1057 params->integer.value = argument->value;
1058 break;
1059 case ACPI_TYPE_STRING:
1060 params->string.length = argument->data_length;
1061 params->string.pointer = argument->pointer;
1062 break;
1063 case ACPI_TYPE_BUFFER:
1064 params->buffer.length = argument->data_length;
1065 params->buffer.pointer = argument->pointer;
1066 break;
1067 default:
1068 break;
1069 }
1070 params++;
1071 argument++;
1072 }
1073 }
1074
1075 /* parse output info */
1076 count = info->output_count;
1077 argument = info->poutput_argument;
1078
1079 /* evaluate the acpi method */
1080 status = acpi_evaluate_object(handle, name, &input, &output);
1081
1082 if (ACPI_FAILURE(status)) {
1083 result = -EIO;
1084 goto free_input;
1085 }
1086
1087 /* return the output info */
1088 obj = output.pointer;
1089
1090 if (count > 1) {
1091 if ((obj->type != ACPI_TYPE_PACKAGE) ||
1092 (obj->package.count != count)) {
1093 result = -EIO;
1094 goto free_obj;
1095 }
1096 params = obj->package.elements;
1097 } else
1098 params = obj;
1099
1100 if (params == NULL) {
1101 result = -EIO;
1102 goto free_obj;
1103 }
1104
1105 for (i = 0; i < count; i++) {
1106 if (argument->type != params->type) {
1107 result = -EIO;
1108 goto free_obj;
1109 }
1110 switch (params->type) {
1111 case ACPI_TYPE_INTEGER:
1112 argument->value = params->integer.value;
1113 break;
1114 case ACPI_TYPE_STRING:
1115 if ((params->string.length != argument->data_length) ||
1116 (params->string.pointer == NULL)) {
1117 result = -EIO;
1118 goto free_obj;
1119 }
1120 strncpy(argument->pointer,
1121 params->string.pointer,
1122 params->string.length);
1123 break;
1124 case ACPI_TYPE_BUFFER:
1125 if (params->buffer.pointer == NULL) {
1126 result = -EIO;
1127 goto free_obj;
1128 }
1129 memcpy(argument->pointer,
1130 params->buffer.pointer,
1131 argument->data_length);
1132 break;
1133 default:
1134 break;
1135 }
1136 argument++;
1137 params++;
1138 }
1139
1140 result = 0;
1141free_obj:
1142 kfree(obj);
1143free_input:
1144 kfree((void *)input.pointer);
1145 return result;
1146}
1147#else
1148static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
1149 struct cgs_acpi_method_info *info)
1150{
1151 return -EIO;
1152}
1153#endif
1154
1155static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
1156 uint32_t acpi_method,
1157 uint32_t acpi_function,
1158 void *pinput, void *poutput,
1159 uint32_t output_count,
1160 uint32_t input_size,
1161 uint32_t output_size)
1162{
1163 struct cgs_acpi_method_argument acpi_input[2] = { {0}, {0} };
1164 struct cgs_acpi_method_argument acpi_output = {0};
1165 struct cgs_acpi_method_info info = {0};
1166
1167 acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
1168 acpi_input[0].data_length = sizeof(uint32_t);
1169 acpi_input[0].value = acpi_function;
1170
1171 acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
1172 acpi_input[1].data_length = input_size;
1173 acpi_input[1].pointer = pinput;
1174
1175 acpi_output.type = CGS_ACPI_TYPE_BUFFER;
1176 acpi_output.data_length = output_size;
1177 acpi_output.pointer = poutput;
1178
1179 info.size = sizeof(struct cgs_acpi_method_info);
1180 info.field = CGS_ACPI_FIELD_METHOD_NAME | CGS_ACPI_FIELD_INPUT_ARGUMENT_COUNT;
1181 info.input_count = 2;
1182 info.name = acpi_method;
1183 info.pinput_argument = acpi_input;
1184 info.output_count = output_count;
1185 info.poutput_argument = &acpi_output;
1186
1187 return amdgpu_cgs_acpi_eval_object(cgs_device, &info);
1188}
1189
1190static const struct cgs_ops amdgpu_cgs_ops = { 674static const struct cgs_ops amdgpu_cgs_ops = {
1191 .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
1192 .free_gpu_mem = amdgpu_cgs_free_gpu_mem,
1193 .gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem,
1194 .gunmap_gpu_mem = amdgpu_cgs_gunmap_gpu_mem,
1195 .kmap_gpu_mem = amdgpu_cgs_kmap_gpu_mem,
1196 .kunmap_gpu_mem = amdgpu_cgs_kunmap_gpu_mem,
1197 .read_register = amdgpu_cgs_read_register, 675 .read_register = amdgpu_cgs_read_register,
1198 .write_register = amdgpu_cgs_write_register, 676 .write_register = amdgpu_cgs_write_register,
1199 .read_ind_register = amdgpu_cgs_read_ind_register, 677 .read_ind_register = amdgpu_cgs_read_ind_register,
@@ -1208,18 +686,9 @@ static const struct cgs_ops amdgpu_cgs_ops = {
1208 .set_clockgating_state = amdgpu_cgs_set_clockgating_state, 686 .set_clockgating_state = amdgpu_cgs_set_clockgating_state,
1209 .get_active_displays_info = amdgpu_cgs_get_active_displays_info, 687 .get_active_displays_info = amdgpu_cgs_get_active_displays_info,
1210 .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled, 688 .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
1211 .call_acpi_method = amdgpu_cgs_call_acpi_method,
1212 .query_system_info = amdgpu_cgs_query_system_info,
1213 .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, 689 .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
1214 .enter_safe_mode = amdgpu_cgs_enter_safe_mode, 690 .enter_safe_mode = amdgpu_cgs_enter_safe_mode,
1215 .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, 691 .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
1216 .register_pp_handle = amdgpu_cgs_register_pp_handle,
1217};
1218
1219static const struct cgs_os_ops amdgpu_cgs_os_ops = {
1220 .add_irq_source = amdgpu_cgs_add_irq_source,
1221 .irq_get = amdgpu_cgs_irq_get,
1222 .irq_put = amdgpu_cgs_irq_put
1223}; 692};
1224 693
1225struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) 694struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
@@ -1233,7 +702,6 @@ struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
1233 } 702 }
1234 703
1235 cgs_device->base.ops = &amdgpu_cgs_ops; 704 cgs_device->base.ops = &amdgpu_cgs_ops;
1236 cgs_device->base.os_ops = &amdgpu_cgs_os_ops;
1237 cgs_device->adev = adev; 705 cgs_device->adev = adev;
1238 706
1239 return (struct cgs_device *)cgs_device; 707 return (struct cgs_device *)cgs_device;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 74d2efaec52f..96501ff0e55b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
69 /* don't do anything if sink is not display port, i.e., 69 /* don't do anything if sink is not display port, i.e.,
70 * passive dp->(dvi|hdmi) adaptor 70 * passive dp->(dvi|hdmi) adaptor
71 */ 71 */
72 if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { 72 if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
73 int saved_dpms = connector->dpms; 73 amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) &&
74 /* Only turn off the display if it's physically disconnected */ 74 amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
75 if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { 75 /* Don't start link training before we have the DPCD */
76 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); 76 if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
77 } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { 77 return;
78 /* Don't try to start link training before we 78
79 * have the dpcd */ 79 /* Turn the connector off and back on immediately, which
80 if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) 80 * will trigger link training
81 return; 81 */
82 82 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
83 /* set it to OFF so that drm_helper_connector_dpms() 83 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
84 * won't return immediately since the current state
85 * is ON at this point.
86 */
87 connector->dpms = DRM_MODE_DPMS_OFF;
88 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
89 }
90 connector->dpms = saved_dpms;
91 } 84 }
92 } 85 }
93} 86}
@@ -885,7 +878,7 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
885 ret = connector_status_disconnected; 878 ret = connector_status_disconnected;
886 879
887 if (amdgpu_connector->ddc_bus) 880 if (amdgpu_connector->ddc_bus)
888 dret = amdgpu_ddc_probe(amdgpu_connector, false); 881 dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
889 if (dret) { 882 if (dret) {
890 amdgpu_connector->detected_by_load = false; 883 amdgpu_connector->detected_by_load = false;
891 amdgpu_connector_free_edid(connector); 884 amdgpu_connector_free_edid(connector);
@@ -1010,7 +1003,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
1010 } 1003 }
1011 1004
1012 if (amdgpu_connector->ddc_bus) 1005 if (amdgpu_connector->ddc_bus)
1013 dret = amdgpu_ddc_probe(amdgpu_connector, false); 1006 dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
1014 if (dret) { 1007 if (dret) {
1015 amdgpu_connector->detected_by_load = false; 1008 amdgpu_connector->detected_by_load = false;
1016 amdgpu_connector_free_edid(connector); 1009 amdgpu_connector_free_edid(connector);
@@ -1417,7 +1410,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
1417 /* setup ddc on the bridge */ 1410 /* setup ddc on the bridge */
1418 amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder); 1411 amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
1419 /* bridge chips are always aux */ 1412 /* bridge chips are always aux */
1420 if (amdgpu_ddc_probe(amdgpu_connector, true)) /* try DDC */ 1413 /* try DDC */
1414 if (amdgpu_display_ddc_probe(amdgpu_connector, true))
1421 ret = connector_status_connected; 1415 ret = connector_status_connected;
1422 else if (amdgpu_connector->dac_load_detect) { /* try load detection */ 1416 else if (amdgpu_connector->dac_load_detect) { /* try load detection */
1423 const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; 1417 const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
@@ -1437,7 +1431,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
1437 ret = connector_status_connected; 1431 ret = connector_status_connected;
1438 } else { 1432 } else {
1439 /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */ 1433 /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */
1440 if (amdgpu_ddc_probe(amdgpu_connector, false)) 1434 if (amdgpu_display_ddc_probe(amdgpu_connector,
1435 false))
1441 ret = connector_status_connected; 1436 ret = connector_status_connected;
1442 } 1437 }
1443 } 1438 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e80fc38141b5..dc34b50e6b29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -257,7 +257,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
257 return; 257 return;
258 } 258 }
259 259
260 total_vram = adev->mc.real_vram_size - adev->vram_pin_size; 260 total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
261 used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); 261 used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
262 free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; 262 free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
263 263
@@ -302,8 +302,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
302 *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); 302 *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
303 303
304 /* Do the same for visible VRAM if half of it is free */ 304 /* Do the same for visible VRAM if half of it is free */
305 if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { 305 if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
306 u64 total_vis_vram = adev->mc.visible_vram_size; 306 u64 total_vis_vram = adev->gmc.visible_vram_size;
307 u64 used_vis_vram = 307 u64 used_vis_vram =
308 amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); 308 amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
309 309
@@ -346,8 +346,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
346 struct ttm_operation_ctx ctx = { 346 struct ttm_operation_ctx ctx = {
347 .interruptible = true, 347 .interruptible = true,
348 .no_wait_gpu = false, 348 .no_wait_gpu = false,
349 .allow_reserved_eviction = false, 349 .resv = bo->tbo.resv,
350 .resv = bo->tbo.resv 350 .flags = 0
351 }; 351 };
352 uint32_t domain; 352 uint32_t domain;
353 int r; 353 int r;
@@ -359,7 +359,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
359 * to move it. Don't move anything if the threshold is zero. 359 * to move it. Don't move anything if the threshold is zero.
360 */ 360 */
361 if (p->bytes_moved < p->bytes_moved_threshold) { 361 if (p->bytes_moved < p->bytes_moved_threshold) {
362 if (adev->mc.visible_vram_size < adev->mc.real_vram_size && 362 if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
363 (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { 363 (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
364 /* And don't move a CPU_ACCESS_REQUIRED BO to limited 364 /* And don't move a CPU_ACCESS_REQUIRED BO to limited
365 * visible VRAM if we've depleted our allowance to do 365 * visible VRAM if we've depleted our allowance to do
@@ -381,9 +381,9 @@ retry:
381 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 381 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
382 382
383 p->bytes_moved += ctx.bytes_moved; 383 p->bytes_moved += ctx.bytes_moved;
384 if (adev->mc.visible_vram_size < adev->mc.real_vram_size && 384 if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
385 bo->tbo.mem.mem_type == TTM_PL_VRAM && 385 bo->tbo.mem.mem_type == TTM_PL_VRAM &&
386 bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) 386 bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
387 p->bytes_moved_vis += ctx.bytes_moved; 387 p->bytes_moved_vis += ctx.bytes_moved;
388 388
389 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { 389 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -437,9 +437,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
437 /* Good we can try to move this BO somewhere else */ 437 /* Good we can try to move this BO somewhere else */
438 amdgpu_ttm_placement_from_domain(bo, other); 438 amdgpu_ttm_placement_from_domain(bo, other);
439 update_bytes_moved_vis = 439 update_bytes_moved_vis =
440 adev->mc.visible_vram_size < adev->mc.real_vram_size && 440 adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
441 bo->tbo.mem.mem_type == TTM_PL_VRAM && 441 bo->tbo.mem.mem_type == TTM_PL_VRAM &&
442 bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; 442 bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
443 initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); 443 initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
444 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 444 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
445 bytes_moved = atomic64_read(&adev->num_bytes_moved) - 445 bytes_moved = atomic64_read(&adev->num_bytes_moved) -
@@ -542,7 +542,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
542 INIT_LIST_HEAD(&duplicates); 542 INIT_LIST_HEAD(&duplicates);
543 amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); 543 amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
544 544
545 if (p->uf_entry.robj) 545 if (p->uf_entry.robj && !p->uf_entry.robj->parent)
546 list_add(&p->uf_entry.tv.head, &p->validated); 546 list_add(&p->uf_entry.tv.head, &p->validated);
547 547
548 while (1) { 548 while (1) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index ee76b468774a..448d69fe3756 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -64,16 +64,21 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
64 64
65#if defined(CONFIG_DEBUG_FS) 65#if defined(CONFIG_DEBUG_FS)
66 66
67static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, 67
68 size_t size, loff_t *pos) 68static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
69 char __user *buf, size_t size, loff_t *pos)
69{ 70{
70 struct amdgpu_device *adev = file_inode(f)->i_private; 71 struct amdgpu_device *adev = file_inode(f)->i_private;
71 ssize_t result = 0; 72 ssize_t result = 0;
72 int r; 73 int r;
73 bool pm_pg_lock, use_bank; 74 bool pm_pg_lock, use_bank, use_ring;
74 unsigned instance_bank, sh_bank, se_bank; 75 unsigned instance_bank, sh_bank, se_bank, me, pipe, queue;
75 76
76 if (size & 0x3 || *pos & 0x3) 77 pm_pg_lock = use_bank = use_ring = false;
78 instance_bank = sh_bank = se_bank = me = pipe = queue = 0;
79
80 if (size & 0x3 || *pos & 0x3 ||
81 ((*pos & (1ULL << 62)) && (*pos & (1ULL << 61))))
77 return -EINVAL; 82 return -EINVAL;
78 83
79 /* are we reading registers for which a PG lock is necessary? */ 84 /* are we reading registers for which a PG lock is necessary? */
@@ -91,8 +96,15 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
91 if (instance_bank == 0x3FF) 96 if (instance_bank == 0x3FF)
92 instance_bank = 0xFFFFFFFF; 97 instance_bank = 0xFFFFFFFF;
93 use_bank = 1; 98 use_bank = 1;
99 } else if (*pos & (1ULL << 61)) {
100
101 me = (*pos & GENMASK_ULL(33, 24)) >> 24;
102 pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
103 queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
104
105 use_ring = 1;
94 } else { 106 } else {
95 use_bank = 0; 107 use_bank = use_ring = 0;
96 } 108 }
97 109
98 *pos &= (1UL << 22) - 1; 110 *pos &= (1UL << 22) - 1;
@@ -104,6 +116,9 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
104 mutex_lock(&adev->grbm_idx_mutex); 116 mutex_lock(&adev->grbm_idx_mutex);
105 amdgpu_gfx_select_se_sh(adev, se_bank, 117 amdgpu_gfx_select_se_sh(adev, se_bank,
106 sh_bank, instance_bank); 118 sh_bank, instance_bank);
119 } else if (use_ring) {
120 mutex_lock(&adev->srbm_mutex);
121 amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue);
107 } 122 }
108 123
109 if (pm_pg_lock) 124 if (pm_pg_lock)
@@ -115,8 +130,14 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
115 if (*pos > adev->rmmio_size) 130 if (*pos > adev->rmmio_size)
116 goto end; 131 goto end;
117 132
118 value = RREG32(*pos >> 2); 133 if (read) {
119 r = put_user(value, (uint32_t *)buf); 134 value = RREG32(*pos >> 2);
135 r = put_user(value, (uint32_t *)buf);
136 } else {
137 r = get_user(value, (uint32_t *)buf);
138 if (!r)
139 WREG32(*pos >> 2, value);
140 }
120 if (r) { 141 if (r) {
121 result = r; 142 result = r;
122 goto end; 143 goto end;
@@ -132,6 +153,9 @@ end:
132 if (use_bank) { 153 if (use_bank) {
133 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 154 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
134 mutex_unlock(&adev->grbm_idx_mutex); 155 mutex_unlock(&adev->grbm_idx_mutex);
156 } else if (use_ring) {
157 amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0);
158 mutex_unlock(&adev->srbm_mutex);
135 } 159 }
136 160
137 if (pm_pg_lock) 161 if (pm_pg_lock)
@@ -140,78 +164,17 @@ end:
140 return result; 164 return result;
141} 165}
142 166
167
168static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
169 size_t size, loff_t *pos)
170{
171 return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
172}
173
143static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, 174static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
144 size_t size, loff_t *pos) 175 size_t size, loff_t *pos)
145{ 176{
146 struct amdgpu_device *adev = file_inode(f)->i_private; 177 return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
147 ssize_t result = 0;
148 int r;
149 bool pm_pg_lock, use_bank;
150 unsigned instance_bank, sh_bank, se_bank;
151
152 if (size & 0x3 || *pos & 0x3)
153 return -EINVAL;
154
155 /* are we reading registers for which a PG lock is necessary? */
156 pm_pg_lock = (*pos >> 23) & 1;
157
158 if (*pos & (1ULL << 62)) {
159 se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
160 sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
161 instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
162
163 if (se_bank == 0x3FF)
164 se_bank = 0xFFFFFFFF;
165 if (sh_bank == 0x3FF)
166 sh_bank = 0xFFFFFFFF;
167 if (instance_bank == 0x3FF)
168 instance_bank = 0xFFFFFFFF;
169 use_bank = 1;
170 } else {
171 use_bank = 0;
172 }
173
174 *pos &= (1UL << 22) - 1;
175
176 if (use_bank) {
177 if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
178 (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
179 return -EINVAL;
180 mutex_lock(&adev->grbm_idx_mutex);
181 amdgpu_gfx_select_se_sh(adev, se_bank,
182 sh_bank, instance_bank);
183 }
184
185 if (pm_pg_lock)
186 mutex_lock(&adev->pm.mutex);
187
188 while (size) {
189 uint32_t value;
190
191 if (*pos > adev->rmmio_size)
192 return result;
193
194 r = get_user(value, (uint32_t *)buf);
195 if (r)
196 return r;
197
198 WREG32(*pos >> 2, value);
199
200 result += 4;
201 buf += 4;
202 *pos += 4;
203 size -= 4;
204 }
205
206 if (use_bank) {
207 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
208 mutex_unlock(&adev->grbm_idx_mutex);
209 }
210
211 if (pm_pg_lock)
212 mutex_unlock(&adev->pm.mutex);
213
214 return result;
215} 178}
216 179
217static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, 180static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
@@ -767,10 +730,21 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
767 return 0; 730 return 0;
768} 731}
769 732
733static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
734{
735 struct drm_info_node *node = (struct drm_info_node *)m->private;
736 struct drm_device *dev = node->minor->dev;
737 struct amdgpu_device *adev = dev->dev_private;
738
739 seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
740 return 0;
741}
742
770static const struct drm_info_list amdgpu_debugfs_list[] = { 743static const struct drm_info_list amdgpu_debugfs_list[] = {
771 {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump}, 744 {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
772 {"amdgpu_test_ib", &amdgpu_debugfs_test_ib}, 745 {"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
773 {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram} 746 {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
747 {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
774}; 748};
775 749
776int amdgpu_debugfs_init(struct amdgpu_device *adev) 750int amdgpu_debugfs_init(struct amdgpu_device *adev)
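
The unified register handler above decodes its selectors from the debugfs file offset: the register byte offset lives in bits 0-21, bit 23 requests the PM/PG lock, bit 62 selects an SE/SH/instance bank and the new bit 61 selects an ME/pipe/queue, with both selectors sharing the 24-33 / 34-43 / 44-53 fields and never being set together. Accesses must stay 4-byte sized and aligned. A self-contained, illustrative helper (not part of the patch) for composing such an offset from user space:

	#include <stdint.h>

	/* Build the file offset for a ring-scoped register access. */
	static uint64_t example_regs_pos(uint32_t reg_byte_offset,
					 uint32_t me, uint32_t pipe, uint32_t queue)
	{
		uint64_t pos = reg_byte_offset & ((1ULL << 22) - 1);

		pos |= (uint64_t)(me & 0x3ff) << 24;
		pos |= (uint64_t)(pipe & 0x3ff) << 34;
		pos |= (uint64_t)(queue & 0x3ff) << 44;
		pos |= 1ULL << 61;	/* ME/pipe/queue selector */

		return pos;		/* use as the pread()/pwrite() offset */
	}
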
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index af1b879a9ee9..34af664b9f93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -59,6 +59,7 @@
59#include "amdgpu_pm.h" 59#include "amdgpu_pm.h"
60 60
61MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); 61MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
62MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
62MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); 63MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
63 64
64#define AMDGPU_RESUME_MS 2000 65#define AMDGPU_RESUME_MS 2000
@@ -83,10 +84,21 @@ static const char *amdgpu_asic_name[] = {
83 "POLARIS11", 84 "POLARIS11",
84 "POLARIS12", 85 "POLARIS12",
85 "VEGA10", 86 "VEGA10",
87 "VEGA12",
86 "RAVEN", 88 "RAVEN",
87 "LAST", 89 "LAST",
88}; 90};
89 91
92static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
93
94/**
95 * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
96 *
97 * @dev: drm_device pointer
98 *
99 * Returns true if the device is a dGPU with HG/PX power control,
100 * otherwise return false.
101 */
90bool amdgpu_device_is_px(struct drm_device *dev) 102bool amdgpu_device_is_px(struct drm_device *dev)
91{ 103{
92 struct amdgpu_device *adev = dev->dev_private; 104 struct amdgpu_device *adev = dev->dev_private;
@@ -99,6 +111,15 @@ bool amdgpu_device_is_px(struct drm_device *dev)
99/* 111/*
100 * MMIO register access helper functions. 112 * MMIO register access helper functions.
101 */ 113 */
114/**
115 * amdgpu_mm_rreg - read a memory mapped IO register
116 *
117 * @adev: amdgpu_device pointer
118 * @reg: dword aligned register offset
119 * @acc_flags: access flags which require special behavior
120 *
121 * Returns the 32 bit value from the offset specified.
122 */
102uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, 123uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
103 uint32_t acc_flags) 124 uint32_t acc_flags)
104{ 125{
@@ -121,6 +142,58 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
121 return ret; 142 return ret;
122} 143}
123 144
145/*
 146 * MMIO register read helper functions (byte access)
 147 * @offset: byte offset from MMIO start
148 *
149*/
150
151/**
152 * amdgpu_mm_rreg8 - read a memory mapped IO register
153 *
154 * @adev: amdgpu_device pointer
155 * @offset: byte aligned register offset
156 *
157 * Returns the 8 bit value from the offset specified.
158 */
159uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
160 if (offset < adev->rmmio_size)
161 return (readb(adev->rmmio + offset));
162 BUG();
163}
164
165/*
 166 * MMIO register write helper functions (byte access)
 167 * @offset: byte offset from MMIO start
 168 * @value: the value to be written to the register
169 *
170*/
171/**
 172 * amdgpu_mm_wreg8 - write to a memory mapped IO register
173 *
174 * @adev: amdgpu_device pointer
175 * @offset: byte aligned register offset
176 * @value: 8 bit value to write
177 *
178 * Writes the value specified to the offset specified.
179 */
180void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
181 if (offset < adev->rmmio_size)
182 writeb(value, adev->rmmio + offset);
183 else
184 BUG();
185}
186
187/**
188 * amdgpu_mm_wreg - write to a memory mapped IO register
189 *
190 * @adev: amdgpu_device pointer
191 * @reg: dword aligned register offset
192 * @v: 32 bit value to write to the register
193 * @acc_flags: access flags which require special behavior
194 *
195 * Writes the value specified to the offset specified.
196 */
124void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, 197void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
125 uint32_t acc_flags) 198 uint32_t acc_flags)
126{ 199{
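
A trivial usage sketch for the new byte-wide accessors added above (illustrative only; the offset is a placeholder):

	/* Read-modify-write a single byte inside the MMIO aperture. */
	static void example_set_bit0(struct amdgpu_device *adev, uint32_t byte_offset)
	{
		uint8_t v = amdgpu_mm_rreg8(adev, byte_offset);

		amdgpu_mm_wreg8(adev, byte_offset, v | 0x1);
	}
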
@@ -149,6 +222,14 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
149 } 222 }
150} 223}
151 224
225/**
226 * amdgpu_io_rreg - read an IO register
227 *
228 * @adev: amdgpu_device pointer
229 * @reg: dword aligned register offset
230 *
231 * Returns the 32 bit value from the offset specified.
232 */
152u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) 233u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
153{ 234{
154 if ((reg * 4) < adev->rio_mem_size) 235 if ((reg * 4) < adev->rio_mem_size)
@@ -159,6 +240,15 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
159 } 240 }
160} 241}
161 242
243/**
244 * amdgpu_io_wreg - write to an IO register
245 *
246 * @adev: amdgpu_device pointer
247 * @reg: dword aligned register offset
248 * @v: 32 bit value to write to the register
249 *
250 * Writes the value specified to the offset specified.
251 */
162void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) 252void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
163{ 253{
164 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { 254 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
@@ -327,6 +417,14 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
327 BUG(); 417 BUG();
328} 418}
329 419
420/**
421 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
422 *
423 * @adev: amdgpu device pointer
424 *
425 * Allocates a scratch page of VRAM for use by various things in the
426 * driver.
427 */
330static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev) 428static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
331{ 429{
332 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, 430 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
@@ -336,6 +434,13 @@ static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
336 (void **)&adev->vram_scratch.ptr); 434 (void **)&adev->vram_scratch.ptr);
337} 435}
338 436
437/**
438 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
439 *
440 * @adev: amdgpu device pointer
441 *
442 * Frees the VRAM scratch page.
443 */
339static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev) 444static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
340{ 445{
341 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL); 446 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
@@ -377,6 +482,14 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
377 } 482 }
378} 483}
379 484
485/**
486 * amdgpu_device_pci_config_reset - reset the GPU
487 *
488 * @adev: amdgpu_device pointer
489 *
490 * Resets the GPU using the pci config reset sequence.
491 * Only applicable to asics prior to vega10.
492 */
380void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) 493void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
381{ 494{
382 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA); 495 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
@@ -537,6 +650,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
537 650
538/** 651/**
539 * amdgpu_device_vram_location - try to find VRAM location 652 * amdgpu_device_vram_location - try to find VRAM location
653 *
540 * @adev: amdgpu device structure holding all necessary informations 654 * @adev: amdgpu device structure holding all necessary informations
541 * @mc: memory controller structure holding memory informations 655 * @mc: memory controller structure holding memory informations
542 * @base: base address at which to put VRAM 656 * @base: base address at which to put VRAM
@@ -545,7 +659,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
545 * as parameter. 659 * as parameter.
546 */ 660 */
547void amdgpu_device_vram_location(struct amdgpu_device *adev, 661void amdgpu_device_vram_location(struct amdgpu_device *adev,
548 struct amdgpu_mc *mc, u64 base) 662 struct amdgpu_gmc *mc, u64 base)
549{ 663{
550 uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; 664 uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
551 665
@@ -560,6 +674,7 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
560 674
561/** 675/**
562 * amdgpu_device_gart_location - try to find GTT location 676 * amdgpu_device_gart_location - try to find GTT location
677 *
563 * @adev: amdgpu device structure holding all necessary informations 678 * @adev: amdgpu device structure holding all necessary informations
564 * @mc: memory controller structure holding memory informations 679 * @mc: memory controller structure holding memory informations
565 * 680 *
@@ -571,11 +686,11 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
571 * FIXME: when reducing GTT size align new size on power of 2. 686 * FIXME: when reducing GTT size align new size on power of 2.
572 */ 687 */
573void amdgpu_device_gart_location(struct amdgpu_device *adev, 688void amdgpu_device_gart_location(struct amdgpu_device *adev,
574 struct amdgpu_mc *mc) 689 struct amdgpu_gmc *mc)
575{ 690{
576 u64 size_af, size_bf; 691 u64 size_af, size_bf;
577 692
578 size_af = adev->mc.mc_mask - mc->vram_end; 693 size_af = adev->gmc.mc_mask - mc->vram_end;
579 size_bf = mc->vram_start; 694 size_bf = mc->vram_start;
580 if (size_bf > size_af) { 695 if (size_bf > size_af) {
581 if (mc->gart_size > size_bf) { 696 if (mc->gart_size > size_bf) {
@@ -609,7 +724,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
609 */ 724 */
610int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) 725int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
611{ 726{
612 u64 space_needed = roundup_pow_of_two(adev->mc.real_vram_size); 727 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
613 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1; 728 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
614 struct pci_bus *root; 729 struct pci_bus *root;
615 struct resource *res; 730 struct resource *res;
@@ -746,6 +861,16 @@ static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
746 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 861 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
747} 862}
748 863
864/**
865 * amdgpu_device_check_block_size - validate the vm block size
866 *
867 * @adev: amdgpu_device pointer
868 *
869 * Validates the vm block size specified via module parameter.
 870 * The vm block size defines the number of bits in the page table versus the
 871 * page directory; a page is 4KB, so we have a 12-bit offset, at least 9 bits
 872 * in the page table, and the remaining bits in the page directory.
873 */
749static void amdgpu_device_check_block_size(struct amdgpu_device *adev) 874static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
750{ 875{
751 /* defines number of bits in page table versus page directory, 876 /* defines number of bits in page table versus page directory,
@@ -761,6 +886,14 @@ static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
761 } 886 }
762} 887}
763 888
889/**
890 * amdgpu_device_check_vm_size - validate the vm size
891 *
892 * @adev: amdgpu_device pointer
893 *
894 * Validates the vm size in GB specified via module parameter.
895 * The VM size is the size of the GPU virtual memory space in GB.
896 */
764static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) 897static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
765{ 898{
766 /* no need to check the default value */ 899 /* no need to check the default value */
@@ -830,6 +963,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
830 dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); 963 dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
831 amdgpu_lockup_timeout = 10000; 964 amdgpu_lockup_timeout = 10000;
832 } 965 }
966
967 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
833} 968}
834 969
835/** 970/**
@@ -893,6 +1028,17 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
893 .can_switch = amdgpu_switcheroo_can_switch, 1028 .can_switch = amdgpu_switcheroo_can_switch,
894}; 1029};
895 1030
1031/**
1032 * amdgpu_device_ip_set_clockgating_state - set the CG state
1033 *
1034 * @adev: amdgpu_device pointer
1035 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1036 * @state: clockgating state (gate or ungate)
1037 *
1038 * Sets the requested clockgating state for all instances of
1039 * the hardware IP specified.
1040 * Returns the error code from the last instance.
1041 */
896int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, 1042int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
897 enum amd_ip_block_type block_type, 1043 enum amd_ip_block_type block_type,
898 enum amd_clockgating_state state) 1044 enum amd_clockgating_state state)
@@ -915,6 +1061,17 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
915 return r; 1061 return r;
916} 1062}
917 1063
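
Both this function and amdgpu_device_ip_set_powergating_state() below share the same dispatch pattern: walk adev->ip_blocks, skip invalid entries, call the hook on every instance of the requested IP type, and return whatever the last instance returned. A simplified, standalone model of that "last instance wins" behaviour; the struct layout here is invented for the example and is not the driver's.

/*
 * Standalone model of the per-instance dispatch used by the clockgating and
 * powergating setters: every matching block is called, and the return value
 * of the last matching instance is what the caller sees, so an earlier
 * failure can be overwritten by a later success.
 */
#include <stdio.h>

struct ip_block {
	int type;
	int valid;
	int (*set_state)(int instance);
};

static int ok(int i)   { (void)i; return 0; }
static int fail(int i) { (void)i; return -22; /* -EINVAL */ }

int main(void)
{
	struct ip_block blocks[] = {
		{ .type = 1, .valid = 1, .set_state = fail }, /* first GFX instance fails */
		{ .type = 2, .valid = 1, .set_state = ok   },
		{ .type = 1, .valid = 1, .set_state = ok   }, /* second GFX instance succeeds */
	};
	int r = 0;

	for (unsigned i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		if (!blocks[i].valid || blocks[i].type != 1)
			continue;
		r = blocks[i].set_state(i);	/* last matching instance decides r */
	}
	printf("returned %d\n", r);	/* prints 0 even though one instance failed */
	return 0;
}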
1064/**
1065 * amdgpu_device_ip_set_powergating_state - set the PG state
1066 *
1067 * @adev: amdgpu_device pointer
1068 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1069 * @state: powergating state (gate or ungate)
1070 *
1071 * Sets the requested powergating state for all instances of
1072 * the hardware IP specified.
1073 * Returns the error code from the last instance.
1074 */
918int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, 1075int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
919 enum amd_ip_block_type block_type, 1076 enum amd_ip_block_type block_type,
920 enum amd_powergating_state state) 1077 enum amd_powergating_state state)
@@ -937,6 +1094,17 @@ int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
937 return r; 1094 return r;
938} 1095}
939 1096
1097/**
1098 * amdgpu_device_ip_get_clockgating_state - get the CG state
1099 *
1100 * @adev: amdgpu_device pointer
1101 * @flags: clockgating feature flags
1102 *
1103 * Walks the list of IPs on the device and updates the clockgating
1104 * flags for each IP.
1105 * Updates @flags with the feature flags for each hardware IP where
1106 * clockgating is enabled.
1107 */
940void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, 1108void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
941 u32 *flags) 1109 u32 *flags)
942{ 1110{
@@ -950,6 +1118,15 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
950 } 1118 }
951} 1119}
952 1120
1121/**
1122 * amdgpu_device_ip_wait_for_idle - wait for idle
1123 *
1124 * @adev: amdgpu_device pointer
1125 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1126 *
1127 * Waits for the requested hardware IP to be idle.
1128 * Returns 0 for success or a negative error code on failure.
1129 */
953int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, 1130int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
954 enum amd_ip_block_type block_type) 1131 enum amd_ip_block_type block_type)
955{ 1132{
@@ -969,6 +1146,15 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
969 1146
970} 1147}
971 1148
1149/**
1150 * amdgpu_device_ip_is_idle - is the hardware IP idle
1151 *
1152 * @adev: amdgpu_device pointer
1153 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1154 *
1155 * Check if the hardware IP is idle or not.
1156 * Returns true if the IP is idle, false if not.
1157 */
972bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, 1158bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
973 enum amd_ip_block_type block_type) 1159 enum amd_ip_block_type block_type)
974{ 1160{
@@ -984,6 +1170,15 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
984 1170
985} 1171}
986 1172
1173/**
1174 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1175 *
1176 * @adev: amdgpu_device pointer
1177 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1178 *
1179 * Returns a pointer to the hardware IP block structure
1180 * if it exists for the asic, otherwise NULL.
1181 */
987struct amdgpu_ip_block * 1182struct amdgpu_ip_block *
988amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, 1183amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
989 enum amd_ip_block_type type) 1184 enum amd_ip_block_type type)
@@ -1037,7 +1232,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1037 if (!ip_block_version) 1232 if (!ip_block_version)
1038 return -EINVAL; 1233 return -EINVAL;
1039 1234
1040 DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks, 1235 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1041 ip_block_version->funcs->name); 1236 ip_block_version->funcs->name);
1042 1237
1043 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; 1238 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
@@ -1045,6 +1240,18 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1045 return 0; 1240 return 0;
1046} 1241}
1047 1242
1243/**
1244 * amdgpu_device_enable_virtual_display - enable virtual display feature
1245 *
1246 * @adev: amdgpu_device pointer
1247 *
1248 * Enables the virtual display feature if the user has enabled it via
1249 * the module parameter virtual_display. This feature provides virtual
1250 * display hardware on headless boards or in virtualized environments.
1251 * This function parses and validates the configuration string specified by
1252 * the user and configures the virtual display configuration (number of
1253 * virtual connectors, crtcs, etc.) specified.
1254 */
1048static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) 1255static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1049{ 1256{
1050 adev->enable_virtual_display = false; 1257 adev->enable_virtual_display = false;
@@ -1090,6 +1297,16 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1090 } 1297 }
1091} 1298}
1092 1299
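
The option string described above is a list of entries, each naming a PCI device and, per the comment, a number of virtual crtcs. A minimal userspace sketch of parsing such a string follows; the exact syntax assumed here ("pciid,num_crtc" entries separated by ';') should be checked against the module parameter description rather than taken from this sketch.

/*
 * Illustrative parser for a virtual_display-style option string. The format
 * is an assumption for the example, not a statement of the driver's syntax.
 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int main(void)
{
	char option[] = "0000:01:00.0,2;0000:02:00.0,1";
	char *entry, *saveptr;

	for (entry = strtok_r(option, ";", &saveptr); entry;
	     entry = strtok_r(NULL, ";", &saveptr)) {
		char *comma = strchr(entry, ',');
		int num_crtc = 1;			/* assume one crtc if unspecified */

		if (comma) {
			*comma = '\0';
			num_crtc = atoi(comma + 1);
		}
		printf("virtual display on %s with %d crtc(s)\n", entry, num_crtc);
	}
	return 0;
}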
1300/**
1301 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1302 *
1303 * @adev: amdgpu_device pointer
1304 *
1305 * Parses the asic configuration parameters specified in the gpu info
1306 * firmware and makes them available to the driver for use in configuring
1307 * the asic.
1308 * Returns 0 on success, -EINVAL on failure.
1309 */
1093static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) 1310static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1094{ 1311{
1095 const char *chip_name; 1312 const char *chip_name;
@@ -1127,6 +1344,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1127 case CHIP_VEGA10: 1344 case CHIP_VEGA10:
1128 chip_name = "vega10"; 1345 chip_name = "vega10";
1129 break; 1346 break;
1347 case CHIP_VEGA12:
1348 chip_name = "vega12";
1349 break;
1130 case CHIP_RAVEN: 1350 case CHIP_RAVEN:
1131 chip_name = "raven"; 1351 chip_name = "raven";
1132 break; 1352 break;
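
For reference, the chip_name chosen in the switch above (including the newly added "vega12") ends up in the name of the firmware file that is requested. The "amdgpu/%s_gpu_info.bin" pattern used below is an assumption based on the usual request_firmware() naming convention and is not shown in this hunk.

/*
 * Illustration only: building the gpu_info firmware file name from a
 * chip_name value. Verify the pattern against the full function body.
 */
#include <stdio.h>

int main(void)
{
	const char *chip_name = "vega12";	/* newly added case above */
	char fw_name[64];

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	printf("requesting %s\n", fw_name);
	return 0;
}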
@@ -1188,6 +1408,16 @@ out:
1188 return err; 1408 return err;
1189} 1409}
1190 1410
1411/**
1412 * amdgpu_device_ip_early_init - run early init for hardware IPs
1413 *
1414 * @adev: amdgpu_device pointer
1415 *
1416 * Early initialization pass for hardware IPs. The hardware IPs that make
1417 * up each asic are discovered and each IP's early_init callback is run. This
1418 * is the first stage in initializing the asic.
1419 * Returns 0 on success, negative error code on failure.
1420 */
1191static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) 1421static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1192{ 1422{
1193 int i, r; 1423 int i, r;
@@ -1240,8 +1470,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1240 return r; 1470 return r;
1241 break; 1471 break;
1242#endif 1472#endif
1243 case CHIP_VEGA10: 1473 case CHIP_VEGA10:
1244 case CHIP_RAVEN: 1474 case CHIP_VEGA12:
1475 case CHIP_RAVEN:
1245 if (adev->asic_type == CHIP_RAVEN) 1476 if (adev->asic_type == CHIP_RAVEN)
1246 adev->family = AMDGPU_FAMILY_RV; 1477 adev->family = AMDGPU_FAMILY_RV;
1247 else 1478 else
@@ -1297,6 +1528,17 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
1297 return 0; 1528 return 0;
1298} 1529}
1299 1530
1531/**
1532 * amdgpu_device_ip_init - run init for hardware IPs
1533 *
1534 * @adev: amdgpu_device pointer
1535 *
1536 * Main initialization pass for hardware IPs. The list of all the hardware
1537 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1538 * are run. sw_init initializes the software state associated with each IP
1539 * and hw_init initializes the hardware associated with each IP.
1540 * Returns 0 on success, negative error code on failure.
1541 */
1300static int amdgpu_device_ip_init(struct amdgpu_device *adev) 1542static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1301{ 1543{
1302 int i, r; 1544 int i, r;
@@ -1311,6 +1553,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1311 return r; 1553 return r;
1312 } 1554 }
1313 adev->ip_blocks[i].status.sw = true; 1555 adev->ip_blocks[i].status.sw = true;
1556
1314 /* need to do gmc hw init early so we can allocate gpu mem */ 1557 /* need to do gmc hw init early so we can allocate gpu mem */
1315 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 1558 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
1316 r = amdgpu_device_vram_scratch_init(adev); 1559 r = amdgpu_device_vram_scratch_init(adev);
@@ -1344,8 +1587,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1344 for (i = 0; i < adev->num_ip_blocks; i++) { 1587 for (i = 0; i < adev->num_ip_blocks; i++) {
1345 if (!adev->ip_blocks[i].status.sw) 1588 if (!adev->ip_blocks[i].status.sw)
1346 continue; 1589 continue;
1347 /* gmc hw init is done early */ 1590 if (adev->ip_blocks[i].status.hw)
1348 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC)
1349 continue; 1591 continue;
1350 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 1592 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1351 if (r) { 1593 if (r) {
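
The hunk above drops the GMC-specific skip in favour of a generic check on status.hw, so any block that has already been brought up (the early GMC init a few lines earlier, for instance) is simply not initialized a second time. A simplified standalone model of that status-flag pattern; the types are invented for the example.

/*
 * Standalone model of the "skip blocks whose hw status is already set"
 * pattern used by amdgpu_device_ip_init() after this change.
 */
#include <stdio.h>

struct block {
	const char *name;
	int hw;			/* already hardware-initialized? */
};

static int hw_init(struct block *b)
{
	printf("hw_init(%s)\n", b->name);
	b->hw = 1;
	return 0;
}

int main(void)
{
	struct block blocks[] = {
		{ "gmc", 1 },	/* initialized early so GPU memory could be allocated */
		{ "gfx", 0 },
		{ "sdma", 0 },
	};

	for (unsigned i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		if (blocks[i].hw)	/* generic skip replaces the GMC type check */
			continue;
		hw_init(&blocks[i]);
	}
	return 0;
}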
@@ -1364,27 +1606,61 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1364 return 0; 1606 return 0;
1365} 1607}
1366 1608
1609/**
1610 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1611 *
1612 * @adev: amdgpu_device pointer
1613 *
1614 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1615 * this function before a GPU reset. If the value is retained after a
1616 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1617 */
1367static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) 1618static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
1368{ 1619{
1369 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); 1620 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1370} 1621}
1371 1622
1623/**
1624 * amdgpu_device_check_vram_lost - check if vram is valid
1625 *
1626 * @adev: amdgpu_device pointer
1627 *
1628 * Checks the reset magic value written to the gart pointer in VRAM.
1629 * The driver calls this after a GPU reset to see if the contents of
1630 * VRAM have been lost or not.
1631 * Returns true if vram is lost, false if not.
1632 */
1372static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) 1633static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
1373{ 1634{
1374 return !!memcmp(adev->gart.ptr, adev->reset_magic, 1635 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1375 AMDGPU_RESET_MAGIC_NUM); 1636 AMDGPU_RESET_MAGIC_NUM);
1376} 1637}
1377 1638
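
The pair of functions above implements a simple before/after comparison: fill_reset_magic() snapshots the first AMDGPU_RESET_MAGIC_NUM bytes behind the GART pointer, and check_vram_lost() memcmp()s them after the reset. A userspace illustration of the idea, with a plain buffer standing in for the GART-mapped page and an arbitrarily chosen snapshot size:

/*
 * Userspace sketch of the reset-magic scheme: save a copy of a VRAM-backed
 * page before the reset and compare afterwards; a mismatch means the reset
 * destroyed VRAM contents. RESET_MAGIC_NUM below is illustrative.
 */
#include <stdio.h>
#include <string.h>

#define RESET_MAGIC_NUM 64

int main(void)
{
	unsigned char vram_page[RESET_MAGIC_NUM];	/* stand-in for adev->gart.ptr */
	unsigned char reset_magic[RESET_MAGIC_NUM];

	memset(vram_page, 0xA5, sizeof(vram_page));	 /* whatever happens to be there */
	memcpy(reset_magic, vram_page, RESET_MAGIC_NUM); /* fill_reset_magic() */

	/* ... GPU reset happens here; a destructive reset would scramble vram_page ... */

	int vram_lost = !!memcmp(vram_page, reset_magic, RESET_MAGIC_NUM);
	printf("vram_lost = %d\n", vram_lost);
	return 0;
}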
1639/**
1640 * amdgpu_device_ip_late_set_cg_state - late init for clockgating
1641 *
1642 * @adev: amdgpu_device pointer
1643 *
1644 * Late initialization pass enabling clockgating for hardware IPs.
1645 * The list of all the hardware IPs that make up the asic is walked and the
1646 * set_clockgating_state callbacks are run. This stage is run late
1647 * in the init process.
1648 * Returns 0 on success, negative error code on failure.
1649 */
1378static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) 1650static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
1379{ 1651{
1380 int i = 0, r; 1652 int i = 0, r;
1381 1653
1654 if (amdgpu_emu_mode == 1)
1655 return 0;
1656
1382 for (i = 0; i < adev->num_ip_blocks; i++) { 1657 for (i = 0; i < adev->num_ip_blocks; i++) {
1383 if (!adev->ip_blocks[i].status.valid) 1658 if (!adev->ip_blocks[i].status.valid)
1384 continue; 1659 continue;
1385 /* skip CG for VCE/UVD, it's handled specially */ 1660 /* skip CG for VCE/UVD, it's handled specially */
1386 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 1661 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1387 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { 1662 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1663 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1388 /* enable clockgating to save power */ 1664 /* enable clockgating to save power */
1389 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 1665 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1390 AMD_CG_STATE_GATE); 1666 AMD_CG_STATE_GATE);
@@ -1398,6 +1674,18 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
1398 return 0; 1674 return 0;
1399} 1675}
1400 1676
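
A recurring change in this patch, visible in the hunk above and again in the fini and suspend paths further down, is guarding set_clockgating_state with a NULL check so IPs that do not implement the hook are skipped rather than dereferenced. A small standalone model of that optional-callback guard, with simplified types:

/*
 * Standalone model of the NULL-callback guard added before
 * set_clockgating_state(): entries without the hook are skipped.
 */
#include <stdio.h>

struct ip_funcs {
	const char *name;
	int (*set_clockgating_state)(void *handle, int state);
};

static int gfx_set_cg(void *handle, int state)
{
	(void)handle;
	printf("gfx: clockgating %s\n", state ? "gated" : "ungated");
	return 0;
}

int main(void)
{
	struct ip_funcs blocks[] = {
		{ "gfx", gfx_set_cg },
		{ "psp", NULL },	/* no clockgating hook: must be skipped */
	};

	for (unsigned i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		if (!blocks[i].set_clockgating_state)	/* the guard added by the patch */
			continue;
		blocks[i].set_clockgating_state(NULL, 1);
	}
	return 0;
}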
1677/**
1678 * amdgpu_device_ip_late_init - run late init for hardware IPs
1679 *
1680 * @adev: amdgpu_device pointer
1681 *
1682 * Late initialization pass for hardware IPs. The list of all the hardware
1683 * IPs that make up the asic is walked and the late_init callbacks are run.
1684 * late_init covers any special initialization that an IP requires
1685 * after all of the IPs have been initialized or something that needs to happen
1686 * late in the init process.
1687 * Returns 0 on success, negative error code on failure.
1688 */
1401static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 1689static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
1402{ 1690{
1403 int i = 0, r; 1691 int i = 0, r;
@@ -1424,6 +1712,17 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
1424 return 0; 1712 return 0;
1425} 1713}
1426 1714
1715/**
1716 * amdgpu_device_ip_fini - run fini for hardware IPs
1717 *
1718 * @adev: amdgpu_device pointer
1719 *
1720 * Main teardown pass for hardware IPs. The list of all the hardware
1721 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1722 * are run. hw_fini tears down the hardware associated with each IP
1723 * and sw_fini tears down any software state associated with each IP.
1724 * Returns 0 on success, negative error code on failure.
1725 */
1427static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 1726static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1428{ 1727{
1429 int i, r; 1728 int i, r;
@@ -1433,7 +1732,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1433 for (i = 0; i < adev->num_ip_blocks; i++) { 1732 for (i = 0; i < adev->num_ip_blocks; i++) {
1434 if (!adev->ip_blocks[i].status.hw) 1733 if (!adev->ip_blocks[i].status.hw)
1435 continue; 1734 continue;
1436 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 1735 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
1736 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1437 /* ungate blocks before hw fini so that we can shutdown the blocks safely */ 1737 /* ungate blocks before hw fini so that we can shutdown the blocks safely */
1438 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 1738 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1439 AMD_CG_STATE_UNGATE); 1739 AMD_CG_STATE_UNGATE);
@@ -1458,7 +1758,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1458 continue; 1758 continue;
1459 1759
1460 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 1760 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1461 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { 1761 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1762 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1462 /* ungate blocks before hw fini so that we can shutdown the blocks safely */ 1763 /* ungate blocks before hw fini so that we can shutdown the blocks safely */
1463 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 1764 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1464 AMD_CG_STATE_UNGATE); 1765 AMD_CG_STATE_UNGATE);
@@ -1479,6 +1780,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1479 adev->ip_blocks[i].status.hw = false; 1780 adev->ip_blocks[i].status.hw = false;
1480 } 1781 }
1481 1782
1783
1482 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 1784 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1483 if (!adev->ip_blocks[i].status.sw) 1785 if (!adev->ip_blocks[i].status.sw)
1484 continue; 1786 continue;
@@ -1514,6 +1816,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1514 return 0; 1816 return 0;
1515} 1817}
1516 1818
1819/**
1820 * amdgpu_device_ip_late_init_func_handler - work handler for clockgating
1821 *
1822 * @work: work_struct
1823 *
1824 * Work handler for amdgpu_device_ip_late_set_cg_state. We put the
1825 * clockgating setup into a worker thread to speed up driver init and
1826 * resume from suspend.
1827 */
1517static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) 1828static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
1518{ 1829{
1519 struct amdgpu_device *adev = 1830 struct amdgpu_device *adev =
@@ -1521,6 +1832,17 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
1521 amdgpu_device_ip_late_set_cg_state(adev); 1832 amdgpu_device_ip_late_set_cg_state(adev);
1522} 1833}
1523 1834
1835/**
1836 * amdgpu_device_ip_suspend - run suspend for hardware IPs
1837 *
1838 * @adev: amdgpu_device pointer
1839 *
1840 * Main suspend function for hardware IPs. The list of all the hardware
1841 * IPs that make up the asic is walked, clockgating is disabled and the
1842 * suspend callbacks are run. suspend puts the hardware and software state
1843 * in each IP into a state suitable for suspend.
1844 * Returns 0 on success, negative error code on failure.
1845 */
1524int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 1846int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
1525{ 1847{
1526 int i, r; 1848 int i, r;
@@ -1539,7 +1861,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
1539 if (!adev->ip_blocks[i].status.valid) 1861 if (!adev->ip_blocks[i].status.valid)
1540 continue; 1862 continue;
1541 /* ungate blocks so that suspend can properly shut them down */ 1863 /* ungate blocks so that suspend can properly shut them down */
1542 if (i != AMD_IP_BLOCK_TYPE_SMC) { 1864 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
1865 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
1543 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 1866 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1544 AMD_CG_STATE_UNGATE); 1867 AMD_CG_STATE_UNGATE);
1545 if (r) { 1868 if (r) {
@@ -1585,6 +1908,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
1585 1908
1586 r = block->version->funcs->hw_init(adev); 1909 r = block->version->funcs->hw_init(adev);
1587 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); 1910 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
1911 if (r)
1912 return r;
1588 } 1913 }
1589 } 1914 }
1590 1915
@@ -1618,12 +1943,26 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
1618 1943
1619 r = block->version->funcs->hw_init(adev); 1944 r = block->version->funcs->hw_init(adev);
1620 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); 1945 DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
1946 if (r)
1947 return r;
1621 } 1948 }
1622 } 1949 }
1623 1950
1624 return 0; 1951 return 0;
1625} 1952}
1626 1953
1954/**
1955 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
1956 *
1957 * @adev: amdgpu_device pointer
1958 *
1959 * First resume function for hardware IPs. The list of all the hardware
1960 * IPs that make up the asic is walked and the resume callbacks are run for
1961 * COMMON, GMC, and IH. resume puts the hardware into a functional state
1962 * after a suspend and updates the software state as necessary. This
1963 * function is also used for restoring the GPU after a GPU reset.
1964 * Returns 0 on success, negative error code on failure.
1965 */
1627static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 1966static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
1628{ 1967{
1629 int i, r; 1968 int i, r;
@@ -1632,9 +1971,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
1632 if (!adev->ip_blocks[i].status.valid) 1971 if (!adev->ip_blocks[i].status.valid)
1633 continue; 1972 continue;
1634 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 1973 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1635 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 1974 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
1636 adev->ip_blocks[i].version->type == 1975 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1637 AMD_IP_BLOCK_TYPE_IH) {
1638 r = adev->ip_blocks[i].version->funcs->resume(adev); 1976 r = adev->ip_blocks[i].version->funcs->resume(adev);
1639 if (r) { 1977 if (r) {
1640 DRM_ERROR("resume of IP block <%s> failed %d\n", 1978 DRM_ERROR("resume of IP block <%s> failed %d\n",
@@ -1647,6 +1985,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
1647 return 0; 1985 return 0;
1648} 1986}
1649 1987
1988/**
1989 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
1990 *
1991 * @adev: amdgpu_device pointer
1992 *
1993 * Second resume function for hardware IPs. The list of all the hardware
1994 * IPs that make up the asic is walked and the resume callbacks are run for
1995 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
1996 * functional state after a suspend and updates the software state as
1997 * necessary. This function is also used for restoring the GPU after a GPU
1998 * reset.
1999 * Returns 0 on success, negative error code on failure.
2000 */
1650static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 2001static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
1651{ 2002{
1652 int i, r; 2003 int i, r;
@@ -1655,8 +2006,8 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
1655 if (!adev->ip_blocks[i].status.valid) 2006 if (!adev->ip_blocks[i].status.valid)
1656 continue; 2007 continue;
1657 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2008 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
1658 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2009 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
1659 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) 2010 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
1660 continue; 2011 continue;
1661 r = adev->ip_blocks[i].version->funcs->resume(adev); 2012 r = adev->ip_blocks[i].version->funcs->resume(adev);
1662 if (r) { 2013 if (r) {
@@ -1669,6 +2020,18 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
1669 return 0; 2020 return 0;
1670} 2021}
1671 2022
2023/**
2024 * amdgpu_device_ip_resume - run resume for hardware IPs
2025 *
2026 * @adev: amdgpu_device pointer
2027 *
2028 * Main resume function for hardware IPs. The hardware IPs
2029 * are split into two resume functions because they are
2030 * also used in recovering from a GPU reset and some additional
2031 * steps need to be taken between them. In this case (S3/S4) they are
2032 * run sequentially.
2033 * Returns 0 on success, negative error code on failure.
2034 */
1672static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 2035static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
1673{ 2036{
1674 int r; 2037 int r;
@@ -1681,6 +2044,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
1681 return r; 2044 return r;
1682} 2045}
1683 2046
2047/**
2048 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2049 *
2050 * @adev: amdgpu_device pointer
2051 *
2052 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2053 */
1684static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 2054static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
1685{ 2055{
1686 if (amdgpu_sriov_vf(adev)) { 2056 if (amdgpu_sriov_vf(adev)) {
@@ -1697,6 +2067,14 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
1697 } 2067 }
1698} 2068}
1699 2069
2070/**
2071 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2072 *
2073 * @asic_type: AMD asic type
2074 *
2075 * Check if there is DC (new modesetting infrastructure) support for an asic.
2076 * Returns true if DC has support, false if not.
2077 */
1700bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 2078bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
1701{ 2079{
1702 switch (asic_type) { 2080 switch (asic_type) {
@@ -1704,6 +2082,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
1704 case CHIP_BONAIRE: 2082 case CHIP_BONAIRE:
1705 case CHIP_HAWAII: 2083 case CHIP_HAWAII:
1706 case CHIP_KAVERI: 2084 case CHIP_KAVERI:
2085 case CHIP_KABINI:
2086 case CHIP_MULLINS:
1707 case CHIP_CARRIZO: 2087 case CHIP_CARRIZO:
1708 case CHIP_STONEY: 2088 case CHIP_STONEY:
1709 case CHIP_POLARIS11: 2089 case CHIP_POLARIS11:
@@ -1714,10 +2094,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
1714#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA) 2094#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
1715 return amdgpu_dc != 0; 2095 return amdgpu_dc != 0;
1716#endif 2096#endif
1717 case CHIP_KABINI:
1718 case CHIP_MULLINS:
1719 return amdgpu_dc > 0;
1720 case CHIP_VEGA10: 2097 case CHIP_VEGA10:
2098 case CHIP_VEGA12:
1721#if defined(CONFIG_DRM_AMD_DC_DCN1_0) 2099#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
1722 case CHIP_RAVEN: 2100 case CHIP_RAVEN:
1723#endif 2101#endif
@@ -1771,14 +2149,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1771 adev->flags = flags; 2149 adev->flags = flags;
1772 adev->asic_type = flags & AMD_ASIC_MASK; 2150 adev->asic_type = flags & AMD_ASIC_MASK;
1773 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 2151 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
1774 adev->mc.gart_size = 512 * 1024 * 1024; 2152 if (amdgpu_emu_mode == 1)
2153 adev->usec_timeout *= 2;
2154 adev->gmc.gart_size = 512 * 1024 * 1024;
1775 adev->accel_working = false; 2155 adev->accel_working = false;
1776 adev->num_rings = 0; 2156 adev->num_rings = 0;
1777 adev->mman.buffer_funcs = NULL; 2157 adev->mman.buffer_funcs = NULL;
1778 adev->mman.buffer_funcs_ring = NULL; 2158 adev->mman.buffer_funcs_ring = NULL;
1779 adev->vm_manager.vm_pte_funcs = NULL; 2159 adev->vm_manager.vm_pte_funcs = NULL;
1780 adev->vm_manager.vm_pte_num_rings = 0; 2160 adev->vm_manager.vm_pte_num_rings = 0;
1781 adev->gart.gart_funcs = NULL; 2161 adev->gmc.gmc_funcs = NULL;
1782 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2162 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
1783 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 2163 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1784 2164
@@ -1867,6 +2247,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1867 if (adev->rio_mem == NULL) 2247 if (adev->rio_mem == NULL)
1868 DRM_INFO("PCI I/O BAR is not found.\n"); 2248 DRM_INFO("PCI I/O BAR is not found.\n");
1869 2249
2250 amdgpu_device_get_pcie_info(adev);
2251
1870 /* early init functions */ 2252 /* early init functions */
1871 r = amdgpu_device_ip_early_init(adev); 2253 r = amdgpu_device_ip_early_init(adev);
1872 if (r) 2254 if (r)
@@ -1885,6 +2267,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1885 if (runtime) 2267 if (runtime)
1886 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 2268 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
1887 2269
2270 if (amdgpu_emu_mode == 1) {
2271 /* post the asic on emulation mode */
2272 emu_soc_asic_init(adev);
2273 goto fence_driver_init;
2274 }
2275
1888 /* Read BIOS */ 2276 /* Read BIOS */
1889 if (!amdgpu_get_bios(adev)) { 2277 if (!amdgpu_get_bios(adev)) {
1890 r = -EINVAL; 2278 r = -EINVAL;
@@ -1937,6 +2325,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1937 amdgpu_atombios_i2c_init(adev); 2325 amdgpu_atombios_i2c_init(adev);
1938 } 2326 }
1939 2327
2328fence_driver_init:
1940 /* Fence driver */ 2329 /* Fence driver */
1941 r = amdgpu_fence_driver_init(adev); 2330 r = amdgpu_fence_driver_init(adev);
1942 if (r) { 2331 if (r) {
@@ -1964,7 +2353,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1964 } 2353 }
1965 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 2354 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
1966 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 2355 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
1967 amdgpu_device_ip_fini(adev);
1968 goto failed; 2356 goto failed;
1969 } 2357 }
1970 2358
@@ -2063,11 +2451,17 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
2063 2451
2064 DRM_INFO("amdgpu: finishing device.\n"); 2452 DRM_INFO("amdgpu: finishing device.\n");
2065 adev->shutdown = true; 2453 adev->shutdown = true;
2066 if (adev->mode_info.mode_config_initialized) 2454 /* disable all interrupts */
2067 drm_crtc_force_disable_all(adev->ddev); 2455 amdgpu_irq_disable_all(adev);
2068 2456 if (adev->mode_info.mode_config_initialized){
2457 if (!amdgpu_device_has_dc_support(adev))
2458 drm_crtc_force_disable_all(adev->ddev);
2459 else
2460 drm_atomic_helper_shutdown(adev->ddev);
2461 }
2069 amdgpu_ib_pool_fini(adev); 2462 amdgpu_ib_pool_fini(adev);
2070 amdgpu_fence_driver_fini(adev); 2463 amdgpu_fence_driver_fini(adev);
2464 amdgpu_pm_sysfs_fini(adev);
2071 amdgpu_fbdev_fini(adev); 2465 amdgpu_fbdev_fini(adev);
2072 r = amdgpu_device_ip_fini(adev); 2466 r = amdgpu_device_ip_fini(adev);
2073 if (adev->firmware.gpu_info_fw) { 2467 if (adev->firmware.gpu_info_fw) {
@@ -2079,7 +2473,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
2079 /* free i2c buses */ 2473 /* free i2c buses */
2080 if (!amdgpu_device_has_dc_support(adev)) 2474 if (!amdgpu_device_has_dc_support(adev))
2081 amdgpu_i2c_fini(adev); 2475 amdgpu_i2c_fini(adev);
2082 amdgpu_atombios_fini(adev); 2476
2477 if (amdgpu_emu_mode != 1)
2478 amdgpu_atombios_fini(adev);
2479
2083 kfree(adev->bios); 2480 kfree(adev->bios);
2084 adev->bios = NULL; 2481 adev->bios = NULL;
2085 if (!pci_is_thunderbolt_attached(adev->pdev)) 2482 if (!pci_is_thunderbolt_attached(adev->pdev))
@@ -2093,7 +2490,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
2093 iounmap(adev->rmmio); 2490 iounmap(adev->rmmio);
2094 adev->rmmio = NULL; 2491 adev->rmmio = NULL;
2095 amdgpu_device_doorbell_fini(adev); 2492 amdgpu_device_doorbell_fini(adev);
2096 amdgpu_pm_sysfs_fini(adev);
2097 amdgpu_debugfs_regs_cleanup(adev); 2493 amdgpu_debugfs_regs_cleanup(adev);
2098} 2494}
2099 2495
@@ -2322,6 +2718,16 @@ unlock:
2322 return r; 2718 return r;
2323} 2719}
2324 2720
2721/**
2722 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
2723 *
2724 * @adev: amdgpu_device pointer
2725 *
2726 * The list of all the hardware IPs that make up the asic is walked and
2727 * the check_soft_reset callbacks are run. check_soft_reset determines
2728 * if the asic is still hung or not.
2729 * Returns true if any of the IPs are still in a hung state, false if not.
2730 */
2325static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 2731static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
2326{ 2732{
2327 int i; 2733 int i;
@@ -2344,6 +2750,17 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
2344 return asic_hang; 2750 return asic_hang;
2345} 2751}
2346 2752
2753/**
2754 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
2755 *
2756 * @adev: amdgpu_device pointer
2757 *
2758 * The list of all the hardware IPs that make up the asic is walked and the
2759 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
2760 * handles any IP specific hardware or software state changes that are
2761 * necessary for a soft reset to succeed.
2762 * Returns 0 on success, negative error code on failure.
2763 */
2347static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 2764static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
2348{ 2765{
2349 int i, r = 0; 2766 int i, r = 0;
@@ -2362,6 +2779,15 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
2362 return 0; 2779 return 0;
2363} 2780}
2364 2781
2782/**
2783 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
2784 *
2785 * @adev: amdgpu_device pointer
2786 *
2787 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
2788 * reset is necessary to recover.
2789 * Returns true if a full asic reset is required, false if not.
2790 */
2365static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 2791static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
2366{ 2792{
2367 int i; 2793 int i;
@@ -2383,6 +2809,17 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
2383 return false; 2809 return false;
2384} 2810}
2385 2811
2812/**
2813 * amdgpu_device_ip_soft_reset - do a soft reset
2814 *
2815 * @adev: amdgpu_device pointer
2816 *
2817 * The list of all the hardware IPs that make up the asic is walked and the
2818 * soft_reset callbacks are run if the block is hung. soft_reset handles any
2819 * IP specific hardware or software state changes that are necessary to soft
2820 * reset the IP.
2821 * Returns 0 on success, negative error code on failure.
2822 */
2386static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 2823static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
2387{ 2824{
2388 int i, r = 0; 2825 int i, r = 0;
@@ -2401,6 +2838,17 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
2401 return 0; 2838 return 0;
2402} 2839}
2403 2840
2841/**
2842 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
2843 *
2844 * @adev: amdgpu_device pointer
2845 *
2846 * The list of all the hardware IPs that make up the asic is walked and the
2847 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
2848 * handles any IP specific hardware or software state changes that are
2849 * necessary after the IP has been soft reset.
2850 * Returns 0 on success, negative error code on failure.
2851 */
2404static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 2852static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
2405{ 2853{
2406 int i, r = 0; 2854 int i, r = 0;
@@ -2418,6 +2866,19 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
2418 return 0; 2866 return 0;
2419} 2867}
2420 2868
2869/**
2870 * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
2871 *
2872 * @adev: amdgpu_device pointer
2873 * @ring: amdgpu_ring for the engine handling the buffer operations
2874 * @bo: amdgpu_bo buffer whose shadow is being restored
2875 * @fence: dma_fence associated with the operation
2876 *
2877 * Restores the VRAM buffer contents from the shadow in GTT. Used to
2878 * restore things like GPUVM page tables after a GPU reset where
2879 * the contents of VRAM might be lost.
2880 * Returns 0 on success, negative error code on failure.
2881 */
2421static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev, 2882static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
2422 struct amdgpu_ring *ring, 2883 struct amdgpu_ring *ring,
2423 struct amdgpu_bo *bo, 2884 struct amdgpu_bo *bo,
@@ -2453,17 +2914,81 @@ err:
2453 return r; 2914 return r;
2454} 2915}
2455 2916
2456/* 2917/**
2918 * amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents
2919 *
2920 * @adev: amdgpu_device pointer
2921 *
2922 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
2923 * restore things like GPUVM page tables after a GPU reset where
2924 * the contents of VRAM might be lost.
2925 * Returns 0 on success, 1 on failure.
2926 */
2927static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
2928{
2929 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
2930 struct amdgpu_bo *bo, *tmp;
2931 struct dma_fence *fence = NULL, *next = NULL;
2932 long r = 1;
2933 int i = 0;
2934 long tmo;
2935
2936 if (amdgpu_sriov_runtime(adev))
2937 tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
2938 else
2939 tmo = msecs_to_jiffies(100);
2940
2941 DRM_INFO("recover vram bo from shadow start\n");
2942 mutex_lock(&adev->shadow_list_lock);
2943 list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
2944 next = NULL;
2945 amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
2946 if (fence) {
2947 r = dma_fence_wait_timeout(fence, false, tmo);
2948 if (r == 0)
2949 pr_err("wait fence %p[%d] timeout\n", fence, i);
2950 else if (r < 0)
2951 pr_err("wait fence %p[%d] interrupted\n", fence, i);
2952 if (r < 1) {
2953 dma_fence_put(fence);
2954 fence = next;
2955 break;
2956 }
2957 i++;
2958 }
2959
2960 dma_fence_put(fence);
2961 fence = next;
2962 }
2963 mutex_unlock(&adev->shadow_list_lock);
2964
2965 if (fence) {
2966 r = dma_fence_wait_timeout(fence, false, tmo);
2967 if (r == 0)
2968 pr_err("wait fence %p[%d] timeout\n", fence, i);
2969 else if (r < 0)
2970 pr_err("wait fence %p[%d] interrupted\n", fence, i);
2971
2972 }
2973 dma_fence_put(fence);
2974
2975 if (r > 0)
2976 DRM_INFO("recover vram bo from shadow done\n");
2977 else
2978 DRM_ERROR("recover vram bo from shadow failed\n");
2979
2980 return (r > 0) ? 0 : 1;
2981}
2982
2983/**
2457 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough 2984 * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
2458 * 2985 *
2459 * @adev: amdgpu device pointer 2986 * @adev: amdgpu device pointer
2460 * @reset_flags: output param tells caller the reset result
2461 * 2987 *
2462 * attempt to do soft-reset or full-reset and reinitialize Asic 2988 * attempt to do soft-reset or full-reset and reinitialize Asic
2463 * return 0 means successed otherwise failed 2989 * return 0 means successed otherwise failed
2464*/ 2990 */
2465static int amdgpu_device_reset(struct amdgpu_device *adev, 2991static int amdgpu_device_reset(struct amdgpu_device *adev)
2466 uint64_t* reset_flags)
2467{ 2992{
2468 bool need_full_reset, vram_lost = 0; 2993 bool need_full_reset, vram_lost = 0;
2469 int r; 2994 int r;
@@ -2478,7 +3003,6 @@ static int amdgpu_device_reset(struct amdgpu_device *adev,
2478 DRM_INFO("soft reset failed, will fallback to full reset!\n"); 3003 DRM_INFO("soft reset failed, will fallback to full reset!\n");
2479 need_full_reset = true; 3004 need_full_reset = true;
2480 } 3005 }
2481
2482 } 3006 }
2483 3007
2484 if (need_full_reset) { 3008 if (need_full_reset) {
@@ -2527,28 +3051,21 @@ out:
2527 } 3051 }
2528 } 3052 }
2529 3053
2530 if (reset_flags) { 3054 if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
2531 if (vram_lost) 3055 r = amdgpu_device_handle_vram_lost(adev);
2532 (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
2533
2534 if (need_full_reset)
2535 (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
2536 }
2537 3056
2538 return r; 3057 return r;
2539} 3058}
2540 3059
2541/* 3060/**
2542 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 3061 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
2543 * 3062 *
2544 * @adev: amdgpu device pointer 3063 * @adev: amdgpu device pointer
2545 * @reset_flags: output param tells caller the reset result
2546 * 3064 *
2547 * do VF FLR and reinitialize Asic 3065 * do VF FLR and reinitialize Asic
2548 * return 0 means successed otherwise failed 3066 * return 0 means successed otherwise failed
2549*/ 3067 */
2550static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 3068static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
2551 uint64_t *reset_flags,
2552 bool from_hypervisor) 3069 bool from_hypervisor)
2553{ 3070{
2554 int r; 3071 int r;
@@ -2570,28 +3087,20 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
2570 3087
2571 /* now we are okay to resume SMC/CP/SDMA */ 3088 /* now we are okay to resume SMC/CP/SDMA */
2572 r = amdgpu_device_ip_reinit_late_sriov(adev); 3089 r = amdgpu_device_ip_reinit_late_sriov(adev);
3090 amdgpu_virt_release_full_gpu(adev, true);
2573 if (r) 3091 if (r)
2574 goto error; 3092 goto error;
2575 3093
2576 amdgpu_irq_gpu_reset_resume_helper(adev); 3094 amdgpu_irq_gpu_reset_resume_helper(adev);
2577 r = amdgpu_ib_ring_tests(adev); 3095 r = amdgpu_ib_ring_tests(adev);
2578 if (r)
2579 dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
2580
2581error:
2582 /* release full control of GPU after ib test */
2583 amdgpu_virt_release_full_gpu(adev, true);
2584 3096
2585 if (reset_flags) { 3097 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
2586 if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 3098 atomic_inc(&adev->vram_lost_counter);
2587 (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; 3099 r = amdgpu_device_handle_vram_lost(adev);
2588 atomic_inc(&adev->vram_lost_counter);
2589 }
2590
2591 /* VF FLR or hotlink reset is always full-reset */
2592 (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
2593 } 3100 }
2594 3101
3102error:
3103
2595 return r; 3104 return r;
2596} 3105}
2597 3106
@@ -2609,7 +3118,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
2609 struct amdgpu_job *job, bool force) 3118 struct amdgpu_job *job, bool force)
2610{ 3119{
2611 struct drm_atomic_state *state = NULL; 3120 struct drm_atomic_state *state = NULL;
2612 uint64_t reset_flags = 0;
2613 int i, r, resched; 3121 int i, r, resched;
2614 3122
2615 if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { 3123 if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -2631,22 +3139,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
2631 3139
2632 /* block TTM */ 3140 /* block TTM */
2633 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); 3141 resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
3142
2634 /* store modesetting */ 3143 /* store modesetting */
2635 if (amdgpu_device_has_dc_support(adev)) 3144 if (amdgpu_device_has_dc_support(adev))
2636 state = drm_atomic_helper_suspend(adev->ddev); 3145 state = drm_atomic_helper_suspend(adev->ddev);
2637 3146
2638 /* block scheduler */ 3147 /* block all schedulers and reset given job's ring */
2639 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 3148 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2640 struct amdgpu_ring *ring = adev->rings[i]; 3149 struct amdgpu_ring *ring = adev->rings[i];
2641 3150
2642 if (!ring || !ring->sched.thread) 3151 if (!ring || !ring->sched.thread)
2643 continue; 3152 continue;
2644 3153
2645 /* only focus on the ring hit timeout if &job not NULL */ 3154 kthread_park(ring->sched.thread);
3155
2646 if (job && job->ring->idx != i) 3156 if (job && job->ring->idx != i)
2647 continue; 3157 continue;
2648 3158
2649 kthread_park(ring->sched.thread);
2650 drm_sched_hw_job_reset(&ring->sched, &job->base); 3159 drm_sched_hw_job_reset(&ring->sched, &job->base);
2651 3160
2652 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 3161 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2654,68 +3163,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
2654 } 3163 }
2655 3164
2656 if (amdgpu_sriov_vf(adev)) 3165 if (amdgpu_sriov_vf(adev))
2657 r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true); 3166 r = amdgpu_device_reset_sriov(adev, job ? false : true);
2658 else 3167 else
2659 r = amdgpu_device_reset(adev, &reset_flags); 3168 r = amdgpu_device_reset(adev);
2660
2661 if (!r) {
2662 if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) ||
2663 (reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) {
2664 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
2665 struct amdgpu_bo *bo, *tmp;
2666 struct dma_fence *fence = NULL, *next = NULL;
2667
2668 DRM_INFO("recover vram bo from shadow\n");
2669 mutex_lock(&adev->shadow_list_lock);
2670 list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
2671 next = NULL;
2672 amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
2673 if (fence) {
2674 r = dma_fence_wait(fence, false);
2675 if (r) {
2676 WARN(r, "recovery from shadow isn't completed\n");
2677 break;
2678 }
2679 }
2680
2681 dma_fence_put(fence);
2682 fence = next;
2683 }
2684 mutex_unlock(&adev->shadow_list_lock);
2685 if (fence) {
2686 r = dma_fence_wait(fence, false);
2687 if (r)
2688 WARN(r, "recovery from shadow isn't completed\n");
2689 }
2690 dma_fence_put(fence);
2691 }
2692
2693 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2694 struct amdgpu_ring *ring = adev->rings[i];
2695 3169
2696 if (!ring || !ring->sched.thread) 3170 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2697 continue; 3171 struct amdgpu_ring *ring = adev->rings[i];
2698 3172
2699 /* only focus on the ring hit timeout if &job not NULL */ 3173 if (!ring || !ring->sched.thread)
2700 if (job && job->ring->idx != i) 3174 continue;
2701 continue;
2702 3175
3176 /* only need recovery sched of the given job's ring
3177 * or all rings (in the case @job is NULL)
3178 * after above amdgpu_reset accomplished
3179 */
3180 if ((!job || job->ring->idx == i) && !r)
2703 drm_sched_job_recovery(&ring->sched); 3181 drm_sched_job_recovery(&ring->sched);
2704 kthread_unpark(ring->sched.thread);
2705 }
2706 } else {
2707 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2708 struct amdgpu_ring *ring = adev->rings[i];
2709 3182
2710 if (!ring || !ring->sched.thread) 3183 kthread_unpark(ring->sched.thread);
2711 continue;
2712
2713 /* only focus on the ring hit timeout if &job not NULL */
2714 if (job && job->ring->idx != i)
2715 continue;
2716
2717 kthread_unpark(adev->rings[i]->sched.thread);
2718 }
2719 } 3184 }
2720 3185
2721 if (amdgpu_device_has_dc_support(adev)) { 3186 if (amdgpu_device_has_dc_support(adev)) {
@@ -2741,7 +3206,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
2741 return r; 3206 return r;
2742} 3207}
2743 3208
2744void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 3209/**
3210 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3211 *
3212 * @adev: amdgpu_device pointer
3213 *
3214 * Fetches and stores in the driver the PCIE capabilities (gen speed
3215 * and lanes) of the slot the device is in. Handles APUs and
3216 * virtualized environments where PCIE config space may not be available.
3217 */
3218static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
2745{ 3219{
2746 u32 mask; 3220 u32 mask;
2747 int ret; 3221 int ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 38d47559f098..93f700ab1bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -29,6 +29,7 @@
29#include "amdgpu_i2c.h" 29#include "amdgpu_i2c.h"
30#include "atom.h" 30#include "atom.h"
31#include "amdgpu_connectors.h" 31#include "amdgpu_connectors.h"
32#include "amdgpu_display.h"
32#include <asm/div64.h> 33#include <asm/div64.h>
33 34
34#include <linux/pm_runtime.h> 35#include <linux/pm_runtime.h>
@@ -36,7 +37,8 @@
36#include <drm/drm_edid.h> 37#include <drm/drm_edid.h>
37#include <drm/drm_fb_helper.h> 38#include <drm/drm_fb_helper.h>
38 39
39static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) 40static void amdgpu_display_flip_callback(struct dma_fence *f,
41 struct dma_fence_cb *cb)
40{ 42{
41 struct amdgpu_flip_work *work = 43 struct amdgpu_flip_work *work =
42 container_of(cb, struct amdgpu_flip_work, cb); 44 container_of(cb, struct amdgpu_flip_work, cb);
@@ -45,8 +47,8 @@ static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
45 schedule_work(&work->flip_work.work); 47 schedule_work(&work->flip_work.work);
46} 48}
47 49
48static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work, 50static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
49 struct dma_fence **f) 51 struct dma_fence **f)
50{ 52{
51 struct dma_fence *fence= *f; 53 struct dma_fence *fence= *f;
52 54
@@ -55,14 +57,15 @@ static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
55 57
56 *f = NULL; 58 *f = NULL;
57 59
58 if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback)) 60 if (!dma_fence_add_callback(fence, &work->cb,
61 amdgpu_display_flip_callback))
59 return true; 62 return true;
60 63
61 dma_fence_put(fence); 64 dma_fence_put(fence);
62 return false; 65 return false;
63} 66}
64 67
65static void amdgpu_flip_work_func(struct work_struct *__work) 68static void amdgpu_display_flip_work_func(struct work_struct *__work)
66{ 69{
67 struct delayed_work *delayed_work = 70 struct delayed_work *delayed_work =
68 container_of(__work, struct delayed_work, work); 71 container_of(__work, struct delayed_work, work);
@@ -76,20 +79,20 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
76 unsigned i; 79 unsigned i;
77 int vpos, hpos; 80 int vpos, hpos;
78 81
79 if (amdgpu_flip_handle_fence(work, &work->excl)) 82 if (amdgpu_display_flip_handle_fence(work, &work->excl))
80 return; 83 return;
81 84
82 for (i = 0; i < work->shared_count; ++i) 85 for (i = 0; i < work->shared_count; ++i)
83 if (amdgpu_flip_handle_fence(work, &work->shared[i])) 86 if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
84 return; 87 return;
85 88
86 /* Wait until we're out of the vertical blank period before the one 89 /* Wait until we're out of the vertical blank period before the one
87 * targeted by the flip 90 * targeted by the flip
88 */ 91 */
89 if (amdgpu_crtc->enabled && 92 if (amdgpu_crtc->enabled &&
90 (amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0, 93 (amdgpu_display_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0,
91 &vpos, &hpos, NULL, NULL, 94 &vpos, &hpos, NULL, NULL,
92 &crtc->hwmode) 95 &crtc->hwmode)
93 & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == 96 & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
94 (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && 97 (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
95 (int)(work->target_vblank - 98 (int)(work->target_vblank -
@@ -117,7 +120,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
117/* 120/*
118 * Handle unpin events outside the interrupt handler proper. 121 * Handle unpin events outside the interrupt handler proper.
119 */ 122 */
120static void amdgpu_unpin_work_func(struct work_struct *__work) 123static void amdgpu_display_unpin_work_func(struct work_struct *__work)
121{ 124{
122 struct amdgpu_flip_work *work = 125 struct amdgpu_flip_work *work =
123 container_of(__work, struct amdgpu_flip_work, unpin_work); 126 container_of(__work, struct amdgpu_flip_work, unpin_work);
@@ -139,11 +142,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
139 kfree(work); 142 kfree(work);
140} 143}
141 144
142int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, 145int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
143 struct drm_framebuffer *fb, 146 struct drm_framebuffer *fb,
144 struct drm_pending_vblank_event *event, 147 struct drm_pending_vblank_event *event,
145 uint32_t page_flip_flags, uint32_t target, 148 uint32_t page_flip_flags, uint32_t target,
146 struct drm_modeset_acquire_ctx *ctx) 149 struct drm_modeset_acquire_ctx *ctx)
147{ 150{
148 struct drm_device *dev = crtc->dev; 151 struct drm_device *dev = crtc->dev;
149 struct amdgpu_device *adev = dev->dev_private; 152 struct amdgpu_device *adev = dev->dev_private;
@@ -162,8 +165,8 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
162 if (work == NULL) 165 if (work == NULL)
163 return -ENOMEM; 166 return -ENOMEM;
164 167
165 INIT_DELAYED_WORK(&work->flip_work, amdgpu_flip_work_func); 168 INIT_DELAYED_WORK(&work->flip_work, amdgpu_display_flip_work_func);
166 INIT_WORK(&work->unpin_work, amdgpu_unpin_work_func); 169 INIT_WORK(&work->unpin_work, amdgpu_display_unpin_work_func);
167 170
168 work->event = event; 171 work->event = event;
169 work->adev = adev; 172 work->adev = adev;
@@ -189,7 +192,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
189 goto cleanup; 192 goto cleanup;
190 } 193 }
191 194
192 r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base); 195 r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
193 if (unlikely(r != 0)) { 196 if (unlikely(r != 0)) {
194 DRM_ERROR("failed to pin new abo buffer before flip\n"); 197 DRM_ERROR("failed to pin new abo buffer before flip\n");
195 goto unreserve; 198 goto unreserve;
@@ -207,7 +210,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
207 amdgpu_bo_unreserve(new_abo); 210 amdgpu_bo_unreserve(new_abo);
208 211
209 work->base = base; 212 work->base = base;
210 work->target_vblank = target - drm_crtc_vblank_count(crtc) + 213 work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
211 amdgpu_get_vblank_counter_kms(dev, work->crtc_id); 214 amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
212 215
213 /* we borrow the event spin lock for protecting flip_wrok */ 216 /* we borrow the event spin lock for protecting flip_wrok */
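
The new (uint32_t) cast above matters because the target/current vblank comparison earlier in this file is done with 32-bit wraparound arithmetic: the unsigned difference is reinterpreted as signed, so a target slightly ahead of the counter stays positive even across a counter wrap. A quick userspace illustration with invented values:

/*
 * Userspace illustration of 32-bit wraparound comparison of vblank counts:
 * (int32_t)(target - current) stays small and positive for a near-future
 * target even when the counter wraps.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t current_vblank = 0xfffffffeu;		/* counter about to wrap */
	uint32_t target_vblank  = current_vblank + 3;	/* wraps around to 1 */

	int32_t delta = (int32_t)(target_vblank - current_vblank);
	printf("delta = %d (still in the future)\n", delta);	/* prints 3 */
	return 0;
}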
@@ -228,7 +231,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
228 /* update crtc fb */ 231 /* update crtc fb */
229 crtc->primary->fb = fb; 232 crtc->primary->fb = fb;
230 spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 233 spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
231 amdgpu_flip_work_func(&work->flip_work.work); 234 amdgpu_display_flip_work_func(&work->flip_work.work);
232 return 0; 235 return 0;
233 236
234pflip_cleanup: 237pflip_cleanup:
@@ -254,8 +257,8 @@ cleanup:
254 return r; 257 return r;
255} 258}
256 259
257int amdgpu_crtc_set_config(struct drm_mode_set *set, 260int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
258 struct drm_modeset_acquire_ctx *ctx) 261 struct drm_modeset_acquire_ctx *ctx)
259{ 262{
260 struct drm_device *dev; 263 struct drm_device *dev;
261 struct amdgpu_device *adev; 264 struct amdgpu_device *adev;
@@ -352,7 +355,7 @@ static const char *hpd_names[6] = {
352 "HPD6", 355 "HPD6",
353}; 356};
354 357
355void amdgpu_print_display_setup(struct drm_device *dev) 358void amdgpu_display_print_display_setup(struct drm_device *dev)
356{ 359{
357 struct drm_connector *connector; 360 struct drm_connector *connector;
358 struct amdgpu_connector *amdgpu_connector; 361 struct amdgpu_connector *amdgpu_connector;
@@ -429,11 +432,11 @@ void amdgpu_print_display_setup(struct drm_device *dev)
429} 432}
430 433
431/** 434/**
432 * amdgpu_ddc_probe 435 * amdgpu_display_ddc_probe
433 * 436 *
434 */ 437 */
435bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, 438bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
436 bool use_aux) 439 bool use_aux)
437{ 440{
438 u8 out = 0x0; 441 u8 out = 0x0;
439 u8 buf[8]; 442 u8 buf[8];
@@ -479,7 +482,7 @@ bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector,
479 return true; 482 return true;
480} 483}
481 484
482static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb) 485static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
483{ 486{
484 struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); 487 struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
485 488
@@ -488,9 +491,10 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
488 kfree(amdgpu_fb); 491 kfree(amdgpu_fb);
489} 492}
490 493
491static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb, 494static int amdgpu_display_user_framebuffer_create_handle(
492 struct drm_file *file_priv, 495 struct drm_framebuffer *fb,
493 unsigned int *handle) 496 struct drm_file *file_priv,
497 unsigned int *handle)
494{ 498{
495 struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); 499 struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
496 500
@@ -498,15 +502,28 @@ static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb,
498} 502}
499 503
500static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { 504static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
501 .destroy = amdgpu_user_framebuffer_destroy, 505 .destroy = amdgpu_display_user_framebuffer_destroy,
502 .create_handle = amdgpu_user_framebuffer_create_handle, 506 .create_handle = amdgpu_display_user_framebuffer_create_handle,
503}; 507};
504 508
505int 509uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
506amdgpu_framebuffer_init(struct drm_device *dev, 510{
507 struct amdgpu_framebuffer *rfb, 511 uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
508 const struct drm_mode_fb_cmd2 *mode_cmd, 512
509 struct drm_gem_object *obj) 513#if defined(CONFIG_DRM_AMD_DC)
514 if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN &&
515 adev->flags & AMD_IS_APU &&
516 amdgpu_device_asic_has_dc_support(adev->asic_type))
517 domain |= AMDGPU_GEM_DOMAIN_GTT;
518#endif
519
520 return domain;
521}
522
523int amdgpu_display_framebuffer_init(struct drm_device *dev,
524 struct amdgpu_framebuffer *rfb,
525 const struct drm_mode_fb_cmd2 *mode_cmd,
526 struct drm_gem_object *obj)
510{ 527{
511 int ret; 528 int ret;
512 rfb->obj = obj; 529 rfb->obj = obj;
@@ -520,9 +537,9 @@ amdgpu_framebuffer_init(struct drm_device *dev,
520} 537}
521 538
522struct drm_framebuffer * 539struct drm_framebuffer *
523amdgpu_user_framebuffer_create(struct drm_device *dev, 540amdgpu_display_user_framebuffer_create(struct drm_device *dev,
524 struct drm_file *file_priv, 541 struct drm_file *file_priv,
525 const struct drm_mode_fb_cmd2 *mode_cmd) 542 const struct drm_mode_fb_cmd2 *mode_cmd)
526{ 543{
527 struct drm_gem_object *obj; 544 struct drm_gem_object *obj;
528 struct amdgpu_framebuffer *amdgpu_fb; 545 struct amdgpu_framebuffer *amdgpu_fb;
@@ -547,7 +564,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
547 return ERR_PTR(-ENOMEM); 564 return ERR_PTR(-ENOMEM);
548 } 565 }
549 566
550 ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj); 567 ret = amdgpu_display_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj);
551 if (ret) { 568 if (ret) {
552 kfree(amdgpu_fb); 569 kfree(amdgpu_fb);
553 drm_gem_object_put_unlocked(obj); 570 drm_gem_object_put_unlocked(obj);
@@ -558,7 +575,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
558} 575}
559 576
560const struct drm_mode_config_funcs amdgpu_mode_funcs = { 577const struct drm_mode_config_funcs amdgpu_mode_funcs = {
561 .fb_create = amdgpu_user_framebuffer_create, 578 .fb_create = amdgpu_display_user_framebuffer_create,
562 .output_poll_changed = drm_fb_helper_output_poll_changed, 579 .output_poll_changed = drm_fb_helper_output_poll_changed,
563}; 580};
564 581
@@ -580,7 +597,7 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
580 { AMDGPU_FMT_DITHER_ENABLE, "on" }, 597 { AMDGPU_FMT_DITHER_ENABLE, "on" },
581}; 598};
582 599
583int amdgpu_modeset_create_props(struct amdgpu_device *adev) 600int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
584{ 601{
585 int sz; 602 int sz;
586 603
@@ -629,7 +646,7 @@ int amdgpu_modeset_create_props(struct amdgpu_device *adev)
629 return 0; 646 return 0;
630} 647}
631 648
632void amdgpu_update_display_priority(struct amdgpu_device *adev) 649void amdgpu_display_update_priority(struct amdgpu_device *adev)
633{ 650{
634 /* adjustment options for the display watermarks */ 651 /* adjustment options for the display watermarks */
635 if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2)) 652 if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2))
@@ -639,7 +656,7 @@ void amdgpu_update_display_priority(struct amdgpu_device *adev)
639 656
640} 657}
641 658
642static bool is_hdtv_mode(const struct drm_display_mode *mode) 659static bool amdgpu_display_is_hdtv_mode(const struct drm_display_mode *mode)
643{ 660{
644 /* try and guess if this is a tv or a monitor */ 661 /* try and guess if this is a tv or a monitor */
645 if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */ 662 if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */
@@ -651,9 +668,9 @@ static bool is_hdtv_mode(const struct drm_display_mode *mode)
651 return false; 668 return false;
652} 669}
653 670
654bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, 671bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
655 const struct drm_display_mode *mode, 672 const struct drm_display_mode *mode,
656 struct drm_display_mode *adjusted_mode) 673 struct drm_display_mode *adjusted_mode)
657{ 674{
658 struct drm_device *dev = crtc->dev; 675 struct drm_device *dev = crtc->dev;
659 struct drm_encoder *encoder; 676 struct drm_encoder *encoder;
@@ -696,7 +713,7 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
696 ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || 713 ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
697 ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && 714 ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
698 drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && 715 drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
699 is_hdtv_mode(mode)))) { 716 amdgpu_display_is_hdtv_mode(mode)))) {
700 if (amdgpu_encoder->underscan_hborder != 0) 717 if (amdgpu_encoder->underscan_hborder != 0)
701 amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; 718 amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
702 else 719 else
@@ -764,10 +781,10 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
764 * unknown small number of scanlines wrt. real scanout position. 781 * unknown small number of scanlines wrt. real scanout position.
765 * 782 *
766 */ 783 */
767int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, 784int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
768 unsigned int flags, int *vpos, int *hpos, 785 unsigned int pipe, unsigned int flags, int *vpos,
769 ktime_t *stime, ktime_t *etime, 786 int *hpos, ktime_t *stime, ktime_t *etime,
770 const struct drm_display_mode *mode) 787 const struct drm_display_mode *mode)
771{ 788{
772 u32 vbl = 0, position = 0; 789 u32 vbl = 0, position = 0;
773 int vbl_start, vbl_end, vtotal, ret = 0; 790 int vbl_start, vbl_end, vtotal, ret = 0;
@@ -859,7 +876,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
859 return ret; 876 return ret;
860} 877}
861 878
862int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc) 879int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
863{ 880{
864 if (crtc < 0 || crtc >= adev->mode_info.num_crtc) 881 if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
865 return AMDGPU_CRTC_IRQ_NONE; 882 return AMDGPU_CRTC_IRQ_NONE;
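The amdgpu_display_framebuffer_domains() helper introduced above decides whether a scan-out buffer may also be placed in GTT: only APUs from Carrizo up to (but not including) Raven qualify, and only when the DC display code is built in and enabled for the ASIC. The stand-alone sketch below models that rule; the enum ordering and flag values are illustrative stand-ins, not the driver's real definitions.

    /* Sketch of the scan-out domain selection; all values are made up. */
    #include <stdio.h>

    enum chip { CHIP_BONAIRE, CHIP_CARRIZO, CHIP_STONEY, CHIP_VEGA10, CHIP_RAVEN };

    #define DOMAIN_VRAM 0x4
    #define DOMAIN_GTT  0x2
    #define IS_APU      0x1

    static unsigned fb_domains(enum chip asic, unsigned flags, int has_dc)
    {
        unsigned domain = DOMAIN_VRAM;

        /* pre-Raven APUs with DC can scan out of system memory too */
        if (asic >= CHIP_CARRIZO && asic < CHIP_RAVEN &&
            (flags & IS_APU) && has_dc)
            domain |= DOMAIN_GTT;

        return domain;
    }

    int main(void)
    {
        printf("Stoney APU + DC: 0x%x\n", fb_domains(CHIP_STONEY, IS_APU, 1));
        printf("Vega10 dGPU:     0x%x\n", fb_domains(CHIP_VEGA10, 0, 1));
        return 0;
    }

Discrete GPUs keep plain VRAM, which is why the amdgpu_bo_pin() call sites switched over to the helper behave exactly as before on those parts.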
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 0bcb6c6e0ca9..2b11d808f297 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,9 +23,10 @@
23#ifndef __AMDGPU_DISPLAY_H__ 23#ifndef __AMDGPU_DISPLAY_H__
24#define __AMDGPU_DISPLAY_H__ 24#define __AMDGPU_DISPLAY_H__
25 25
26uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
26struct drm_framebuffer * 27struct drm_framebuffer *
27amdgpu_user_framebuffer_create(struct drm_device *dev, 28amdgpu_display_user_framebuffer_create(struct drm_device *dev,
28 struct drm_file *file_priv, 29 struct drm_file *file_priv,
29 const struct drm_mode_fb_cmd2 *mode_cmd); 30 const struct drm_mode_fb_cmd2 *mode_cmd);
30 31
31#endif 32#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index a8437a3296a6..643d008410c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -265,9 +265,6 @@ enum amdgpu_pcie_gen {
265#define amdgpu_dpm_read_sensor(adev, idx, value, size) \ 265#define amdgpu_dpm_read_sensor(adev, idx, value, size) \
266 ((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size))) 266 ((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size)))
267 267
268#define amdgpu_dpm_get_temperature(adev) \
269 ((adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle))
270
271#define amdgpu_dpm_set_fan_control_mode(adev, m) \ 268#define amdgpu_dpm_set_fan_control_mode(adev, m) \
272 ((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m))) 269 ((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)))
273 270
@@ -328,8 +325,8 @@ enum amdgpu_pcie_gen {
328#define amdgpu_dpm_set_mclk_od(adev, value) \ 325#define amdgpu_dpm_set_mclk_od(adev, value) \
329 ((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value)) 326 ((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
330 327
331#define amdgpu_dpm_dispatch_task(adev, task_id, input, output) \ 328#define amdgpu_dpm_dispatch_task(adev, task_id, user_state) \
332 ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (input), (output)) 329 ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (user_state))
333 330
334#define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \ 331#define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \
335 ((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal))) 332 ((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal)))
@@ -344,17 +341,9 @@ enum amdgpu_pcie_gen {
344 ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ 341 ((adev)->powerplay.pp_funcs->reset_power_profile_state(\
345 (adev)->powerplay.pp_handle, request)) 342 (adev)->powerplay.pp_handle, request))
346 343
347#define amdgpu_dpm_get_power_profile_state(adev, query) \ 344#define amdgpu_dpm_switch_power_profile(adev, type, en) \
348 ((adev)->powerplay.pp_funcs->get_power_profile_state(\
349 (adev)->powerplay.pp_handle, query))
350
351#define amdgpu_dpm_set_power_profile_state(adev, request) \
352 ((adev)->powerplay.pp_funcs->set_power_profile_state(\
353 (adev)->powerplay.pp_handle, request))
354
355#define amdgpu_dpm_switch_power_profile(adev, type) \
356 ((adev)->powerplay.pp_funcs->switch_power_profile(\ 345 ((adev)->powerplay.pp_funcs->switch_power_profile(\
357 (adev)->powerplay.pp_handle, type)) 346 (adev)->powerplay.pp_handle, type, en))
358 347
359#define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ 348#define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
360 ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ 349 ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
@@ -366,6 +355,22 @@ enum amdgpu_pcie_gen {
366 (adev)->powerplay.pp_handle, virtual_addr_low, \ 355 (adev)->powerplay.pp_handle, virtual_addr_low, \
367 virtual_addr_hi, mc_addr_low, mc_addr_hi, size) 356 virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
368 357
358#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
359 ((adev)->powerplay.pp_funcs->get_power_profile_mode(\
360 (adev)->powerplay.pp_handle, buf))
361
362#define amdgpu_dpm_set_power_profile_mode(adev, parameter, size) \
363 ((adev)->powerplay.pp_funcs->set_power_profile_mode(\
364 (adev)->powerplay.pp_handle, parameter, size))
365
366#define amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, size) \
367 ((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
368 (adev)->powerplay.pp_handle, type, parameter, size))
369
370#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \
371 ((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \
372 (adev)->powerplay.pp_handle))
373
369struct amdgpu_dpm { 374struct amdgpu_dpm {
370 struct amdgpu_ps *ps; 375 struct amdgpu_ps *ps;
371 /* number of valid power states */ 376 /* number of valid power states */
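All of the amdgpu_dpm_* macros touched above follow one pattern: look up a callback in the powerplay function table and forward the call together with the opaque pp_handle. The stand-alone sketch below reproduces that dispatch pattern with invented names; it is not the real powerplay interface.

    #include <stdio.h>

    struct pp_funcs {
        int (*get_power_profile_mode)(void *handle, char *buf);
    };

    struct powerplay {
        const struct pp_funcs *pp_funcs;   /* per-ASIC callback table */
        void *pp_handle;                   /* opaque backend context  */
    };

    /* macro wrapper, mirroring the amdgpu_dpm_* style above */
    #define dpm_get_power_profile_mode(pp, buf) \
        ((pp)->pp_funcs->get_power_profile_mode((pp)->pp_handle, (buf)))

    static int dummy_get_mode(void *handle, char *buf)
    {
        (void)handle;
        return sprintf(buf, "BOOTUP_DEFAULT\n");
    }

    int main(void)
    {
        const struct pp_funcs funcs = { .get_power_profile_mode = dummy_get_mode };
        struct powerplay pp = { .pp_funcs = &funcs, .pp_handle = NULL };
        char buf[64];

        dpm_get_power_profile_mode(&pp, buf);
        printf("%s", buf);
        return 0;
    }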
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 50afcf65181a..0b19482b36b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -73,9 +73,11 @@
73 * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl 73 * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
74 * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl 74 * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
75 * - 3.23.0 - Add query for VRAM lost counter 75 * - 3.23.0 - Add query for VRAM lost counter
76 * - 3.24.0 - Add high priority compute support for gfx9
77 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
76 */ 78 */
77#define KMS_DRIVER_MAJOR 3 79#define KMS_DRIVER_MAJOR 3
78#define KMS_DRIVER_MINOR 23 80#define KMS_DRIVER_MINOR 25
79#define KMS_DRIVER_PATCHLEVEL 0 81#define KMS_DRIVER_PATCHLEVEL 0
80 82
81int amdgpu_vram_limit = 0; 83int amdgpu_vram_limit = 0;
@@ -119,7 +121,7 @@ uint amdgpu_pg_mask = 0xffffffff;
119uint amdgpu_sdma_phase_quantum = 32; 121uint amdgpu_sdma_phase_quantum = 32;
120char *amdgpu_disable_cu = NULL; 122char *amdgpu_disable_cu = NULL;
121char *amdgpu_virtual_display = NULL; 123char *amdgpu_virtual_display = NULL;
122uint amdgpu_pp_feature_mask = 0xffffffff; 124uint amdgpu_pp_feature_mask = 0xffffbfff;
123int amdgpu_ngg = 0; 125int amdgpu_ngg = 0;
124int amdgpu_prim_buf_per_se = 0; 126int amdgpu_prim_buf_per_se = 0;
125int amdgpu_pos_buf_per_se = 0; 127int amdgpu_pos_buf_per_se = 0;
@@ -129,6 +131,7 @@ int amdgpu_job_hang_limit = 0;
129int amdgpu_lbpw = -1; 131int amdgpu_lbpw = -1;
130int amdgpu_compute_multipipe = -1; 132int amdgpu_compute_multipipe = -1;
131int amdgpu_gpu_recovery = -1; /* auto */ 133int amdgpu_gpu_recovery = -1; /* auto */
134int amdgpu_emu_mode = 0;
132 135
133MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); 136MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
134module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); 137module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -281,9 +284,12 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444);
281MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); 284MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
282module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); 285module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
283 286
284MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); 287MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
285module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); 288module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
286 289
290MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
291module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
292
287#ifdef CONFIG_DRM_AMDGPU_SI 293#ifdef CONFIG_DRM_AMDGPU_SI
288 294
289#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) 295#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -538,6 +544,12 @@ static const struct pci_device_id pciidlist[] = {
538 {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, 544 {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
539 {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, 545 {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
540 {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, 546 {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
547 /* Vega 12 */
548 {0x1002, 0x69A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
549 {0x1002, 0x69A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
550 {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
551 {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
552 {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
541 /* Raven */ 553 /* Raven */
542 {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, 554 {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
543 555
@@ -576,6 +588,11 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
576 struct drm_device *dev; 588 struct drm_device *dev;
577 unsigned long flags = ent->driver_data; 589 unsigned long flags = ent->driver_data;
578 int ret, retry = 0; 590 int ret, retry = 0;
591 bool supports_atomic = false;
592
593 if (!amdgpu_virtual_display &&
594 amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
595 supports_atomic = true;
579 596
580 if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { 597 if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
581 DRM_INFO("This hardware requires experimental hardware support.\n" 598 DRM_INFO("This hardware requires experimental hardware support.\n"
@@ -596,6 +613,13 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
596 if (ret) 613 if (ret)
597 return ret; 614 return ret;
598 615
616 /* warn the user if they mix atomic and non-atomic capable GPUs */
617 if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic)
618 DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n");
619 /* support atomic early so the atomic debugfs stuff gets created */
620 if (supports_atomic)
621 kms_driver.driver_features |= DRIVER_ATOMIC;
622
599 dev = drm_dev_alloc(&kms_driver, &pdev->dev); 623 dev = drm_dev_alloc(&kms_driver, &pdev->dev);
600 if (IS_ERR(dev)) 624 if (IS_ERR(dev))
601 return PTR_ERR(dev); 625 return PTR_ERR(dev);
@@ -720,7 +744,6 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
720 744
721 drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 745 drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
722 drm_kms_helper_poll_disable(drm_dev); 746 drm_kms_helper_poll_disable(drm_dev);
723 vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_OFF);
724 747
725 ret = amdgpu_device_suspend(drm_dev, false, false); 748 ret = amdgpu_device_suspend(drm_dev, false, false);
726 pci_save_state(pdev); 749 pci_save_state(pdev);
@@ -757,7 +780,6 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
757 780
758 ret = amdgpu_device_resume(drm_dev, false, false); 781 ret = amdgpu_device_resume(drm_dev, false, false);
759 drm_kms_helper_poll_enable(drm_dev); 782 drm_kms_helper_poll_enable(drm_dev);
760 vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
761 drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; 783 drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
762 return 0; 784 return 0;
763} 785}
@@ -835,8 +857,8 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
835 ktime_t *stime, ktime_t *etime, 857 ktime_t *stime, ktime_t *etime,
836 const struct drm_display_mode *mode) 858 const struct drm_display_mode *mode)
837{ 859{
838 return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, 860 return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
839 stime, etime, mode); 861 stime, etime, mode);
840} 862}
841 863
842static struct drm_driver kms_driver = { 864static struct drm_driver kms_driver = {
@@ -854,9 +876,6 @@ static struct drm_driver kms_driver = {
854 .disable_vblank = amdgpu_disable_vblank_kms, 876 .disable_vblank = amdgpu_disable_vblank_kms,
855 .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, 877 .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
856 .get_scanout_position = amdgpu_get_crtc_scanout_position, 878 .get_scanout_position = amdgpu_get_crtc_scanout_position,
857 .irq_preinstall = amdgpu_irq_preinstall,
858 .irq_postinstall = amdgpu_irq_postinstall,
859 .irq_uninstall = amdgpu_irq_uninstall,
860 .irq_handler = amdgpu_irq_handler, 879 .irq_handler = amdgpu_irq_handler,
861 .ioctls = amdgpu_ioctls_kms, 880 .ioctls = amdgpu_ioctls_kms,
862 .gem_free_object_unlocked = amdgpu_gem_object_free, 881 .gem_free_object_unlocked = amdgpu_gem_object_free,
@@ -869,9 +888,7 @@ static struct drm_driver kms_driver = {
869 .prime_handle_to_fd = drm_gem_prime_handle_to_fd, 888 .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
870 .prime_fd_to_handle = drm_gem_prime_fd_to_handle, 889 .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
871 .gem_prime_export = amdgpu_gem_prime_export, 890 .gem_prime_export = amdgpu_gem_prime_export,
872 .gem_prime_import = drm_gem_prime_import, 891 .gem_prime_import = amdgpu_gem_prime_import,
873 .gem_prime_pin = amdgpu_gem_prime_pin,
874 .gem_prime_unpin = amdgpu_gem_prime_unpin,
875 .gem_prime_res_obj = amdgpu_gem_prime_res_obj, 892 .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
876 .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table, 893 .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
877 .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, 894 .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
@@ -905,6 +922,11 @@ static int __init amdgpu_init(void)
905{ 922{
906 int r; 923 int r;
907 924
925 if (vgacon_text_force()) {
926 DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
927 return -EINVAL;
928 }
929
908 r = amdgpu_sync_init(); 930 r = amdgpu_sync_init();
909 if (r) 931 if (r)
910 goto error_sync; 932 goto error_sync;
@@ -913,10 +935,6 @@ static int __init amdgpu_init(void)
913 if (r) 935 if (r)
914 goto error_fence; 936 goto error_fence;
915 937
916 if (vgacon_text_force()) {
917 DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
918 return -EINVAL;
919 }
920 DRM_INFO("amdgpu kernel modesetting enabled.\n"); 938 DRM_INFO("amdgpu kernel modesetting enabled.\n");
921 driver = &kms_driver; 939 driver = &kms_driver;
922 pdriver = &amdgpu_kms_pci_driver; 940 pdriver = &amdgpu_kms_pci_driver;
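The ppfeaturemask default moves from 0xffffffff to 0xffffbfff, i.e. the old all-ones default with exactly one feature bit cleared. The short sketch below only computes which bit that is; what the bit controls is defined by the powerplay feature enum and is not reproduced here.

    #include <stdio.h>

    int main(void)
    {
        unsigned int old_mask = 0xffffffffu;
        unsigned int new_mask = 0xffffbfffu;
        unsigned int cleared  = old_mask & ~new_mask;
        int bit = 0;

        while (!(cleared & (1u << bit)))
            bit++;

        printf("cleared: 0x%08x (bit %d)\n", cleared, bit);   /* 0x00004000, bit 14 */
        return 0;
    }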
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index ff3e9beb7d19..12063019751b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -38,6 +38,8 @@
38 38
39#include <linux/vga_switcheroo.h> 39#include <linux/vga_switcheroo.h>
40 40
41#include "amdgpu_display.h"
42
41/* object hierarchy - 43/* object hierarchy -
 42 this contains a helper + an amdgpu fb 44 this contains a helper + an amdgpu fb
43 the helper contains a pointer to amdgpu framebuffer baseclass. 45 the helper contains a pointer to amdgpu framebuffer baseclass.
@@ -124,7 +126,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
124 struct drm_gem_object *gobj = NULL; 126 struct drm_gem_object *gobj = NULL;
125 struct amdgpu_bo *abo = NULL; 127 struct amdgpu_bo *abo = NULL;
126 bool fb_tiled = false; /* useful for testing */ 128 bool fb_tiled = false; /* useful for testing */
127 u32 tiling_flags = 0; 129 u32 tiling_flags = 0, domain;
128 int ret; 130 int ret;
129 int aligned_size, size; 131 int aligned_size, size;
130 int height = mode_cmd->height; 132 int height = mode_cmd->height;
@@ -135,12 +137,12 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
135 /* need to align pitch with crtc limits */ 137 /* need to align pitch with crtc limits */
136 mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, 138 mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
137 fb_tiled); 139 fb_tiled);
140 domain = amdgpu_display_framebuffer_domains(adev);
138 141
139 height = ALIGN(mode_cmd->height, 8); 142 height = ALIGN(mode_cmd->height, 8);
140 size = mode_cmd->pitches[0] * height; 143 size = mode_cmd->pitches[0] * height;
141 aligned_size = ALIGN(size, PAGE_SIZE); 144 aligned_size = ALIGN(size, PAGE_SIZE);
142 ret = amdgpu_gem_object_create(adev, aligned_size, 0, 145 ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain,
143 AMDGPU_GEM_DOMAIN_VRAM,
144 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 146 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
145 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 147 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
146 AMDGPU_GEM_CREATE_VRAM_CLEARED, 148 AMDGPU_GEM_CREATE_VRAM_CLEARED,
@@ -166,7 +168,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
166 } 168 }
167 169
168 170
169 ret = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, NULL); 171 ret = amdgpu_bo_pin(abo, domain, NULL);
170 if (ret) { 172 if (ret) {
171 amdgpu_bo_unreserve(abo); 173 amdgpu_bo_unreserve(abo);
172 goto out_unref; 174 goto out_unref;
@@ -225,7 +227,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
225 info->par = rfbdev; 227 info->par = rfbdev;
226 info->skip_vt_switch = true; 228 info->skip_vt_switch = true;
227 229
228 ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj); 230 ret = amdgpu_display_framebuffer_init(adev->ddev, &rfbdev->rfb,
231 &mode_cmd, gobj);
229 if (ret) { 232 if (ret) {
230 DRM_ERROR("failed to initialize framebuffer %d\n", ret); 233 DRM_ERROR("failed to initialize framebuffer %d\n", ret);
231 goto out; 234 goto out;
@@ -242,8 +245,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
242 245
243 info->fbops = &amdgpufb_ops; 246 info->fbops = &amdgpufb_ops;
244 247
245 tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start; 248 tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start;
246 info->fix.smem_start = adev->mc.aper_base + tmp; 249 info->fix.smem_start = adev->gmc.aper_base + tmp;
247 info->fix.smem_len = amdgpu_bo_size(abo); 250 info->fix.smem_len = amdgpu_bo_size(abo);
248 info->screen_base = amdgpu_bo_kptr(abo); 251 info->screen_base = amdgpu_bo_kptr(abo);
249 info->screen_size = amdgpu_bo_size(abo); 252 info->screen_size = amdgpu_bo_size(abo);
@@ -252,7 +255,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
252 255
253 /* setup aperture base/size for vesafb takeover */ 256 /* setup aperture base/size for vesafb takeover */
254 info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base; 257 info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base;
255 info->apertures->ranges[0].size = adev->mc.aper_size; 258 info->apertures->ranges[0].size = adev->gmc.aper_size;
256 259
257 /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ 260 /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
258 261
@@ -262,7 +265,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
262 } 265 }
263 266
264 DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start); 267 DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start);
265 DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->mc.aper_base); 268 DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base);
266 DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo)); 269 DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo));
267 DRM_INFO("fb depth is %d\n", fb->format->depth); 270 DRM_INFO("fb depth is %d\n", fb->format->depth);
268 DRM_INFO(" pitch is %d\n", fb->pitches[0]); 271 DRM_INFO(" pitch is %d\n", fb->pitches[0]);
@@ -319,7 +322,7 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
319 return 0; 322 return 0;
320 323
321 /* select 8 bpp console on low vram cards */ 324 /* select 8 bpp console on low vram cards */
322 if (adev->mc.real_vram_size <= (32*1024*1024)) 325 if (adev->gmc.real_vram_size <= (32*1024*1024))
323 bpp_sel = 8; 326 bpp_sel = 8;
324 327
325 rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL); 328 rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL);
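The fbdev code above now reads aperture and VRAM geometry from adev->gmc instead of adev->mc, but the arithmetic is unchanged: the CPU-visible address of the console buffer is its offset from the start of VRAM in the GPU address space, rebased onto the PCI aperture. A stand-alone sketch with made-up addresses:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long vram_start    = 0xF400000000ULL; /* GPU address of VRAM     */
        unsigned long long aper_base     = 0xE0000000ULL;   /* PCI BAR base (CPU side) */
        unsigned long long bo_gpu_offset = 0xF400100000ULL; /* where the fb was pinned */

        unsigned long long tmp        = bo_gpu_offset - vram_start;
        unsigned long long smem_start = aper_base + tmp;

        printf("fb mappable at 0x%llX\n", smem_start);
        return 0;
    }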
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 008e1984b7e3..97449e06a242 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -410,6 +410,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
410int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, 410int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
411 unsigned num_hw_submission) 411 unsigned num_hw_submission)
412{ 412{
413 long timeout;
413 int r; 414 int r;
414 415
415 /* Check that num_hw_submission is a power of two */ 416 /* Check that num_hw_submission is a power of two */
@@ -433,9 +434,16 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
433 434
434 /* No need to setup the GPU scheduler for KIQ ring */ 435 /* No need to setup the GPU scheduler for KIQ ring */
435 if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { 436 if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
 437 /* for the non-SR-IOV case, no timeout is enforced on compute rings */
438 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
439 && !amdgpu_sriov_vf(ring->adev))
440 timeout = MAX_SCHEDULE_TIMEOUT;
441 else
442 timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
443
436 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, 444 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
437 num_hw_submission, amdgpu_job_hang_limit, 445 num_hw_submission, amdgpu_job_hang_limit,
438 msecs_to_jiffies(amdgpu_lockup_timeout), ring->name); 446 timeout, ring->name);
439 if (r) { 447 if (r) {
440 DRM_ERROR("Failed to create scheduler on ring %s.\n", 448 DRM_ERROR("Failed to create scheduler on ring %s.\n",
441 ring->name); 449 ring->name);
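The fence-driver hunk above stops enforcing a job timeout on compute rings outside of SR-IOV while keeping the configurable lockup timeout everywhere else. A rough stand-alone model of that selection, where LONG_MAX and a millisecond value stand in for MAX_SCHEDULE_TIMEOUT and msecs_to_jiffies():

    #include <stdio.h>
    #include <limits.h>

    enum ring_type { RING_GFX, RING_COMPUTE, RING_SDMA };

    static long pick_timeout(enum ring_type type, int is_sriov_vf, long lockup_ms)
    {
        if (type == RING_COMPUTE && !is_sriov_vf)
            return LONG_MAX;          /* stand-in for MAX_SCHEDULE_TIMEOUT */
        return lockup_ms;             /* stand-in for msecs_to_jiffies()   */
    }

    int main(void)
    {
        printf("compute, bare metal: %ld\n", pick_timeout(RING_COMPUTE, 0, 10000));
        printf("compute, SR-IOV VF:  %ld\n", pick_timeout(RING_COMPUTE, 1, 10000));
        printf("gfx:                 %ld\n", pick_timeout(RING_GFX, 0, 10000));
        return 0;
    }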
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 0a4f34afaaaa..cf0f186c6092 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -68,17 +68,15 @@
68 */ 68 */
69static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) 69static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
70{ 70{
71 if (adev->dummy_page.page) 71 struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page;
72
73 if (adev->dummy_page_addr)
72 return 0; 74 return 0;
73 adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); 75 adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0,
74 if (adev->dummy_page.page == NULL) 76 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
75 return -ENOMEM; 77 if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) {
76 adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page,
77 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
78 if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) {
79 dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); 78 dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
80 __free_page(adev->dummy_page.page); 79 adev->dummy_page_addr = 0;
81 adev->dummy_page.page = NULL;
82 return -ENOMEM; 80 return -ENOMEM;
83 } 81 }
84 return 0; 82 return 0;
@@ -93,12 +91,11 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
93 */ 91 */
94static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) 92static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
95{ 93{
96 if (adev->dummy_page.page == NULL) 94 if (!adev->dummy_page_addr)
97 return; 95 return;
98 pci_unmap_page(adev->pdev, adev->dummy_page.addr, 96 pci_unmap_page(adev->pdev, adev->dummy_page_addr,
99 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 97 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
100 __free_page(adev->dummy_page.page); 98 adev->dummy_page_addr = 0;
101 adev->dummy_page.page = NULL;
102} 99}
103 100
104/** 101/**
@@ -116,11 +113,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
116 int r; 113 int r;
117 114
118 if (adev->gart.robj == NULL) { 115 if (adev->gart.robj == NULL) {
119 r = amdgpu_bo_create(adev, adev->gart.table_size, 116 r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
120 PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 117 AMDGPU_GEM_DOMAIN_VRAM,
121 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 118 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
122 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, 119 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
123 NULL, NULL, 0, &adev->gart.robj); 120 ttm_bo_type_kernel, NULL,
121 &adev->gart.robj);
124 if (r) { 122 if (r) {
125 return r; 123 return r;
126 } 124 }
@@ -236,18 +234,19 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
236#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS 234#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
237 adev->gart.pages[p] = NULL; 235 adev->gart.pages[p] = NULL;
238#endif 236#endif
239 page_base = adev->dummy_page.addr; 237 page_base = adev->dummy_page_addr;
240 if (!adev->gart.ptr) 238 if (!adev->gart.ptr)
241 continue; 239 continue;
242 240
243 for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { 241 for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
244 amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, 242 amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
245 t, page_base, flags); 243 t, page_base, flags);
246 page_base += AMDGPU_GPU_PAGE_SIZE; 244 page_base += AMDGPU_GPU_PAGE_SIZE;
247 } 245 }
248 } 246 }
249 mb(); 247 mb();
250 amdgpu_gart_flush_gpu_tlb(adev, 0); 248 amdgpu_asic_flush_hdp(adev, NULL);
249 amdgpu_gmc_flush_gpu_tlb(adev, 0);
251 return 0; 250 return 0;
252} 251}
253 252
@@ -279,7 +278,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
279 for (i = 0; i < pages; i++) { 278 for (i = 0; i < pages; i++) {
280 page_base = dma_addr[i]; 279 page_base = dma_addr[i];
281 for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { 280 for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
282 amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags); 281 amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
283 page_base += AMDGPU_GPU_PAGE_SIZE; 282 page_base += AMDGPU_GPU_PAGE_SIZE;
284 } 283 }
285 } 284 }
@@ -317,7 +316,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
317 t = offset / AMDGPU_GPU_PAGE_SIZE; 316 t = offset / AMDGPU_GPU_PAGE_SIZE;
318 p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); 317 p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
319 for (i = 0; i < pages; i++, p++) 318 for (i = 0; i < pages; i++, p++)
320 adev->gart.pages[p] = pagelist[i]; 319 adev->gart.pages[p] = pagelist ? pagelist[i] : NULL;
321#endif 320#endif
322 321
323 if (!adev->gart.ptr) 322 if (!adev->gart.ptr)
@@ -329,7 +328,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
329 return r; 328 return r;
330 329
331 mb(); 330 mb();
332 amdgpu_gart_flush_gpu_tlb(adev, 0); 331 amdgpu_asic_flush_hdp(adev, NULL);
332 amdgpu_gmc_flush_gpu_tlb(adev, 0);
333 return 0; 333 return 0;
334} 334}
335 335
@@ -345,7 +345,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
345{ 345{
346 int r; 346 int r;
347 347
348 if (adev->dummy_page.page) 348 if (adev->dummy_page_addr)
349 return 0; 349 return 0;
350 350
351 /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */ 351 /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
@@ -357,8 +357,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
357 if (r) 357 if (r)
358 return r; 358 return r;
359 /* Compute table size */ 359 /* Compute table size */
360 adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE; 360 adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
361 adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE; 361 adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
362 DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", 362 DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
363 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); 363 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
364 364
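The GART sizing math survives the mc -> gmc rename unchanged: the table is counted both in CPU pages and in GPU pages, and each bound CPU page is written as PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE consecutive PTEs (one per page here, since both sizes are 4K). Illustrative numbers below; the constants mirror PAGE_SIZE and AMDGPU_GPU_PAGE_SIZE under renamed macros.

    #include <stdio.h>

    #define CPU_PAGE_SIZE 4096ULL   /* stands in for PAGE_SIZE             */
    #define GPU_PAGE_SIZE 4096ULL   /* stands in for AMDGPU_GPU_PAGE_SIZE  */

    int main(void)
    {
        unsigned long long gart_size = 256ULL << 20;   /* assume a 256 MB GART */

        unsigned long long num_cpu_pages = gart_size / CPU_PAGE_SIZE;
        unsigned long long num_gpu_pages = gart_size / GPU_PAGE_SIZE;
        unsigned long long ptes_per_page = CPU_PAGE_SIZE / GPU_PAGE_SIZE;

        printf("GART: num cpu pages %llu, num gpu pages %llu, PTEs per CPU page %llu\n",
               num_cpu_pages, num_gpu_pages, ptes_per_page);
        return 0;
    }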
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index d4a43302c2be..456295c00291 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -31,7 +31,6 @@
31 */ 31 */
32struct amdgpu_device; 32struct amdgpu_device;
33struct amdgpu_bo; 33struct amdgpu_bo;
34struct amdgpu_gart_funcs;
35 34
36#define AMDGPU_GPU_PAGE_SIZE 4096 35#define AMDGPU_GPU_PAGE_SIZE 4096
37#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1) 36#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
@@ -52,8 +51,6 @@ struct amdgpu_gart {
52 51
53 /* Asic default pte flags */ 52 /* Asic default pte flags */
54 uint64_t gart_pte_flags; 53 uint64_t gart_pte_flags;
55
56 const struct amdgpu_gart_funcs *gart_funcs;
57}; 54};
58 55
59int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); 56int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index e48b4ec88c8c..46b9ea4e6103 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
36 struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); 36 struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
37 37
38 if (robj) { 38 if (robj) {
39 if (robj->gem_base.import_attach)
40 drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
41 amdgpu_mn_unregister(robj); 39 amdgpu_mn_unregister(robj);
42 amdgpu_bo_unref(&robj); 40 amdgpu_bo_unref(&robj);
43 } 41 }
@@ -45,7 +43,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
45 43
46int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, 44int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
47 int alignment, u32 initial_domain, 45 int alignment, u32 initial_domain,
48 u64 flags, bool kernel, 46 u64 flags, enum ttm_bo_type type,
49 struct reservation_object *resv, 47 struct reservation_object *resv,
50 struct drm_gem_object **obj) 48 struct drm_gem_object **obj)
51{ 49{
@@ -59,8 +57,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
59 } 57 }
60 58
61retry: 59retry:
62 r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, 60 r = amdgpu_bo_create(adev, size, alignment, initial_domain,
63 flags, NULL, resv, 0, &bo); 61 flags, type, resv, &bo);
64 if (r) { 62 if (r) {
65 if (r != -ERESTARTSYS) { 63 if (r != -ERESTARTSYS) {
66 if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { 64 if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@@ -523,12 +521,13 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
523 goto error; 521 goto error;
524 522
525 if (operation == AMDGPU_VA_OP_MAP || 523 if (operation == AMDGPU_VA_OP_MAP ||
526 operation == AMDGPU_VA_OP_REPLACE) 524 operation == AMDGPU_VA_OP_REPLACE) {
527 r = amdgpu_vm_bo_update(adev, bo_va, false); 525 r = amdgpu_vm_bo_update(adev, bo_va, false);
526 if (r)
527 goto error;
528 }
528 529
529 r = amdgpu_vm_update_directories(adev, vm); 530 r = amdgpu_vm_update_directories(adev, vm);
530 if (r)
531 goto error;
532 531
533error: 532error:
534 if (r && r != -ERESTARTSYS) 533 if (r && r != -ERESTARTSYS)
@@ -634,7 +633,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
634 if (r) 633 if (r)
635 goto error_backoff; 634 goto error_backoff;
636 635
637 va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); 636 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
638 r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, 637 r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
639 args->offset_in_bo, args->map_size, 638 args->offset_in_bo, args->map_size,
640 va_flags); 639 va_flags);
@@ -654,7 +653,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
654 if (r) 653 if (r)
655 goto error_backoff; 654 goto error_backoff;
656 655
657 va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); 656 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
658 r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, 657 r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
659 args->offset_in_bo, args->map_size, 658 args->offset_in_bo, args->map_size,
660 va_flags); 659 va_flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
new file mode 100644
index 000000000000..893c2490b783
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -0,0 +1,112 @@
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26#ifndef __AMDGPU_GMC_H__
27#define __AMDGPU_GMC_H__
28
29#include <linux/types.h>
30
31#include "amdgpu_irq.h"
32
33struct firmware;
34
35/*
36 * VMHUB structures, functions & helpers
37 */
38struct amdgpu_vmhub {
39 uint32_t ctx0_ptb_addr_lo32;
40 uint32_t ctx0_ptb_addr_hi32;
41 uint32_t vm_inv_eng0_req;
42 uint32_t vm_inv_eng0_ack;
43 uint32_t vm_context0_cntl;
44 uint32_t vm_l2_pro_fault_status;
45 uint32_t vm_l2_pro_fault_cntl;
46};
47
48/*
49 * GPU MC structures, functions & helpers
50 */
51struct amdgpu_gmc_funcs {
52 /* flush the vm tlb via mmio */
53 void (*flush_gpu_tlb)(struct amdgpu_device *adev,
54 uint32_t vmid);
55 /* flush the vm tlb via ring */
56 uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
57 uint64_t pd_addr);
58 /* Change the VMID -> PASID mapping */
59 void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid,
60 unsigned pasid);
61 /* write pte/pde updates using the cpu */
62 int (*set_pte_pde)(struct amdgpu_device *adev,
63 void *cpu_pt_addr, /* cpu addr of page table */
64 uint32_t gpu_page_idx, /* pte/pde to update */
65 uint64_t addr, /* addr to write into pte/pde */
66 uint64_t flags); /* access flags */
67 /* enable/disable PRT support */
68 void (*set_prt)(struct amdgpu_device *adev, bool enable);
69 /* set pte flags based per asic */
70 uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
71 uint32_t flags);
72 /* get the pde for a given mc addr */
73 void (*get_vm_pde)(struct amdgpu_device *adev, int level,
74 u64 *dst, u64 *flags);
75};
76
77struct amdgpu_gmc {
78 resource_size_t aper_size;
79 resource_size_t aper_base;
80 /* for some chips with <= 32MB we need to lie
81 * about vram size near mc fb location */
82 u64 mc_vram_size;
83 u64 visible_vram_size;
84 u64 gart_size;
85 u64 gart_start;
86 u64 gart_end;
87 u64 vram_start;
88 u64 vram_end;
89 unsigned vram_width;
90 u64 real_vram_size;
91 int vram_mtrr;
92 u64 mc_mask;
93 const struct firmware *fw; /* MC firmware */
94 uint32_t fw_version;
95 struct amdgpu_irq_src vm_fault;
96 uint32_t vram_type;
97 uint32_t srbm_soft_reset;
98 bool prt_warning;
99 uint64_t stolen_size;
100 /* apertures */
101 u64 shared_aperture_start;
102 u64 shared_aperture_end;
103 u64 private_aperture_start;
104 u64 private_aperture_end;
105 /* protects concurrent invalidation */
106 spinlock_t invalidate_lock;
107 bool translate_further;
108
109 const struct amdgpu_gmc_funcs *gmc_funcs;
110};
111
112#endif
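The new amdgpu_gmc_funcs table gathers the per-ASIC memory-controller callbacks that previously hung off the GART code. As a rough illustration of what a set_pte_pde() hook ends up doing, the sketch below stores one 64-bit entry built from an address plus access flags into a CPU-mapped table; the flag bits are invented, real ASICs define their own PTE layout.

    #include <stdint.h>
    #include <stdio.h>

    #define PTE_VALID    (1ULL << 0)    /* invented flag bits */
    #define PTE_READABLE (1ULL << 5)

    static int set_pte_pde(void *cpu_pt_addr, uint32_t gpu_page_idx,
                           uint64_t addr, uint64_t flags)
    {
        uint64_t *pt = cpu_pt_addr;

        /* page-aligned address combined with the access flags */
        pt[gpu_page_idx] = (addr & ~0xFFFULL) | flags;
        return 0;
    }

    int main(void)
    {
        uint64_t table[8] = { 0 };

        set_pte_pde(table, 3, 0x12345000ULL, PTE_VALID | PTE_READABLE);
        printf("pte[3] = 0x%016llx\n", (unsigned long long)table[3]);
        return 0;
    }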
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 7c2be32c5aea..da7b1b92d9cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -56,7 +56,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
56 return -ENOMEM; 56 return -ENOMEM;
57 57
58 start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; 58 start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
59 size = (adev->mc.gart_size >> PAGE_SHIFT) - start; 59 size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
60 drm_mm_init(&mgr->mm, start, size); 60 drm_mm_init(&mgr->mm, start, size);
61 spin_lock_init(&mgr->lock); 61 spin_lock_init(&mgr->lock);
62 atomic64_set(&mgr->available, p_size); 62 atomic64_set(&mgr->available, p_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a162d87ca0c8..311589e02d17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -181,15 +181,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
181 } 181 }
182 } 182 }
183 183
184 if (ring->funcs->init_cond_exec) 184 if (job && ring->funcs->init_cond_exec)
185 patch_offset = amdgpu_ring_init_cond_exec(ring); 185 patch_offset = amdgpu_ring_init_cond_exec(ring);
186 186
187 if (ring->funcs->emit_hdp_flush
188#ifdef CONFIG_X86_64 187#ifdef CONFIG_X86_64
189 && !(adev->flags & AMD_IS_APU) 188 if (!(adev->flags & AMD_IS_APU))
190#endif 189#endif
191 ) 190 {
192 amdgpu_ring_emit_hdp_flush(ring); 191 if (ring->funcs->emit_hdp_flush)
192 amdgpu_ring_emit_hdp_flush(ring);
193 else
194 amdgpu_asic_flush_hdp(adev, ring);
195 }
193 196
194 skip_preamble = ring->current_ctx == fence_ctx; 197 skip_preamble = ring->current_ctx == fence_ctx;
195 need_ctx_switch = ring->current_ctx != fence_ctx; 198 need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -219,12 +222,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
219 if (ring->funcs->emit_tmz) 222 if (ring->funcs->emit_tmz)
220 amdgpu_ring_emit_tmz(ring, false); 223 amdgpu_ring_emit_tmz(ring, false);
221 224
222 if (ring->funcs->emit_hdp_invalidate
223#ifdef CONFIG_X86_64 225#ifdef CONFIG_X86_64
224 && !(adev->flags & AMD_IS_APU) 226 if (!(adev->flags & AMD_IS_APU))
225#endif 227#endif
226 ) 228 amdgpu_asic_invalidate_hdp(adev, ring);
227 amdgpu_ring_emit_hdp_invalidate(ring);
228 229
229 r = amdgpu_fence_emit(ring, f); 230 r = amdgpu_fence_emit(ring, f);
230 if (r) { 231 if (r) {
@@ -278,11 +279,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
278 return r; 279 return r;
279 } 280 }
280 281
281 r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
282 if (r) {
283 return r;
284 }
285
286 adev->ib_pool_ready = true; 282 adev->ib_pool_ready = true;
287 if (amdgpu_debugfs_sa_init(adev)) { 283 if (amdgpu_debugfs_sa_init(adev)) {
288 dev_err(adev->dev, "failed to register debugfs file for SA\n"); 284 dev_err(adev->dev, "failed to register debugfs file for SA\n");
@@ -301,7 +297,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
301void amdgpu_ib_pool_fini(struct amdgpu_device *adev) 297void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
302{ 298{
303 if (adev->ib_pool_ready) { 299 if (adev->ib_pool_ready) {
304 amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
305 amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo); 300 amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
306 adev->ib_pool_ready = false; 301 adev->ib_pool_ready = false;
307 } 302 }
@@ -321,14 +316,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
321{ 316{
322 unsigned i; 317 unsigned i;
323 int r, ret = 0; 318 int r, ret = 0;
319 long tmo_gfx, tmo_mm;
320
321 tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
322 if (amdgpu_sriov_vf(adev)) {
323 /* for MM engines in hypervisor side they are not scheduled together
324 * with CP and SDMA engines, so even in exclusive mode MM engine could
325 * still running on other VF thus the IB TEST TIMEOUT for MM engines
326 * under SR-IOV should be set to a long time. 8 sec should be enough
327 * for the MM comes back to this VF.
328 */
329 tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
330 }
331
332 if (amdgpu_sriov_runtime(adev)) {
 333 /* the CP & SDMA engines are scheduled together, so the timeout
 334 * needs to be wide enough to cover the time it takes for them to
 335 * come back while in SR-IOV runtime mode only
336 */
337 tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
338 }
324 339
325 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 340 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
326 struct amdgpu_ring *ring = adev->rings[i]; 341 struct amdgpu_ring *ring = adev->rings[i];
342 long tmo;
327 343
328 if (!ring || !ring->ready) 344 if (!ring || !ring->ready)
329 continue; 345 continue;
330 346
 331 r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT); 347 /* MM engines need more time */
348 if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
349 ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
350 ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
351 ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
352 ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
353 tmo = tmo_mm;
354 else
355 tmo = tmo_gfx;
356
357 r = amdgpu_ring_test_ib(ring, tmo);
332 if (r) { 358 if (r) {
333 ring->ready = false; 359 ring->ready = false;
334 360
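The IB-test changes above give the multimedia rings a longer budget when running under SR-IOV, since UVD/VCE/VCN may still be busy on another VF, and stretch the CP/SDMA budget only in SR-IOV runtime mode. A simplified stand-alone model follows; the base value is illustrative and UVD_ENC is folded into the UVD case.

    #include <stdio.h>

    enum ring_type { RING_GFX, RING_COMPUTE, RING_SDMA,
                     RING_UVD, RING_VCE, RING_VCN_DEC, RING_VCN_ENC };

    static long ib_test_timeout(enum ring_type type, int sriov_vf, int sriov_runtime)
    {
        const long base = 1000;                      /* stand-in for AMDGPU_IB_TEST_TIMEOUT */
        long tmo_mm  = sriov_vf ? 8 * base : base;   /* MM engines wait for the other VF    */
        long tmo_gfx = sriov_runtime ? 8 * base : base;

        switch (type) {
        case RING_UVD:
        case RING_VCE:
        case RING_VCN_DEC:
        case RING_VCN_ENC:
            return tmo_mm;
        default:
            return tmo_gfx;
        }
    }

    int main(void)
    {
        printf("UVD under SR-IOV VF: %ld\n", ib_test_timeout(RING_UVD, 1, 0));
        printf("GFX on bare metal:   %ld\n", ib_test_timeout(RING_GFX, 0, 0));
        return 0;
    }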
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 16884a0b677b..a1c78f90eadf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -40,6 +40,12 @@
40 */ 40 */
41static DEFINE_IDA(amdgpu_pasid_ida); 41static DEFINE_IDA(amdgpu_pasid_ida);
42 42
43/* Helper to free pasid from a fence callback */
44struct amdgpu_pasid_cb {
45 struct dma_fence_cb cb;
46 unsigned int pasid;
47};
48
43/** 49/**
44 * amdgpu_pasid_alloc - Allocate a PASID 50 * amdgpu_pasid_alloc - Allocate a PASID
45 * @bits: Maximum width of the PASID in bits, must be at least 1 51 * @bits: Maximum width of the PASID in bits, must be at least 1
@@ -63,6 +69,9 @@ int amdgpu_pasid_alloc(unsigned int bits)
63 break; 69 break;
64 } 70 }
65 71
72 if (pasid >= 0)
73 trace_amdgpu_pasid_allocated(pasid);
74
66 return pasid; 75 return pasid;
67} 76}
68 77
@@ -72,9 +81,86 @@ int amdgpu_pasid_alloc(unsigned int bits)
72 */ 81 */
73void amdgpu_pasid_free(unsigned int pasid) 82void amdgpu_pasid_free(unsigned int pasid)
74{ 83{
84 trace_amdgpu_pasid_freed(pasid);
75 ida_simple_remove(&amdgpu_pasid_ida, pasid); 85 ida_simple_remove(&amdgpu_pasid_ida, pasid);
76} 86}
77 87
88static void amdgpu_pasid_free_cb(struct dma_fence *fence,
89 struct dma_fence_cb *_cb)
90{
91 struct amdgpu_pasid_cb *cb =
92 container_of(_cb, struct amdgpu_pasid_cb, cb);
93
94 amdgpu_pasid_free(cb->pasid);
95 dma_fence_put(fence);
96 kfree(cb);
97}
98
99/**
100 * amdgpu_pasid_free_delayed - free pasid when fences signal
101 *
102 * @resv: reservation object with the fences to wait for
103 * @pasid: pasid to free
104 *
105 * Free the pasid only after all the fences in resv are signaled.
106 */
107void amdgpu_pasid_free_delayed(struct reservation_object *resv,
108 unsigned int pasid)
109{
110 struct dma_fence *fence, **fences;
111 struct amdgpu_pasid_cb *cb;
112 unsigned count;
113 int r;
114
115 r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences);
116 if (r)
117 goto fallback;
118
119 if (count == 0) {
120 amdgpu_pasid_free(pasid);
121 return;
122 }
123
124 if (count == 1) {
125 fence = fences[0];
126 kfree(fences);
127 } else {
128 uint64_t context = dma_fence_context_alloc(1);
129 struct dma_fence_array *array;
130
131 array = dma_fence_array_create(count, fences, context,
132 1, false);
133 if (!array) {
134 kfree(fences);
135 goto fallback;
136 }
137 fence = &array->base;
138 }
139
140 cb = kmalloc(sizeof(*cb), GFP_KERNEL);
141 if (!cb) {
142 /* Last resort when we are OOM */
143 dma_fence_wait(fence, false);
144 dma_fence_put(fence);
145 amdgpu_pasid_free(pasid);
146 } else {
147 cb->pasid = pasid;
148 if (dma_fence_add_callback(fence, &cb->cb,
149 amdgpu_pasid_free_cb))
150 amdgpu_pasid_free_cb(fence, &cb->cb);
151 }
152
153 return;
154
155fallback:
156 /* Not enough memory for the delayed delete, as last resort
157 * block for all the fences to complete.
158 */
159 reservation_object_wait_timeout_rcu(resv, true, false,
160 MAX_SCHEDULE_TIMEOUT);
161 amdgpu_pasid_free(pasid);
162}
163
78/* 164/*
79 * VMID manager 165 * VMID manager
80 * 166 *
@@ -96,164 +182,185 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
96 atomic_read(&adev->gpu_reset_counter); 182 atomic_read(&adev->gpu_reset_counter);
97} 183}
98 184
99/* idr_mgr->lock must be held */
100static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm,
101 struct amdgpu_ring *ring,
102 struct amdgpu_sync *sync,
103 struct dma_fence *fence,
104 struct amdgpu_job *job)
105{
106 struct amdgpu_device *adev = ring->adev;
107 unsigned vmhub = ring->funcs->vmhub;
108 uint64_t fence_context = adev->fence_context + ring->idx;
109 struct amdgpu_vmid *id = vm->reserved_vmid[vmhub];
110 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
111 struct dma_fence *updates = sync->last_vm_update;
112 int r = 0;
113 struct dma_fence *flushed, *tmp;
114 bool needs_flush = vm->use_cpu_for_update;
115
116 flushed = id->flushed_updates;
117 if ((amdgpu_vmid_had_gpu_reset(adev, id)) ||
118 (atomic64_read(&id->owner) != vm->entity.fence_context) ||
119 (job->vm_pd_addr != id->pd_gpu_addr) ||
120 (updates && (!flushed || updates->context != flushed->context ||
121 dma_fence_is_later(updates, flushed))) ||
122 (!id->last_flush || (id->last_flush->context != fence_context &&
123 !dma_fence_is_signaled(id->last_flush)))) {
124 needs_flush = true;
125 /* to prevent one context starved by another context */
126 id->pd_gpu_addr = 0;
127 tmp = amdgpu_sync_peek_fence(&id->active, ring);
128 if (tmp) {
129 r = amdgpu_sync_fence(adev, sync, tmp, false);
130 return r;
131 }
132 }
133
134 /* Good we can use this VMID. Remember this submission as
135 * user of the VMID.
136 */
137 r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
138 if (r)
139 goto out;
140
141 if (updates && (!flushed || updates->context != flushed->context ||
142 dma_fence_is_later(updates, flushed))) {
143 dma_fence_put(id->flushed_updates);
144 id->flushed_updates = dma_fence_get(updates);
145 }
146 id->pd_gpu_addr = job->vm_pd_addr;
147 atomic64_set(&id->owner, vm->entity.fence_context);
148 job->vm_needs_flush = needs_flush;
149 if (needs_flush) {
150 dma_fence_put(id->last_flush);
151 id->last_flush = NULL;
152 }
153 job->vmid = id - id_mgr->ids;
154 trace_amdgpu_vm_grab_id(vm, ring, job);
155out:
156 return r;
157}
158
159/** 185/**
160 * amdgpu_vm_grab_id - allocate the next free VMID 186 * amdgpu_vm_grab_idle - grab idle VMID
161 * 187 *
162 * @vm: vm to allocate id for 188 * @vm: vm to allocate id for
163 * @ring: ring we want to submit job to 189 * @ring: ring we want to submit job to
164 * @sync: sync object where we add dependencies 190 * @sync: sync object where we add dependencies
165 * @fence: fence protecting ID from reuse 191 * @idle: resulting idle VMID
166 * 192 *
 167 * Allocate an id for the vm, adding fences to the sync obj as necessary. 193 * Try to find an idle VMID; if none is idle, add a fence to the sync
 194 * object to wait on. Returns -ENOMEM when we are out of memory.
168 */ 195 */
169int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, 196static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
170 struct amdgpu_sync *sync, struct dma_fence *fence, 197 struct amdgpu_ring *ring,
171 struct amdgpu_job *job) 198 struct amdgpu_sync *sync,
199 struct amdgpu_vmid **idle)
172{ 200{
173 struct amdgpu_device *adev = ring->adev; 201 struct amdgpu_device *adev = ring->adev;
174 unsigned vmhub = ring->funcs->vmhub; 202 unsigned vmhub = ring->funcs->vmhub;
175 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 203 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
176 uint64_t fence_context = adev->fence_context + ring->idx;
177 struct dma_fence *updates = sync->last_vm_update;
178 struct amdgpu_vmid *id, *idle;
179 struct dma_fence **fences; 204 struct dma_fence **fences;
180 unsigned i; 205 unsigned i;
181 int r = 0; 206 int r;
207
208 if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
209 return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
182 210
183 mutex_lock(&id_mgr->lock);
184 if (vm->reserved_vmid[vmhub]) {
185 r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job);
186 mutex_unlock(&id_mgr->lock);
187 return r;
188 }
189 fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); 211 fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
190 if (!fences) { 212 if (!fences)
191 mutex_unlock(&id_mgr->lock);
192 return -ENOMEM; 213 return -ENOMEM;
193 } 214
194 /* Check if we have an idle VMID */ 215 /* Check if we have an idle VMID */
195 i = 0; 216 i = 0;
196 list_for_each_entry(idle, &id_mgr->ids_lru, list) { 217 list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
197 fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); 218 fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
198 if (!fences[i]) 219 if (!fences[i])
199 break; 220 break;
200 ++i; 221 ++i;
201 } 222 }
202 223
203 /* If we can't find a idle VMID to use, wait till one becomes available */ 224 /* If we can't find a idle VMID to use, wait till one becomes available */
204 if (&idle->list == &id_mgr->ids_lru) { 225 if (&(*idle)->list == &id_mgr->ids_lru) {
205 u64 fence_context = adev->vm_manager.fence_context + ring->idx; 226 u64 fence_context = adev->vm_manager.fence_context + ring->idx;
206 unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; 227 unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
207 struct dma_fence_array *array; 228 struct dma_fence_array *array;
208 unsigned j; 229 unsigned j;
209 230
231 *idle = NULL;
210 for (j = 0; j < i; ++j) 232 for (j = 0; j < i; ++j)
211 dma_fence_get(fences[j]); 233 dma_fence_get(fences[j]);
212 234
213 array = dma_fence_array_create(i, fences, fence_context, 235 array = dma_fence_array_create(i, fences, fence_context,
214 seqno, true); 236 seqno, true);
215 if (!array) { 237 if (!array) {
216 for (j = 0; j < i; ++j) 238 for (j = 0; j < i; ++j)
217 dma_fence_put(fences[j]); 239 dma_fence_put(fences[j]);
218 kfree(fences); 240 kfree(fences);
219 r = -ENOMEM; 241 return -ENOMEM;
220 goto error;
221 } 242 }
222 243
244 r = amdgpu_sync_fence(adev, sync, &array->base, false);
245 dma_fence_put(ring->vmid_wait);
246 ring->vmid_wait = &array->base;
247 return r;
248 }
249 kfree(fences);
223 250
224 r = amdgpu_sync_fence(ring->adev, sync, &array->base, false); 251 return 0;
225 dma_fence_put(&array->base); 252}
226 if (r)
227 goto error;
228 253
229 mutex_unlock(&id_mgr->lock); 254/**
230 return 0; 255 * amdgpu_vm_grab_reserved - try to assign reserved VMID
256 *
257 * @vm: vm to allocate id for
258 * @ring: ring we want to submit job to
259 * @sync: sync object where we add dependencies
260 * @fence: fence protecting ID from reuse
261 * @job: job who wants to use the VMID
262 *
263 * Try to assign a reserved VMID.
264 */
265static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
266 struct amdgpu_ring *ring,
267 struct amdgpu_sync *sync,
268 struct dma_fence *fence,
269 struct amdgpu_job *job,
270 struct amdgpu_vmid **id)
271{
272 struct amdgpu_device *adev = ring->adev;
273 unsigned vmhub = ring->funcs->vmhub;
274 uint64_t fence_context = adev->fence_context + ring->idx;
275 struct dma_fence *updates = sync->last_vm_update;
276 bool needs_flush = vm->use_cpu_for_update;
277 int r = 0;
278
279 *id = vm->reserved_vmid[vmhub];
280 if (updates && (*id)->flushed_updates &&
281 updates->context == (*id)->flushed_updates->context &&
282 !dma_fence_is_later(updates, (*id)->flushed_updates))
283 updates = NULL;
284
285 if ((*id)->owner != vm->entity.fence_context ||
286 job->vm_pd_addr != (*id)->pd_gpu_addr ||
287 updates || !(*id)->last_flush ||
288 ((*id)->last_flush->context != fence_context &&
289 !dma_fence_is_signaled((*id)->last_flush))) {
290 struct dma_fence *tmp;
231 291
292 /* to prevent one context starved by another context */
293 (*id)->pd_gpu_addr = 0;
294 tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
295 if (tmp) {
296 *id = NULL;
297 r = amdgpu_sync_fence(adev, sync, tmp, false);
298 return r;
299 }
300 needs_flush = true;
232 } 301 }
233 kfree(fences); 302
303 /* Good we can use this VMID. Remember this submission as
304 * user of the VMID.
305 */
306 r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
307 if (r)
308 return r;
309
310 if (updates) {
311 dma_fence_put((*id)->flushed_updates);
312 (*id)->flushed_updates = dma_fence_get(updates);
313 }
314 job->vm_needs_flush = needs_flush;
315 return 0;
316}
317
318/**
319 * amdgpu_vm_grab_used - try to reuse a VMID
320 *
321 * @vm: vm to allocate id for
322 * @ring: ring we want to submit job to
323 * @sync: sync object where we add dependencies
324 * @fence: fence protecting ID from reuse
325 * @job: job who wants to use the VMID
326 * @id: resulting VMID
327 *
328 * Try to reuse a VMID for this submission.
329 */
330static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
331 struct amdgpu_ring *ring,
332 struct amdgpu_sync *sync,
333 struct dma_fence *fence,
334 struct amdgpu_job *job,
335 struct amdgpu_vmid **id)
336{
337 struct amdgpu_device *adev = ring->adev;
338 unsigned vmhub = ring->funcs->vmhub;
339 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
340 uint64_t fence_context = adev->fence_context + ring->idx;
341 struct dma_fence *updates = sync->last_vm_update;
342 int r;
234 343
235 job->vm_needs_flush = vm->use_cpu_for_update; 344 job->vm_needs_flush = vm->use_cpu_for_update;
345
236 /* Check if we can use a VMID already assigned to this VM */ 346 /* Check if we can use a VMID already assigned to this VM */
237 list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { 347 list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
238 struct dma_fence *flushed;
239 bool needs_flush = vm->use_cpu_for_update; 348 bool needs_flush = vm->use_cpu_for_update;
349 struct dma_fence *flushed;
240 350
241 /* Check all the prerequisites to using this VMID */ 351 /* Check all the prerequisites to using this VMID */
242 if (amdgpu_vmid_had_gpu_reset(adev, id)) 352 if ((*id)->owner != vm->entity.fence_context)
243 continue;
244
245 if (atomic64_read(&id->owner) != vm->entity.fence_context)
246 continue; 353 continue;
247 354
248 if (job->vm_pd_addr != id->pd_gpu_addr) 355 if ((*id)->pd_gpu_addr != job->vm_pd_addr)
249 continue; 356 continue;
250 357
251 if (!id->last_flush || 358 if (!(*id)->last_flush ||
252 (id->last_flush->context != fence_context && 359 ((*id)->last_flush->context != fence_context &&
253 !dma_fence_is_signaled(id->last_flush))) 360 !dma_fence_is_signaled((*id)->last_flush)))
254 needs_flush = true; 361 needs_flush = true;
255 362
256 flushed = id->flushed_updates; 363 flushed = (*id)->flushed_updates;
257 if (updates && (!flushed || dma_fence_is_later(updates, flushed))) 364 if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
258 needs_flush = true; 365 needs_flush = true;
259 366
@@ -261,47 +368,91 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
261 if (adev->asic_type < CHIP_VEGA10 && needs_flush) 368 if (adev->asic_type < CHIP_VEGA10 && needs_flush)
262 continue; 369 continue;
263 370
264 /* Good we can use this VMID. Remember this submission as 371 /* Good, we can use this VMID. Remember this submission as
265 * user of the VMID. 372 * user of the VMID.
266 */ 373 */
267 r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); 374 r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
268 if (r) 375 if (r)
269 goto error; 376 return r;
270 377
271 if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { 378 if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
272 dma_fence_put(id->flushed_updates); 379 dma_fence_put((*id)->flushed_updates);
273 id->flushed_updates = dma_fence_get(updates); 380 (*id)->flushed_updates = dma_fence_get(updates);
274 } 381 }
275 382
276 if (needs_flush) 383 job->vm_needs_flush |= needs_flush;
277 goto needs_flush; 384 return 0;
278 else 385 }
279 goto no_flush_needed;
280 386
281 }; 387 *id = NULL;
388 return 0;
389}
282 390
283 /* Still no ID to use? Then use the idle one found earlier */ 391/**
284 id = idle; 392 * amdgpu_vm_grab_id - allocate the next free VMID
393 *
394 * @vm: vm to allocate id for
395 * @ring: ring we want to submit job to
396 * @sync: sync object where we add dependencies
397 * @fence: fence protecting ID from reuse
398 * @job: job who wants to use the VMID
399 *
400 * Allocate an id for the vm, adding fences to the sync obj as necessary.
401 */
402int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
403 struct amdgpu_sync *sync, struct dma_fence *fence,
404 struct amdgpu_job *job)
405{
406 struct amdgpu_device *adev = ring->adev;
407 unsigned vmhub = ring->funcs->vmhub;
408 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
409 struct amdgpu_vmid *idle = NULL;
410 struct amdgpu_vmid *id = NULL;
411 int r = 0;
285 412
286 /* Remember this submission as user of the VMID */ 413 mutex_lock(&id_mgr->lock);
287 r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); 414 r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
288 if (r) 415 if (r || !idle)
289 goto error; 416 goto error;
290 417
291 id->pd_gpu_addr = job->vm_pd_addr; 418 if (vm->reserved_vmid[vmhub]) {
292 dma_fence_put(id->flushed_updates); 419 r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
293 id->flushed_updates = dma_fence_get(updates); 420 if (r || !id)
294 atomic64_set(&id->owner, vm->entity.fence_context); 421 goto error;
422 } else {
423 r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
424 if (r)
425 goto error;
295 426
296needs_flush: 427 if (!id) {
297 job->vm_needs_flush = true; 428 struct dma_fence *updates = sync->last_vm_update;
298 dma_fence_put(id->last_flush);
299 id->last_flush = NULL;
300 429
301no_flush_needed: 430 /* Still no ID to use? Then use the idle one found earlier */
302 list_move_tail(&id->list, &id_mgr->ids_lru); 431 id = idle;
303 432
433 /* Remember this submission as user of the VMID */
434 r = amdgpu_sync_fence(ring->adev, &id->active,
435 fence, false);
436 if (r)
437 goto error;
438
439 dma_fence_put(id->flushed_updates);
440 id->flushed_updates = dma_fence_get(updates);
441 job->vm_needs_flush = true;
442 }
443
444 list_move_tail(&id->list, &id_mgr->ids_lru);
445 }
446
447 id->pd_gpu_addr = job->vm_pd_addr;
448 id->owner = vm->entity.fence_context;
449
450 if (job->vm_needs_flush) {
451 dma_fence_put(id->last_flush);
452 id->last_flush = NULL;
453 }
304 job->vmid = id - id_mgr->ids; 454 job->vmid = id - id_mgr->ids;
455 job->pasid = vm->pasid;
305 trace_amdgpu_vm_grab_id(vm, ring, job); 456 trace_amdgpu_vm_grab_id(vm, ring, job);
306 457
307error: 458error:
@@ -370,13 +521,15 @@ void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
370 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 521 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
371 struct amdgpu_vmid *id = &id_mgr->ids[vmid]; 522 struct amdgpu_vmid *id = &id_mgr->ids[vmid];
372 523
373 atomic64_set(&id->owner, 0); 524 mutex_lock(&id_mgr->lock);
525 id->owner = 0;
374 id->gds_base = 0; 526 id->gds_base = 0;
375 id->gds_size = 0; 527 id->gds_size = 0;
376 id->gws_base = 0; 528 id->gws_base = 0;
377 id->gws_size = 0; 529 id->gws_size = 0;
378 id->oa_base = 0; 530 id->oa_base = 0;
379 id->oa_size = 0; 531 id->oa_size = 0;
532 mutex_unlock(&id_mgr->lock);
380} 533}
381 534
382/** 535/**
@@ -454,6 +607,7 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
454 amdgpu_sync_free(&id->active); 607 amdgpu_sync_free(&id->active);
455 dma_fence_put(id->flushed_updates); 608 dma_fence_put(id->flushed_updates);
456 dma_fence_put(id->last_flush); 609 dma_fence_put(id->last_flush);
610 dma_fence_put(id->pasid_mapping);
457 } 611 }
458 } 612 }
459} 613}
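
The refactor above splits amdgpu_vmid_grab() into the grab_idle/grab_reserved/grab_used helpers. The overall decision order can be illustrated with a small stand-alone sketch; the struct and helper names below are simplified stand-ins, not the real driver types, and only the control flow (reserved VMID first when one exists, otherwise try to reuse an ID already owned by this VM, otherwise fall back to the idle one and force a flush) mirrors the patch.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the driver structures (hypothetical). */
struct vmid { int dummy; };

struct vm {
	struct vmid *reserved_vmid;	/* non-NULL when a VMID is reserved */
};

/* Mirrors amdgpu_vmid_grab_idle(): pretend an idle ID is always found. */
static struct vmid *grab_idle(void) { static struct vmid idle; return &idle; }

/* Mirrors amdgpu_vmid_grab_used(): reuse only if the LRU holds a match. */
static struct vmid *grab_used(struct vm *vm) { (void)vm; return NULL; }

static struct vmid *grab(struct vm *vm, bool *needs_flush)
{
	struct vmid *idle = grab_idle();
	struct vmid *id;

	if (!idle)			/* no idle ID: the caller has to wait */
		return NULL;

	if (vm->reserved_vmid)		/* reserved path wins when present */
		return vm->reserved_vmid;

	id = grab_used(vm);		/* try to reuse an ID this VM owns */
	if (!id) {
		id = idle;		/* fall back to the idle one */
		*needs_flush = true;	/* a freshly taken ID needs a flush */
	}
	return id;
}

int main(void)
{
	struct vm vm = { .reserved_vmid = NULL };
	bool flush = false;
	struct vmid *id = grab(&vm, &flush);

	printf("got id %p, needs_flush=%d\n", (void *)id, flush);
	return 0;
}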
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index ad931fa570b3..7625419f0fc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -43,7 +43,7 @@ struct amdgpu_vmid {
43 struct list_head list; 43 struct list_head list;
44 struct amdgpu_sync active; 44 struct amdgpu_sync active;
45 struct dma_fence *last_flush; 45 struct dma_fence *last_flush;
46 atomic64_t owner; 46 uint64_t owner;
47 47
48 uint64_t pd_gpu_addr; 48 uint64_t pd_gpu_addr;
49 /* last flushed PD/PT update */ 49 /* last flushed PD/PT update */
@@ -57,6 +57,9 @@ struct amdgpu_vmid {
57 uint32_t gws_size; 57 uint32_t gws_size;
58 uint32_t oa_base; 58 uint32_t oa_base;
59 uint32_t oa_size; 59 uint32_t oa_size;
60
61 unsigned pasid;
62 struct dma_fence *pasid_mapping;
60}; 63};
61 64
62struct amdgpu_vmid_mgr { 65struct amdgpu_vmid_mgr {
@@ -69,6 +72,8 @@ struct amdgpu_vmid_mgr {
69 72
70int amdgpu_pasid_alloc(unsigned int bits); 73int amdgpu_pasid_alloc(unsigned int bits);
71void amdgpu_pasid_free(unsigned int pasid); 74void amdgpu_pasid_free(unsigned int pasid);
75void amdgpu_pasid_free_delayed(struct reservation_object *resv,
76 unsigned int pasid);
72 77
73bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, 78bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
74 struct amdgpu_vmid *id); 79 struct amdgpu_vmid *id);
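
With the owner field changed from atomic64_t to a plain uint64_t, the hunks above serialize access through id_mgr->lock instead (see the mutex_lock/unlock pair added around amdgpu_vmid_reset()). A generic user-space sketch of that pattern, using pthreads rather than the kernel mutex API and with illustrative names only:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Plain field protected by a lock, instead of an atomic. */
struct vmid {
	uint64_t owner;
};

static pthread_mutex_t id_mgr_lock = PTHREAD_MUTEX_INITIALIZER;
static struct vmid id;

/* All readers and writers take the same lock, so no atomics are needed. */
static void vmid_reset(struct vmid *v)
{
	pthread_mutex_lock(&id_mgr_lock);
	v->owner = 0;
	pthread_mutex_unlock(&id_mgr_lock);
}

int main(void)
{
	pthread_mutex_lock(&id_mgr_lock);
	id.owner = 42;			/* assign an owner under the lock */
	pthread_mutex_unlock(&id_mgr_lock);

	vmid_reset(&id);
	printf("owner after reset: %llu\n", (unsigned long long)id.owner);
	return 0;
}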
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 29cf10927a92..0e01f115bbe5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -25,51 +25,12 @@
25#define __AMDGPU_IH_H__ 25#define __AMDGPU_IH_H__
26 26
27#include <linux/chash.h> 27#include <linux/chash.h>
28#include "soc15_ih_clientid.h"
28 29
29struct amdgpu_device; 30struct amdgpu_device;
30 /*
31 * vega10+ IH clients
32 */
33enum amdgpu_ih_clientid
34{
35 AMDGPU_IH_CLIENTID_IH = 0x00,
36 AMDGPU_IH_CLIENTID_ACP = 0x01,
37 AMDGPU_IH_CLIENTID_ATHUB = 0x02,
38 AMDGPU_IH_CLIENTID_BIF = 0x03,
39 AMDGPU_IH_CLIENTID_DCE = 0x04,
40 AMDGPU_IH_CLIENTID_ISP = 0x05,
41 AMDGPU_IH_CLIENTID_PCIE0 = 0x06,
42 AMDGPU_IH_CLIENTID_RLC = 0x07,
43 AMDGPU_IH_CLIENTID_SDMA0 = 0x08,
44 AMDGPU_IH_CLIENTID_SDMA1 = 0x09,
45 AMDGPU_IH_CLIENTID_SE0SH = 0x0a,
46 AMDGPU_IH_CLIENTID_SE1SH = 0x0b,
47 AMDGPU_IH_CLIENTID_SE2SH = 0x0c,
48 AMDGPU_IH_CLIENTID_SE3SH = 0x0d,
49 AMDGPU_IH_CLIENTID_SYSHUB = 0x0e,
50 AMDGPU_IH_CLIENTID_THM = 0x0f,
51 AMDGPU_IH_CLIENTID_UVD = 0x10,
52 AMDGPU_IH_CLIENTID_VCE0 = 0x11,
53 AMDGPU_IH_CLIENTID_VMC = 0x12,
54 AMDGPU_IH_CLIENTID_XDMA = 0x13,
55 AMDGPU_IH_CLIENTID_GRBM_CP = 0x14,
56 AMDGPU_IH_CLIENTID_ATS = 0x15,
57 AMDGPU_IH_CLIENTID_ROM_SMUIO = 0x16,
58 AMDGPU_IH_CLIENTID_DF = 0x17,
59 AMDGPU_IH_CLIENTID_VCE1 = 0x18,
60 AMDGPU_IH_CLIENTID_PWR = 0x19,
61 AMDGPU_IH_CLIENTID_UTCL2 = 0x1b,
62 AMDGPU_IH_CLIENTID_EA = 0x1c,
63 AMDGPU_IH_CLIENTID_UTCL2LOG = 0x1d,
64 AMDGPU_IH_CLIENTID_MP0 = 0x1e,
65 AMDGPU_IH_CLIENTID_MP1 = 0x1f,
66
67 AMDGPU_IH_CLIENTID_MAX,
68
69 AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD
70};
71 31
72#define AMDGPU_IH_CLIENTID_LEGACY 0 32#define AMDGPU_IH_CLIENTID_LEGACY 0
33#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
73 34
74#define AMDGPU_PAGEFAULT_HASH_BITS 8 35#define AMDGPU_PAGEFAULT_HASH_BITS 8
75struct amdgpu_retryfault_hashtable { 36struct amdgpu_retryfault_hashtable {
@@ -109,7 +70,7 @@ struct amdgpu_iv_entry {
109 unsigned vmid_src; 70 unsigned vmid_src;
110 uint64_t timestamp; 71 uint64_t timestamp;
111 unsigned timestamp_src; 72 unsigned timestamp_src;
112 unsigned pas_id; 73 unsigned pasid;
113 unsigned pasid_src; 74 unsigned pasid_src;
114 unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW]; 75 unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
115 const uint32_t *iv_entry; 76 const uint32_t *iv_entry;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 36483e0d3c97..3a5ca462abf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -92,7 +92,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
92} 92}
93 93
94/* Disable *all* interrupts */ 94/* Disable *all* interrupts */
95static void amdgpu_irq_disable_all(struct amdgpu_device *adev) 95void amdgpu_irq_disable_all(struct amdgpu_device *adev)
96{ 96{
97 unsigned long irqflags; 97 unsigned long irqflags;
98 unsigned i, j, k; 98 unsigned i, j, k;
@@ -123,55 +123,6 @@ static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
123} 123}
124 124
125/** 125/**
126 * amdgpu_irq_preinstall - drm irq preinstall callback
127 *
128 * @dev: drm dev pointer
129 *
130 * Gets the hw ready to enable irqs (all asics).
131 * This function disables all interrupt sources on the GPU.
132 */
133void amdgpu_irq_preinstall(struct drm_device *dev)
134{
135 struct amdgpu_device *adev = dev->dev_private;
136
137 /* Disable *all* interrupts */
138 amdgpu_irq_disable_all(adev);
139 /* Clear bits */
140 amdgpu_ih_process(adev);
141}
142
143/**
144 * amdgpu_irq_postinstall - drm irq preinstall callback
145 *
146 * @dev: drm dev pointer
147 *
148 * Handles stuff to be done after enabling irqs (all asics).
149 * Returns 0 on success.
150 */
151int amdgpu_irq_postinstall(struct drm_device *dev)
152{
153 dev->max_vblank_count = 0x00ffffff;
154 return 0;
155}
156
157/**
158 * amdgpu_irq_uninstall - drm irq uninstall callback
159 *
160 * @dev: drm dev pointer
161 *
162 * This function disables all interrupt sources on the GPU (all asics).
163 */
164void amdgpu_irq_uninstall(struct drm_device *dev)
165{
166 struct amdgpu_device *adev = dev->dev_private;
167
168 if (adev == NULL) {
169 return;
170 }
171 amdgpu_irq_disable_all(adev);
172}
173
174/**
175 * amdgpu_irq_handler - irq handler 126 * amdgpu_irq_handler - irq handler
176 * 127 *
177 * @int irq, void *arg: args 128 * @int irq, void *arg: args
@@ -262,6 +213,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
262 cancel_work_sync(&adev->reset_work); 213 cancel_work_sync(&adev->reset_work);
263 return r; 214 return r;
264 } 215 }
216 adev->ddev->max_vblank_count = 0x00ffffff;
265 217
266 DRM_DEBUG("amdgpu: irq initialized.\n"); 218 DRM_DEBUG("amdgpu: irq initialized.\n");
267 return 0; 219 return 0;
@@ -307,6 +259,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
307 } 259 }
308 } 260 }
309 kfree(adev->irq.client[i].sources); 261 kfree(adev->irq.client[i].sources);
262 adev->irq.client[i].sources = NULL;
310 } 263 }
311} 264}
312 265
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 0610cc4a9788..3375ad778edc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -78,9 +78,7 @@ struct amdgpu_irq {
78 uint32_t srbm_soft_reset; 78 uint32_t srbm_soft_reset;
79}; 79};
80 80
81void amdgpu_irq_preinstall(struct drm_device *dev); 81void amdgpu_irq_disable_all(struct amdgpu_device *adev);
82int amdgpu_irq_postinstall(struct drm_device *dev);
83void amdgpu_irq_uninstall(struct drm_device *dev);
84irqreturn_t amdgpu_irq_handler(int irq, void *arg); 82irqreturn_t amdgpu_irq_handler(int irq, void *arg);
85 83
86int amdgpu_irq_init(struct amdgpu_device *adev); 84int amdgpu_irq_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index bd6e9a40f421..4b7824d30e73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -190,8 +190,12 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
190 fw_info->ver = adev->uvd.fw_version; 190 fw_info->ver = adev->uvd.fw_version;
191 fw_info->feature = 0; 191 fw_info->feature = 0;
192 break; 192 break;
193 case AMDGPU_INFO_FW_VCN:
194 fw_info->ver = adev->vcn.fw_version;
195 fw_info->feature = 0;
196 break;
193 case AMDGPU_INFO_FW_GMC: 197 case AMDGPU_INFO_FW_GMC:
194 fw_info->ver = adev->mc.fw_version; 198 fw_info->ver = adev->gmc.fw_version;
195 fw_info->feature = 0; 199 fw_info->feature = 0;
196 break; 200 break;
197 case AMDGPU_INFO_FW_GFX_ME: 201 case AMDGPU_INFO_FW_GFX_ME:
@@ -470,9 +474,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
470 case AMDGPU_INFO_VRAM_GTT: { 474 case AMDGPU_INFO_VRAM_GTT: {
471 struct drm_amdgpu_info_vram_gtt vram_gtt; 475 struct drm_amdgpu_info_vram_gtt vram_gtt;
472 476
473 vram_gtt.vram_size = adev->mc.real_vram_size; 477 vram_gtt.vram_size = adev->gmc.real_vram_size;
474 vram_gtt.vram_size -= adev->vram_pin_size; 478 vram_gtt.vram_size -= adev->vram_pin_size;
475 vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size; 479 vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size;
476 vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); 480 vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
477 vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; 481 vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
478 vram_gtt.gtt_size *= PAGE_SIZE; 482 vram_gtt.gtt_size *= PAGE_SIZE;
@@ -484,17 +488,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
484 struct drm_amdgpu_memory_info mem; 488 struct drm_amdgpu_memory_info mem;
485 489
486 memset(&mem, 0, sizeof(mem)); 490 memset(&mem, 0, sizeof(mem));
487 mem.vram.total_heap_size = adev->mc.real_vram_size; 491 mem.vram.total_heap_size = adev->gmc.real_vram_size;
488 mem.vram.usable_heap_size = 492 mem.vram.usable_heap_size =
489 adev->mc.real_vram_size - adev->vram_pin_size; 493 adev->gmc.real_vram_size - adev->vram_pin_size;
490 mem.vram.heap_usage = 494 mem.vram.heap_usage =
491 amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); 495 amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
492 mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; 496 mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
493 497
494 mem.cpu_accessible_vram.total_heap_size = 498 mem.cpu_accessible_vram.total_heap_size =
495 adev->mc.visible_vram_size; 499 adev->gmc.visible_vram_size;
496 mem.cpu_accessible_vram.usable_heap_size = 500 mem.cpu_accessible_vram.usable_heap_size =
497 adev->mc.visible_vram_size - 501 adev->gmc.visible_vram_size -
498 (adev->vram_pin_size - adev->invisible_pin_size); 502 (adev->vram_pin_size - adev->invisible_pin_size);
499 mem.cpu_accessible_vram.heap_usage = 503 mem.cpu_accessible_vram.heap_usage =
500 amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); 504 amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -580,11 +584,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
580 dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; 584 dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
581 585
582 vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; 586 vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
587 vm_size -= AMDGPU_VA_RESERVED_SIZE;
588
589 /* Older VCE FW versions are buggy and can handle only 40bits */
590 if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
591 vm_size = min(vm_size, 1ULL << 40);
592
583 dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; 593 dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
584 dev_info.virtual_address_max = 594 dev_info.virtual_address_max =
585 min(vm_size, AMDGPU_VA_HOLE_START); 595 min(vm_size, AMDGPU_VA_HOLE_START);
586 596
587 vm_size -= AMDGPU_VA_RESERVED_SIZE;
588 if (vm_size > AMDGPU_VA_HOLE_START) { 597 if (vm_size > AMDGPU_VA_HOLE_START) {
589 dev_info.high_va_offset = AMDGPU_VA_HOLE_END; 598 dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
590 dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size; 599 dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
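
The new clamp above limits the reported VA range to 40 bits when an old VCE firmware is detected. As a quick check of what that limit means in practice (plain arithmetic, nothing driver-specific):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t limit = 1ULL << 40;	/* the clamp applied for old VCE FW */

	/* 2^40 bytes is exactly 1 TiB of virtual address space. */
	printf("%llu bytes = %llu GiB\n",
	       (unsigned long long)limit,
	       (unsigned long long)(limit >> 30));
	return 0;
}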
@@ -599,8 +608,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
599 sizeof(adev->gfx.cu_info.ao_cu_bitmap)); 608 sizeof(adev->gfx.cu_info.ao_cu_bitmap));
600 memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], 609 memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
601 sizeof(adev->gfx.cu_info.bitmap)); 610 sizeof(adev->gfx.cu_info.bitmap));
602 dev_info.vram_type = adev->mc.vram_type; 611 dev_info.vram_type = adev->gmc.vram_type;
603 dev_info.vram_bit_width = adev->mc.vram_width; 612 dev_info.vram_bit_width = adev->gmc.vram_width;
604 dev_info.vce_harvest_config = adev->vce.harvest_config; 613 dev_info.vce_harvest_config = adev->vce.harvest_config;
605 dev_info.gc_double_offchip_lds_buf = 614 dev_info.gc_double_offchip_lds_buf =
606 adev->gfx.config.double_offchip_lds_buf; 615 adev->gfx.config.double_offchip_lds_buf;
@@ -758,6 +767,24 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
758 return -EINVAL; 767 return -EINVAL;
759 } 768 }
760 break; 769 break;
770 case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK:
771 /* get stable pstate sclk in Mhz */
772 if (amdgpu_dpm_read_sensor(adev,
773 AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
774 (void *)&ui32, &ui32_size)) {
775 return -EINVAL;
776 }
777 ui32 /= 100;
778 break;
779 case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK:
780 /* get stable pstate mclk in Mhz */
781 if (amdgpu_dpm_read_sensor(adev,
782 AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK,
783 (void *)&ui32, &ui32_size)) {
784 return -EINVAL;
785 }
786 ui32 /= 100;
787 break;
761 default: 788 default:
762 DRM_DEBUG_KMS("Invalid request %d\n", 789 DRM_DEBUG_KMS("Invalid request %d\n",
763 info->sensor_info.type); 790 info->sensor_info.type);
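
The two new sensor cases divide the raw reading by 100 before returning it; the comments say the result is in MHz, which implies the sensor itself reports the clock in units of 10 kHz (that inference is mine, not stated in the patch). A trivial check of the conversion:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t raw = 85000;		/* example reading, assumed 10 kHz units */
	uint32_t mhz = raw / 100;	/* same conversion as the ioctl code */

	printf("raw %u -> %u MHz\n", raw, mhz);	/* 85000 -> 850 MHz */
	return 0;
}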
@@ -805,7 +832,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
805{ 832{
806 struct amdgpu_device *adev = dev->dev_private; 833 struct amdgpu_device *adev = dev->dev_private;
807 struct amdgpu_fpriv *fpriv; 834 struct amdgpu_fpriv *fpriv;
808 int r; 835 int r, pasid;
809 836
810 file_priv->driver_priv = NULL; 837 file_priv->driver_priv = NULL;
811 838
@@ -819,28 +846,25 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
819 goto out_suspend; 846 goto out_suspend;
820 } 847 }
821 848
822 r = amdgpu_vm_init(adev, &fpriv->vm, 849 pasid = amdgpu_pasid_alloc(16);
823 AMDGPU_VM_CONTEXT_GFX, 0); 850 if (pasid < 0) {
824 if (r) { 851 dev_warn(adev->dev, "No more PASIDs available!");
825 kfree(fpriv); 852 pasid = 0;
826 goto out_suspend;
827 } 853 }
854 r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
855 if (r)
856 goto error_pasid;
828 857
829 fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); 858 fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
830 if (!fpriv->prt_va) { 859 if (!fpriv->prt_va) {
831 r = -ENOMEM; 860 r = -ENOMEM;
832 amdgpu_vm_fini(adev, &fpriv->vm); 861 goto error_vm;
833 kfree(fpriv);
834 goto out_suspend;
835 } 862 }
836 863
837 if (amdgpu_sriov_vf(adev)) { 864 if (amdgpu_sriov_vf(adev)) {
838 r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); 865 r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
839 if (r) { 866 if (r)
840 amdgpu_vm_fini(adev, &fpriv->vm); 867 goto error_vm;
841 kfree(fpriv);
842 goto out_suspend;
843 }
844 } 868 }
845 869
846 mutex_init(&fpriv->bo_list_lock); 870 mutex_init(&fpriv->bo_list_lock);
@@ -849,6 +873,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
849 amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); 873 amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
850 874
851 file_priv->driver_priv = fpriv; 875 file_priv->driver_priv = fpriv;
876 goto out_suspend;
877
878error_vm:
879 amdgpu_vm_fini(adev, &fpriv->vm);
880
881error_pasid:
882 if (pasid)
883 amdgpu_pasid_free(pasid);
884
885 kfree(fpriv);
852 886
853out_suspend: 887out_suspend:
854 pm_runtime_mark_last_busy(dev->dev); 888 pm_runtime_mark_last_busy(dev->dev);
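
The reworked amdgpu_driver_open_kms() above replaces the per-failure cleanup with a single unwind ladder (error_vm, then error_pasid), so resources acquired later are released first. A stand-alone sketch of the same idiom, with hypothetical resource names in place of the PASID, VM and prt_va:

#include <stdio.h>

static int alloc_a(void) { return 0; }		/* e.g. the PASID */
static int alloc_b(void) { return 0; }		/* e.g. the VM */
static int alloc_c(void) { return -12; }	/* e.g. the prt_va; fails here */
static void free_a(void) { puts("freed a"); }
static void free_b(void) { puts("freed b"); }

static int open_path(void)
{
	int r;

	r = alloc_a();
	if (r)
		return r;

	r = alloc_b();
	if (r)
		goto error_a;

	r = alloc_c();
	if (r)
		goto error_b;	/* unwind b, then a, in reverse order */

	return 0;

error_b:
	free_b();
error_a:
	free_a();
	return r;
}

int main(void)
{
	printf("open_path() = %d\n", open_path());
	return 0;
}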
@@ -871,6 +905,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
871 struct amdgpu_device *adev = dev->dev_private; 905 struct amdgpu_device *adev = dev->dev_private;
872 struct amdgpu_fpriv *fpriv = file_priv->driver_priv; 906 struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
873 struct amdgpu_bo_list *list; 907 struct amdgpu_bo_list *list;
908 struct amdgpu_bo *pd;
909 unsigned int pasid;
874 int handle; 910 int handle;
875 911
876 if (!fpriv) 912 if (!fpriv)
@@ -895,7 +931,13 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
895 amdgpu_bo_unreserve(adev->virt.csa_obj); 931 amdgpu_bo_unreserve(adev->virt.csa_obj);
896 } 932 }
897 933
934 pasid = fpriv->vm.pasid;
935 pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
936
898 amdgpu_vm_fini(adev, &fpriv->vm); 937 amdgpu_vm_fini(adev, &fpriv->vm);
938 if (pasid)
939 amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
940 amdgpu_bo_unref(&pd);
899 941
900 idr_for_each_entry(&fpriv->bo_list_handles, list, handle) 942 idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
901 amdgpu_bo_list_free(list); 943 amdgpu_bo_list_free(list);
@@ -947,11 +989,11 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
947 */ 989 */
948 do { 990 do {
949 count = amdgpu_display_vblank_get_counter(adev, pipe); 991 count = amdgpu_display_vblank_get_counter(adev, pipe);
950 /* Ask amdgpu_get_crtc_scanoutpos to return vpos as 992 /* Ask amdgpu_display_get_crtc_scanoutpos to return
951 * distance to start of vblank, instead of regular 993 * vpos as distance to start of vblank, instead of
952 * vertical scanout pos. 994 * regular vertical scanout pos.
953 */ 995 */
954 stat = amdgpu_get_crtc_scanoutpos( 996 stat = amdgpu_display_get_crtc_scanoutpos(
955 dev, pipe, GET_DISTANCE_TO_VBLANKSTART, 997 dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
956 &vpos, &hpos, NULL, NULL, 998 &vpos, &hpos, NULL, NULL,
957 &adev->mode_info.crtcs[pipe]->base.hwmode); 999 &adev->mode_info.crtcs[pipe]->base.hwmode);
@@ -992,7 +1034,7 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
992int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe) 1034int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
993{ 1035{
994 struct amdgpu_device *adev = dev->dev_private; 1036 struct amdgpu_device *adev = dev->dev_private;
995 int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe); 1037 int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
996 1038
997 return amdgpu_irq_get(adev, &adev->crtc_irq, idx); 1039 return amdgpu_irq_get(adev, &adev->crtc_irq, idx);
998} 1040}
@@ -1008,7 +1050,7 @@ int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
1008void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) 1050void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
1009{ 1051{
1010 struct amdgpu_device *adev = dev->dev_private; 1052 struct amdgpu_device *adev = dev->dev_private;
1011 int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe); 1053 int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
1012 1054
1013 amdgpu_irq_put(adev, &adev->crtc_irq, idx); 1055 amdgpu_irq_put(adev, &adev->crtc_irq, idx);
1014} 1056}
@@ -1160,6 +1202,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
1160 i, fw_info.feature, fw_info.ver); 1202 i, fw_info.feature, fw_info.ver);
1161 } 1203 }
1162 1204
1205 /* VCN */
1206 query_fw.fw_type = AMDGPU_INFO_FW_VCN;
1207 ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
1208 if (ret)
1209 return ret;
1210 seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
1211 fw_info.feature, fw_info.ver);
1212
1163 return 0; 1213 return 0;
1164} 1214}
1165 1215
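
The AMDGPU_INFO_FW_VCN query added above is reachable from user space through libdrm's amdgpu wrapper. A sketch of such a query follows; it assumes a libdrm built against headers that already define AMDGPU_INFO_FW_VCN, and the render-node path is just the usual first node. Build with the flags from `pkg-config --cflags --libs libdrm_amdgpu`.

#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t major, minor, version = 0, feature = 0;
	amdgpu_device_handle dev;
	int fd, r;

	fd = open("/dev/dri/renderD128", O_RDWR);	/* first render node */
	if (fd < 0)
		return 1;

	r = amdgpu_device_initialize(fd, &major, &minor, &dev);
	if (r) {
		close(fd);
		return 1;
	}

	/* fw_type AMDGPU_INFO_FW_VCN, instance 0, index 0 */
	r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_VCN, 0, 0,
					  &version, &feature);
	if (!r)
		printf("VCN feature %u, firmware 0x%08x\n", feature, version);

	amdgpu_device_deinitialize(dev);
	close(fd);
	return r;
}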
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 54f06c959340..d6416ee52e32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -267,8 +267,6 @@ struct amdgpu_display_funcs {
267 void (*bandwidth_update)(struct amdgpu_device *adev); 267 void (*bandwidth_update)(struct amdgpu_device *adev);
268 /* get frame count */ 268 /* get frame count */
269 u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc); 269 u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc);
270 /* wait for vblank */
271 void (*vblank_wait)(struct amdgpu_device *adev, int crtc);
272 /* set backlight level */ 270 /* set backlight level */
273 void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder, 271 void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder,
274 u8 level); 272 u8 level);
@@ -352,6 +350,7 @@ struct amdgpu_mode_info {
352 u16 firmware_flags; 350 u16 firmware_flags;
353 /* pointer to backlight encoder */ 351 /* pointer to backlight encoder */
354 struct amdgpu_encoder *bl_encoder; 352 struct amdgpu_encoder *bl_encoder;
353 u8 bl_level; /* saved backlight level */
355 struct amdgpu_audio audio; /* audio stuff */ 354 struct amdgpu_audio audio; /* audio stuff */
356 int num_crtc; /* number of crtcs */ 355 int num_crtc; /* number of crtcs */
357 int num_hpd; /* number of hpd pins */ 356 int num_hpd; /* number of hpd pins */
@@ -552,14 +551,6 @@ struct amdgpu_connector {
552 /* we need to mind the EDID between detect 551 /* we need to mind the EDID between detect
553 and get modes due to analog/digital/tvencoder */ 552 and get modes due to analog/digital/tvencoder */
554 struct edid *edid; 553 struct edid *edid;
555 /* number of modes generated from EDID at 'dc_sink' */
556 int num_modes;
557 /* The 'old' sink - before an HPD.
558 * The 'current' sink is in dc_link->sink. */
559 struct dc_sink *dc_sink;
560 struct dc_link *dc_link;
561 struct dc_sink *dc_em_sink;
562 const struct dc_stream *stream;
563 void *con_priv; 554 void *con_priv;
564 bool dac_load_detect; 555 bool dac_load_detect;
565 bool detected_by_load; /* if the connection status was determined by load */ 556 bool detected_by_load; /* if the connection status was determined by load */
@@ -570,27 +561,6 @@ struct amdgpu_connector {
570 enum amdgpu_connector_audio audio; 561 enum amdgpu_connector_audio audio;
571 enum amdgpu_connector_dither dither; 562 enum amdgpu_connector_dither dither;
572 unsigned pixelclock_for_modeset; 563 unsigned pixelclock_for_modeset;
573
574 struct drm_dp_mst_topology_mgr mst_mgr;
575 struct amdgpu_dm_dp_aux dm_dp_aux;
576 struct drm_dp_mst_port *port;
577 struct amdgpu_connector *mst_port;
578 struct amdgpu_encoder *mst_encoder;
579 struct semaphore mst_sem;
580
581 /* TODO see if we can merge with ddc_bus or make a dm_connector */
582 struct amdgpu_i2c_adapter *i2c;
583
584 /* Monitor range limits */
585 int min_vfreq ;
586 int max_vfreq ;
587 int pixel_clock_mhz;
588
589 /*freesync caps*/
590 struct mod_freesync_caps caps;
591
592 struct mutex hpd_lock;
593
594}; 564};
595 565
596/* TODO: start to use this struct and remove same field from base one */ 566/* TODO: start to use this struct and remove same field from base one */
@@ -608,7 +578,7 @@ struct amdgpu_mst_connector {
608#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ 578#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
609 ((em) == ATOM_ENCODER_MODE_DP_MST)) 579 ((em) == ATOM_ENCODER_MODE_DP_MST))
610 580
611/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ 581/* Driver internal use only flags of amdgpu_display_get_crtc_scanoutpos() */
612#define DRM_SCANOUTPOS_VALID (1 << 0) 582#define DRM_SCANOUTPOS_VALID (1 << 0)
613#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) 583#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1)
614#define DRM_SCANOUTPOS_ACCURATE (1 << 2) 584#define DRM_SCANOUTPOS_ACCURATE (1 << 2)
@@ -627,30 +597,31 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
627u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder); 597u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder);
628struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder); 598struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder);
629 599
630bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, bool use_aux); 600bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
601 bool use_aux);
631 602
632void amdgpu_encoder_set_active_device(struct drm_encoder *encoder); 603void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
633 604
634int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, 605int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
635 unsigned int flags, int *vpos, int *hpos, 606 unsigned int pipe, unsigned int flags, int *vpos,
636 ktime_t *stime, ktime_t *etime, 607 int *hpos, ktime_t *stime, ktime_t *etime,
637 const struct drm_display_mode *mode); 608 const struct drm_display_mode *mode);
638 609
639int amdgpu_framebuffer_init(struct drm_device *dev, 610int amdgpu_display_framebuffer_init(struct drm_device *dev,
640 struct amdgpu_framebuffer *rfb, 611 struct amdgpu_framebuffer *rfb,
641 const struct drm_mode_fb_cmd2 *mode_cmd, 612 const struct drm_mode_fb_cmd2 *mode_cmd,
642 struct drm_gem_object *obj); 613 struct drm_gem_object *obj);
643 614
644int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb); 615int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
645 616
646void amdgpu_enc_destroy(struct drm_encoder *encoder); 617void amdgpu_enc_destroy(struct drm_encoder *encoder);
647void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj); 618void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
648bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, 619bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
649 const struct drm_display_mode *mode, 620 const struct drm_display_mode *mode,
650 struct drm_display_mode *adjusted_mode); 621 struct drm_display_mode *adjusted_mode);
651void amdgpu_panel_mode_fixup(struct drm_encoder *encoder, 622void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
652 struct drm_display_mode *adjusted_mode); 623 struct drm_display_mode *adjusted_mode);
653int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc); 624int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
654 625
655/* fbdev layer */ 626/* fbdev layer */
656int amdgpu_fbdev_init(struct amdgpu_device *adev); 627int amdgpu_fbdev_init(struct amdgpu_device *adev);
@@ -662,15 +633,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
662int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled); 633int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled);
663 634
664/* amdgpu_display.c */ 635/* amdgpu_display.c */
665void amdgpu_print_display_setup(struct drm_device *dev); 636void amdgpu_display_print_display_setup(struct drm_device *dev);
666int amdgpu_modeset_create_props(struct amdgpu_device *adev); 637int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);
667int amdgpu_crtc_set_config(struct drm_mode_set *set, 638int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
668 struct drm_modeset_acquire_ctx *ctx); 639 struct drm_modeset_acquire_ctx *ctx);
669int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, 640int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
670 struct drm_framebuffer *fb, 641 struct drm_framebuffer *fb,
671 struct drm_pending_vblank_event *event, 642 struct drm_pending_vblank_event *event,
672 uint32_t page_flip_flags, uint32_t target, 643 uint32_t page_flip_flags, uint32_t target,
673 struct drm_modeset_acquire_ctx *ctx); 644 struct drm_modeset_acquire_ctx *ctx);
674extern const struct drm_mode_config_funcs amdgpu_mode_funcs; 645extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
675 646
676#endif 647#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5c4c3e0d527b..6d08cde8443c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -36,6 +36,7 @@
36#include <drm/drm_cache.h> 36#include <drm/drm_cache.h>
37#include "amdgpu.h" 37#include "amdgpu.h"
38#include "amdgpu_trace.h" 38#include "amdgpu_trace.h"
39#include "amdgpu_amdkfd.h"
39 40
40static bool amdgpu_need_backup(struct amdgpu_device *adev) 41static bool amdgpu_need_backup(struct amdgpu_device *adev)
41{ 42{
@@ -54,8 +55,13 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
54 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); 55 struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
55 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); 56 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
56 57
58 if (bo->kfd_bo)
59 amdgpu_amdkfd_unreserve_system_memory_limit(bo);
60
57 amdgpu_bo_kunmap(bo); 61 amdgpu_bo_kunmap(bo);
58 62
63 if (bo->gem_base.import_attach)
64 drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
59 drm_gem_object_release(&bo->gem_base); 65 drm_gem_object_release(&bo->gem_base);
60 amdgpu_bo_unref(&bo->parent); 66 amdgpu_bo_unref(&bo->parent);
61 if (!list_empty(&bo->shadow_list)) { 67 if (!list_empty(&bo->shadow_list)) {
@@ -83,7 +89,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
83 u32 c = 0; 89 u32 c = 0;
84 90
85 if (domain & AMDGPU_GEM_DOMAIN_VRAM) { 91 if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
86 unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT; 92 unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
87 93
88 places[c].fpfn = 0; 94 places[c].fpfn = 0;
89 places[c].lpfn = 0; 95 places[c].lpfn = 0;
@@ -103,7 +109,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
103 if (domain & AMDGPU_GEM_DOMAIN_GTT) { 109 if (domain & AMDGPU_GEM_DOMAIN_GTT) {
104 places[c].fpfn = 0; 110 places[c].fpfn = 0;
105 if (flags & AMDGPU_GEM_CREATE_SHADOW) 111 if (flags & AMDGPU_GEM_CREATE_SHADOW)
106 places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT; 112 places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
107 else 113 else
108 places[c].lpfn = 0; 114 places[c].lpfn = 0;
109 places[c].flags = TTM_PL_FLAG_TT; 115 places[c].flags = TTM_PL_FLAG_TT;
@@ -169,13 +175,15 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
169 * @size: size for the new BO 175 * @size: size for the new BO
170 * @align: alignment for the new BO 176 * @align: alignment for the new BO
171 * @domain: where to place it 177 * @domain: where to place it
172 * @bo_ptr: resulting BO 178 * @bo_ptr: used to initialize BOs in structures
173 * @gpu_addr: GPU addr of the pinned BO 179 * @gpu_addr: GPU addr of the pinned BO
174 * @cpu_addr: optional CPU address mapping 180 * @cpu_addr: optional CPU address mapping
175 * 181 *
176 * Allocates and pins a BO for kernel internal use, and returns it still 182 * Allocates and pins a BO for kernel internal use, and returns it still
177 * reserved. 183 * reserved.
178 * 184 *
185 * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
186 *
179 * Returns 0 on success, negative error code otherwise. 187 * Returns 0 on success, negative error code otherwise.
180 */ 188 */
181int amdgpu_bo_create_reserved(struct amdgpu_device *adev, 189int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
@@ -187,10 +195,10 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
187 int r; 195 int r;
188 196
189 if (!*bo_ptr) { 197 if (!*bo_ptr) {
190 r = amdgpu_bo_create(adev, size, align, true, domain, 198 r = amdgpu_bo_create(adev, size, align, domain,
191 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 199 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
192 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, 200 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
193 NULL, NULL, 0, bo_ptr); 201 ttm_bo_type_kernel, NULL, bo_ptr);
194 if (r) { 202 if (r) {
195 dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", 203 dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
196 r); 204 r);
@@ -238,12 +246,14 @@ error_free:
238 * @size: size for the new BO 246 * @size: size for the new BO
239 * @align: alignment for the new BO 247 * @align: alignment for the new BO
240 * @domain: where to place it 248 * @domain: where to place it
241 * @bo_ptr: resulting BO 249 * @bo_ptr: used to initialize BOs in structures
242 * @gpu_addr: GPU addr of the pinned BO 250 * @gpu_addr: GPU addr of the pinned BO
243 * @cpu_addr: optional CPU address mapping 251 * @cpu_addr: optional CPU address mapping
244 * 252 *
245 * Allocates and pins a BO for kernel internal use. 253 * Allocates and pins a BO for kernel internal use.
246 * 254 *
255 * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
256 *
247 * Returns 0 on success, negative error code otherwise. 257 * Returns 0 on success, negative error code otherwise.
248 */ 258 */
249int amdgpu_bo_create_kernel(struct amdgpu_device *adev, 259int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
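
The note added to both helpers ("new BO is only created if bo_ptr points to NULL") documents a lazy-allocation contract: an already populated pointer is reused instead of reallocated. A generic user-space analogue of that contract, with invented names:

#include <stdio.h>
#include <stdlib.h>

struct buf { size_t size; };

/* Create the object only when *ptr is still NULL, mirroring the bo_ptr
 * contract documented above; an existing object is simply reused. */
static int buf_create_reserved(size_t size, struct buf **ptr)
{
	if (!*ptr) {
		*ptr = malloc(sizeof(**ptr));
		if (!*ptr)
			return -1;
		(*ptr)->size = size;
	}
	return 0;
}

int main(void)
{
	struct buf *bo = NULL;

	buf_create_reserved(4096, &bo);		/* allocates */
	buf_create_reserved(8192, &bo);		/* reuses; size stays 4096 */
	printf("size = %zu\n", bo->size);
	free(bo);
	return 0;
}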
@@ -331,22 +341,19 @@ fail:
331 return false; 341 return false;
332} 342}
333 343
334static int amdgpu_bo_do_create(struct amdgpu_device *adev, 344static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
335 unsigned long size, int byte_align, 345 int byte_align, u32 domain,
336 bool kernel, u32 domain, u64 flags, 346 u64 flags, enum ttm_bo_type type,
337 struct sg_table *sg,
338 struct reservation_object *resv, 347 struct reservation_object *resv,
339 uint64_t init_value,
340 struct amdgpu_bo **bo_ptr) 348 struct amdgpu_bo **bo_ptr)
341{ 349{
342 struct ttm_operation_ctx ctx = { 350 struct ttm_operation_ctx ctx = {
343 .interruptible = !kernel, 351 .interruptible = (type != ttm_bo_type_kernel),
344 .no_wait_gpu = false, 352 .no_wait_gpu = false,
345 .allow_reserved_eviction = true, 353 .resv = resv,
346 .resv = resv 354 .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
347 }; 355 };
348 struct amdgpu_bo *bo; 356 struct amdgpu_bo *bo;
349 enum ttm_bo_type type;
350 unsigned long page_align; 357 unsigned long page_align;
351 size_t acc_size; 358 size_t acc_size;
352 int r; 359 int r;
@@ -357,13 +364,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
357 if (!amdgpu_bo_validate_size(adev, size, domain)) 364 if (!amdgpu_bo_validate_size(adev, size, domain))
358 return -ENOMEM; 365 return -ENOMEM;
359 366
360 if (kernel) {
361 type = ttm_bo_type_kernel;
362 } else if (sg) {
363 type = ttm_bo_type_sg;
364 } else {
365 type = ttm_bo_type_device;
366 }
367 *bo_ptr = NULL; 367 *bo_ptr = NULL;
368 368
369 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, 369 acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
@@ -372,11 +372,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
372 bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL); 372 bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
373 if (bo == NULL) 373 if (bo == NULL)
374 return -ENOMEM; 374 return -ENOMEM;
375 r = drm_gem_object_init(adev->ddev, &bo->gem_base, size); 375 drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
376 if (unlikely(r)) {
377 kfree(bo);
378 return r;
379 }
380 INIT_LIST_HEAD(&bo->shadow_list); 376 INIT_LIST_HEAD(&bo->shadow_list);
381 INIT_LIST_HEAD(&bo->va); 377 INIT_LIST_HEAD(&bo->va);
382 bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | 378 bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
@@ -386,7 +382,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
386 AMDGPU_GEM_DOMAIN_GWS | 382 AMDGPU_GEM_DOMAIN_GWS |
387 AMDGPU_GEM_DOMAIN_OA); 383 AMDGPU_GEM_DOMAIN_OA);
388 bo->allowed_domains = bo->preferred_domains; 384 bo->allowed_domains = bo->preferred_domains;
389 if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) 385 if (type != ttm_bo_type_kernel &&
386 bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
390 bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; 387 bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
391 388
392 bo->flags = flags; 389 bo->flags = flags;
@@ -423,27 +420,27 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
423 amdgpu_ttm_placement_from_domain(bo, domain); 420 amdgpu_ttm_placement_from_domain(bo, domain);
424 421
425 r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, 422 r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
426 &bo->placement, page_align, &ctx, NULL, 423 &bo->placement, page_align, &ctx, acc_size,
427 acc_size, sg, resv, &amdgpu_ttm_bo_destroy); 424 NULL, resv, &amdgpu_ttm_bo_destroy);
428 if (unlikely(r != 0)) 425 if (unlikely(r != 0))
429 return r; 426 return r;
430 427
431 if (adev->mc.visible_vram_size < adev->mc.real_vram_size && 428 if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
432 bo->tbo.mem.mem_type == TTM_PL_VRAM && 429 bo->tbo.mem.mem_type == TTM_PL_VRAM &&
433 bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) 430 bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
434 amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 431 amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
435 ctx.bytes_moved); 432 ctx.bytes_moved);
436 else 433 else
437 amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); 434 amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
438 435
439 if (kernel) 436 if (type == ttm_bo_type_kernel)
440 bo->tbo.priority = 1; 437 bo->tbo.priority = 1;
441 438
442 if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && 439 if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
443 bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { 440 bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
444 struct dma_fence *fence; 441 struct dma_fence *fence;
445 442
446 r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence); 443 r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
447 if (unlikely(r)) 444 if (unlikely(r))
448 goto fail_unreserve; 445 goto fail_unreserve;
449 446
@@ -480,12 +477,11 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
480 if (bo->shadow) 477 if (bo->shadow)
481 return 0; 478 return 0;
482 479
483 r = amdgpu_bo_do_create(adev, size, byte_align, true, 480 r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
484 AMDGPU_GEM_DOMAIN_GTT,
485 AMDGPU_GEM_CREATE_CPU_GTT_USWC | 481 AMDGPU_GEM_CREATE_CPU_GTT_USWC |
486 AMDGPU_GEM_CREATE_SHADOW, 482 AMDGPU_GEM_CREATE_SHADOW,
487 NULL, bo->tbo.resv, 0, 483 ttm_bo_type_kernel,
488 &bo->shadow); 484 bo->tbo.resv, &bo->shadow);
489 if (!r) { 485 if (!r) {
490 bo->shadow->parent = amdgpu_bo_ref(bo); 486 bo->shadow->parent = amdgpu_bo_ref(bo);
491 mutex_lock(&adev->shadow_list_lock); 487 mutex_lock(&adev->shadow_list_lock);
@@ -496,22 +492,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
496 return r; 492 return r;
497} 493}
498 494
499/* init_value will only take effect when flags contains 495int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
500 * AMDGPU_GEM_CREATE_VRAM_CLEARED. 496 int byte_align, u32 domain,
501 */ 497 u64 flags, enum ttm_bo_type type,
502int amdgpu_bo_create(struct amdgpu_device *adev,
503 unsigned long size, int byte_align,
504 bool kernel, u32 domain, u64 flags,
505 struct sg_table *sg,
506 struct reservation_object *resv, 498 struct reservation_object *resv,
507 uint64_t init_value,
508 struct amdgpu_bo **bo_ptr) 499 struct amdgpu_bo **bo_ptr)
509{ 500{
510 uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; 501 uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
511 int r; 502 int r;
512 503
513 r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain, 504 r = amdgpu_bo_do_create(adev, size, byte_align, domain,
514 parent_flags, sg, resv, init_value, bo_ptr); 505 parent_flags, type, resv, bo_ptr);
515 if (r) 506 if (r)
516 return r; 507 return r;
517 508
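
The amdgpu_bo_create() rework drops the bool kernel flag and the sg parameter in favour of passing an enum ttm_bo_type directly, so the mapping the old code computed internally (kernel -> ttm_bo_type_kernel, sg -> ttm_bo_type_sg, otherwise ttm_bo_type_device) now falls on the caller. A stand-alone sketch of that mapping, with a local copy of the enum in place of the TTM header:

#include <stdbool.h>
#include <stdio.h>

/* Local stand-in for the TTM enum, just for the sketch. */
enum ttm_bo_type {
	ttm_bo_type_device,
	ttm_bo_type_kernel,
	ttm_bo_type_sg,
};

/* What the removed branch in amdgpu_bo_do_create() used to compute. */
static enum ttm_bo_type bo_type(bool kernel, bool has_sg)
{
	if (kernel)
		return ttm_bo_type_kernel;
	if (has_sg)
		return ttm_bo_type_sg;
	return ttm_bo_type_device;
}

int main(void)
{
	printf("%d %d %d\n",
	       bo_type(true, false),	/* kernel BO   -> ttm_bo_type_kernel */
	       bo_type(false, true),	/* imported sg -> ttm_bo_type_sg     */
	       bo_type(false, false));	/* user BO     -> ttm_bo_type_device */
	return 0;
}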
@@ -826,31 +817,32 @@ static const char *amdgpu_vram_names[] = {
826 "GDDR4", 817 "GDDR4",
827 "GDDR5", 818 "GDDR5",
828 "HBM", 819 "HBM",
829 "DDR3" 820 "DDR3",
821 "DDR4",
830}; 822};
831 823
832int amdgpu_bo_init(struct amdgpu_device *adev) 824int amdgpu_bo_init(struct amdgpu_device *adev)
833{ 825{
834 /* reserve PAT memory space to WC for VRAM */ 826 /* reserve PAT memory space to WC for VRAM */
835 arch_io_reserve_memtype_wc(adev->mc.aper_base, 827 arch_io_reserve_memtype_wc(adev->gmc.aper_base,
836 adev->mc.aper_size); 828 adev->gmc.aper_size);
837 829
838 /* Add an MTRR for the VRAM */ 830 /* Add an MTRR for the VRAM */
839 adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base, 831 adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
840 adev->mc.aper_size); 832 adev->gmc.aper_size);
841 DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n", 833 DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
842 adev->mc.mc_vram_size >> 20, 834 adev->gmc.mc_vram_size >> 20,
843 (unsigned long long)adev->mc.aper_size >> 20); 835 (unsigned long long)adev->gmc.aper_size >> 20);
844 DRM_INFO("RAM width %dbits %s\n", 836 DRM_INFO("RAM width %dbits %s\n",
845 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]); 837 adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]);
846 return amdgpu_ttm_init(adev); 838 return amdgpu_ttm_init(adev);
847} 839}
848 840
849void amdgpu_bo_fini(struct amdgpu_device *adev) 841void amdgpu_bo_fini(struct amdgpu_device *adev)
850{ 842{
851 amdgpu_ttm_fini(adev); 843 amdgpu_ttm_fini(adev);
852 arch_phys_wc_del(adev->mc.vram_mtrr); 844 arch_phys_wc_del(adev->gmc.vram_mtrr);
853 arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size); 845 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
854} 846}
855 847
856int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, 848int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
@@ -980,7 +972,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
980 972
981 size = bo->mem.num_pages << PAGE_SHIFT; 973 size = bo->mem.num_pages << PAGE_SHIFT;
982 offset = bo->mem.start << PAGE_SHIFT; 974 offset = bo->mem.start << PAGE_SHIFT;
983 if ((offset + size) <= adev->mc.visible_vram_size) 975 if ((offset + size) <= adev->gmc.visible_vram_size)
984 return 0; 976 return 0;
985 977
986 /* Can't move a pinned BO to visible VRAM */ 978 /* Can't move a pinned BO to visible VRAM */
@@ -1003,7 +995,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
1003 offset = bo->mem.start << PAGE_SHIFT; 995 offset = bo->mem.start << PAGE_SHIFT;
1004 /* this should never happen */ 996 /* this should never happen */
1005 if (bo->mem.mem_type == TTM_PL_VRAM && 997 if (bo->mem.mem_type == TTM_PL_VRAM &&
1006 (offset + size) > adev->mc.visible_vram_size) 998 (offset + size) > adev->gmc.visible_vram_size)
1007 return -EINVAL; 999 return -EINVAL;
1008 1000
1009 return 0; 1001 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 33615e2ea2e6..546f77cb7882 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -92,6 +92,8 @@ struct amdgpu_bo {
92 struct list_head mn_list; 92 struct list_head mn_list;
93 struct list_head shadow_list; 93 struct list_head shadow_list;
94 }; 94 };
95
96 struct kgd_mem *kfd_bo;
95}; 97};
96 98
97static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) 99static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
@@ -201,13 +203,11 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
201 return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; 203 return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
202} 204}
203 205
204int amdgpu_bo_create(struct amdgpu_device *adev, 206int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
205 unsigned long size, int byte_align, 207 int byte_align, u32 domain,
206 bool kernel, u32 domain, u64 flags, 208 u64 flags, enum ttm_bo_type type,
207 struct sg_table *sg, 209 struct reservation_object *resv,
208 struct reservation_object *resv, 210 struct amdgpu_bo **bo_ptr);
209 uint64_t init_value,
210 struct amdgpu_bo **bo_ptr);
211int amdgpu_bo_create_reserved(struct amdgpu_device *adev, 211int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
212 unsigned long size, int align, 212 unsigned long size, int align,
213 u32 domain, struct amdgpu_bo **bo_ptr, 213 u32 domain, struct amdgpu_bo **bo_ptr,
@@ -282,8 +282,6 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
282 struct amdgpu_sa_manager *sa_manager); 282 struct amdgpu_sa_manager *sa_manager);
283int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, 283int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
284 struct amdgpu_sa_manager *sa_manager); 284 struct amdgpu_sa_manager *sa_manager);
285int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
286 struct amdgpu_sa_manager *sa_manager);
287int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, 285int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
288 struct amdgpu_sa_bo **sa_bo, 286 struct amdgpu_sa_bo **sa_bo,
289 unsigned size, unsigned align); 287 unsigned size, unsigned align);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 01a996c6b802..361975cf45a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -116,7 +116,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
116 } 116 }
117 117
118 if (adev->powerplay.pp_funcs->dispatch_tasks) { 118 if (adev->powerplay.pp_funcs->dispatch_tasks) {
119 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL); 119 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state);
120 } else { 120 } else {
121 mutex_lock(&adev->pm.mutex); 121 mutex_lock(&adev->pm.mutex);
122 adev->pm.dpm.user_state = state; 122 adev->pm.dpm.user_state = state;
@@ -316,7 +316,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
316 if (state != POWER_STATE_TYPE_INTERNAL_BOOT && 316 if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
317 state != POWER_STATE_TYPE_DEFAULT) { 317 state != POWER_STATE_TYPE_DEFAULT) {
318 amdgpu_dpm_dispatch_task(adev, 318 amdgpu_dpm_dispatch_task(adev,
319 AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL); 319 AMD_PP_TASK_ENABLE_USER_STATE, &state);
320 adev->pp_force_state_enabled = true; 320 adev->pp_force_state_enabled = true;
321 } 321 }
322 } 322 }
@@ -360,6 +360,90 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
360 return count; 360 return count;
361} 361}
362 362
363static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
364 struct device_attribute *attr,
365 const char *buf,
366 size_t count)
367{
368 struct drm_device *ddev = dev_get_drvdata(dev);
369 struct amdgpu_device *adev = ddev->dev_private;
370 int ret;
371 uint32_t parameter_size = 0;
372 long parameter[64];
373 char buf_cpy[128];
374 char *tmp_str;
375 char *sub_str;
376 const char delimiter[3] = {' ', '\n', '\0'};
377 uint32_t type;
378
379 if (count > 127)
380 return -EINVAL;
381
382 if (*buf == 's')
383 type = PP_OD_EDIT_SCLK_VDDC_TABLE;
384 else if (*buf == 'm')
385 type = PP_OD_EDIT_MCLK_VDDC_TABLE;
 386 	else if (*buf == 'r')
387 type = PP_OD_RESTORE_DEFAULT_TABLE;
388 else if (*buf == 'c')
389 type = PP_OD_COMMIT_DPM_TABLE;
390 else
391 return -EINVAL;
392
393 memcpy(buf_cpy, buf, count+1);
394
395 tmp_str = buf_cpy;
396
397 while (isspace(*++tmp_str));
398
399 while (tmp_str[0]) {
400 sub_str = strsep(&tmp_str, delimiter);
401 ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
402 if (ret)
403 return -EINVAL;
404 parameter_size++;
405
406 while (isspace(*tmp_str))
407 tmp_str++;
408 }
409
410 if (adev->powerplay.pp_funcs->odn_edit_dpm_table)
411 ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
412 parameter, parameter_size);
413
414 if (ret)
415 return -EINVAL;
416
417 if (type == PP_OD_COMMIT_DPM_TABLE) {
418 if (adev->powerplay.pp_funcs->dispatch_tasks) {
419 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
420 return count;
421 } else {
422 return -EINVAL;
423 }
424 }
425
426 return count;
427}
428
429static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
430 struct device_attribute *attr,
431 char *buf)
432{
433 struct drm_device *ddev = dev_get_drvdata(dev);
434 struct amdgpu_device *adev = ddev->dev_private;
435 uint32_t size = 0;
436
437 if (adev->powerplay.pp_funcs->print_clock_levels) {
438 size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
439 size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
440 return size;
441 } else {
442 return snprintf(buf, PAGE_SIZE, "\n");
443 }
444
445}
446
363static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, 447static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
364 struct device_attribute *attr, 448 struct device_attribute *attr,
365 char *buf) 449 char *buf)
@@ -530,7 +614,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
530 amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); 614 amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
531 615
532 if (adev->powerplay.pp_funcs->dispatch_tasks) { 616 if (adev->powerplay.pp_funcs->dispatch_tasks) {
533 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); 617 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
534 } else { 618 } else {
535 adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; 619 adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
536 amdgpu_pm_compute_clocks(adev); 620 amdgpu_pm_compute_clocks(adev);
@@ -574,7 +658,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
574 amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); 658 amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
575 659
576 if (adev->powerplay.pp_funcs->dispatch_tasks) { 660 if (adev->powerplay.pp_funcs->dispatch_tasks) {
577 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); 661 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
578 } else { 662 } else {
579 adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; 663 adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
580 amdgpu_pm_compute_clocks(adev); 664 amdgpu_pm_compute_clocks(adev);
@@ -584,159 +668,70 @@ fail:
584 return count; 668 return count;
585} 669}
586 670
587static ssize_t amdgpu_get_pp_power_profile(struct device *dev, 671static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
588 char *buf, struct amd_pp_profile *query) 672 struct device_attribute *attr,
673 char *buf)
589{ 674{
590 struct drm_device *ddev = dev_get_drvdata(dev); 675 struct drm_device *ddev = dev_get_drvdata(dev);
591 struct amdgpu_device *adev = ddev->dev_private; 676 struct amdgpu_device *adev = ddev->dev_private;
592 int ret = 0xff;
593 677
594 if (adev->powerplay.pp_funcs->get_power_profile_state) 678 if (adev->powerplay.pp_funcs->get_power_profile_mode)
595 ret = amdgpu_dpm_get_power_profile_state( 679 return amdgpu_dpm_get_power_profile_mode(adev, buf);
596 adev, query);
597 680
598 if (ret) 681 return snprintf(buf, PAGE_SIZE, "\n");
599 return ret;
600
601 return snprintf(buf, PAGE_SIZE,
602 "%d %d %d %d %d\n",
603 query->min_sclk / 100,
604 query->min_mclk / 100,
605 query->activity_threshold,
606 query->up_hyst,
607 query->down_hyst);
608} 682}
609 683
610static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev,
611 struct device_attribute *attr,
612 char *buf)
613{
614 struct amd_pp_profile query = {0};
615
616 query.type = AMD_PP_GFX_PROFILE;
617
618 return amdgpu_get_pp_power_profile(dev, buf, &query);
619}
620 684
621static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev, 685static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
622 struct device_attribute *attr, 686 struct device_attribute *attr,
623 char *buf)
624{
625 struct amd_pp_profile query = {0};
626
627 query.type = AMD_PP_COMPUTE_PROFILE;
628
629 return amdgpu_get_pp_power_profile(dev, buf, &query);
630}
631
632static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
633 const char *buf, 687 const char *buf,
634 size_t count, 688 size_t count)
635 struct amd_pp_profile *request)
636{ 689{
690 int ret = 0xff;
637 struct drm_device *ddev = dev_get_drvdata(dev); 691 struct drm_device *ddev = dev_get_drvdata(dev);
638 struct amdgpu_device *adev = ddev->dev_private; 692 struct amdgpu_device *adev = ddev->dev_private;
639 uint32_t loop = 0; 693 uint32_t parameter_size = 0;
640 char *sub_str, buf_cpy[128], *tmp_str; 694 long parameter[64];
695 char *sub_str, buf_cpy[128];
696 char *tmp_str;
697 uint32_t i = 0;
698 char tmp[2];
699 long int profile_mode = 0;
641 const char delimiter[3] = {' ', '\n', '\0'}; 700 const char delimiter[3] = {' ', '\n', '\0'};
642 long int value;
643 int ret = 0xff;
644
645 if (strncmp("reset", buf, strlen("reset")) == 0) {
646 if (adev->powerplay.pp_funcs->reset_power_profile_state)
647 ret = amdgpu_dpm_reset_power_profile_state(
648 adev, request);
649 if (ret) {
650 count = -EINVAL;
651 goto fail;
652 }
653 return count;
654 }
655
656 if (strncmp("set", buf, strlen("set")) == 0) {
657 if (adev->powerplay.pp_funcs->set_power_profile_state)
658 ret = amdgpu_dpm_set_power_profile_state(
659 adev, request);
660
661 if (ret) {
662 count = -EINVAL;
663 goto fail;
664 }
665 return count;
666 }
667 701
668 if (count + 1 >= 128) { 702 tmp[0] = *(buf);
669 count = -EINVAL; 703 tmp[1] = '\0';
704 ret = kstrtol(tmp, 0, &profile_mode);
705 if (ret)
670 goto fail; 706 goto fail;
671 }
672
673 memcpy(buf_cpy, buf, count + 1);
674 tmp_str = buf_cpy;
675
676 while (tmp_str[0]) {
677 sub_str = strsep(&tmp_str, delimiter);
678 ret = kstrtol(sub_str, 0, &value);
679 if (ret) {
680 count = -EINVAL;
681 goto fail;
682 }
683 707
684 switch (loop) { 708 if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
685 case 0: 709 if (count < 2 || count > 127)
686 /* input unit MHz convert to dpm table unit 10KHz*/ 710 return -EINVAL;
687 request->min_sclk = (uint32_t)value * 100; 711 while (isspace(*++buf))
688 break; 712 i++;
689 case 1: 713 memcpy(buf_cpy, buf, count-i);
690 /* input unit MHz convert to dpm table unit 10KHz*/ 714 tmp_str = buf_cpy;
691 request->min_mclk = (uint32_t)value * 100; 715 while (tmp_str[0]) {
692 break; 716 sub_str = strsep(&tmp_str, delimiter);
693 case 2: 717 ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
694 request->activity_threshold = (uint16_t)value; 718 if (ret) {
695 break; 719 count = -EINVAL;
696 case 3: 720 goto fail;
697 request->up_hyst = (uint8_t)value; 721 }
698 break; 722 parameter_size++;
699 case 4: 723 while (isspace(*tmp_str))
700 request->down_hyst = (uint8_t)value; 724 tmp_str++;
701 break;
702 default:
703 break;
704 } 725 }
705
706 loop++;
707 } 726 }
708 if (adev->powerplay.pp_funcs->set_power_profile_state) 727 parameter[parameter_size] = profile_mode;
709 ret = amdgpu_dpm_set_power_profile_state(adev, request); 728 if (adev->powerplay.pp_funcs->set_power_profile_mode)
710 729 ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
711 if (ret)
712 count = -EINVAL;
713 730
731 if (!ret)
732 return count;
714fail: 733fail:
715 return count; 734 return -EINVAL;
716}
717
718static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev,
719 struct device_attribute *attr,
720 const char *buf,
721 size_t count)
722{
723 struct amd_pp_profile request = {0};
724
725 request.type = AMD_PP_GFX_PROFILE;
726
727 return amdgpu_set_pp_power_profile(dev, buf, count, &request);
728}
729
730static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev,
731 struct device_attribute *attr,
732 const char *buf,
733 size_t count)
734{
735 struct amd_pp_profile request = {0};
736
737 request.type = AMD_PP_COMPUTE_PROFILE;
738
739 return amdgpu_set_pp_power_profile(dev, buf, count, &request);
740} 735}
741 736
742static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); 737static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
@@ -766,12 +761,12 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
766static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR, 761static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
767 amdgpu_get_pp_mclk_od, 762 amdgpu_get_pp_mclk_od,
768 amdgpu_set_pp_mclk_od); 763 amdgpu_set_pp_mclk_od);
769static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR, 764static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
770 amdgpu_get_pp_gfx_power_profile, 765 amdgpu_get_pp_power_profile_mode,
771 amdgpu_set_pp_gfx_power_profile); 766 amdgpu_set_pp_power_profile_mode);
772static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR, 767static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
773 amdgpu_get_pp_compute_power_profile, 768 amdgpu_get_pp_od_clk_voltage,
774 amdgpu_set_pp_compute_power_profile); 769 amdgpu_set_pp_od_clk_voltage);
775 770
776static ssize_t amdgpu_hwmon_show_temp(struct device *dev, 771static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
777 struct device_attribute *attr, 772 struct device_attribute *attr,
@@ -779,17 +774,23 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
779{ 774{
780 struct amdgpu_device *adev = dev_get_drvdata(dev); 775 struct amdgpu_device *adev = dev_get_drvdata(dev);
781 struct drm_device *ddev = adev->ddev; 776 struct drm_device *ddev = adev->ddev;
782 int temp; 777 int r, temp, size = sizeof(temp);
783 778
784 /* Can't get temperature when the card is off */ 779 /* Can't get temperature when the card is off */
785 if ((adev->flags & AMD_IS_PX) && 780 if ((adev->flags & AMD_IS_PX) &&
786 (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) 781 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
787 return -EINVAL; 782 return -EINVAL;
788 783
789 if (!adev->powerplay.pp_funcs->get_temperature) 784 /* sanity check PP is enabled */
790 temp = 0; 785 if (!(adev->powerplay.pp_funcs &&
791 else 786 adev->powerplay.pp_funcs->read_sensor))
792 temp = amdgpu_dpm_get_temperature(adev); 787 return -EINVAL;
788
789 /* get the temperature */
790 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
791 (void *)&temp, &size);
792 if (r)
793 return r;
793 794
794 return snprintf(buf, PAGE_SIZE, "%d\n", temp); 795 return snprintf(buf, PAGE_SIZE, "%d\n", temp);
795} 796}
@@ -834,6 +835,11 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
834 int err; 835 int err;
835 int value; 836 int value;
836 837
838 /* Can't adjust fan when the card is off */
839 if ((adev->flags & AMD_IS_PX) &&
840 (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
841 return -EINVAL;
842
837 if (!adev->powerplay.pp_funcs->set_fan_control_mode) 843 if (!adev->powerplay.pp_funcs->set_fan_control_mode)
838 return -EINVAL; 844 return -EINVAL;
839 845
@@ -868,6 +874,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
868 int err; 874 int err;
869 u32 value; 875 u32 value;
870 876
877 /* Can't adjust fan when the card is off */
878 if ((adev->flags & AMD_IS_PX) &&
879 (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
880 return -EINVAL;
881
871 err = kstrtou32(buf, 10, &value); 882 err = kstrtou32(buf, 10, &value);
872 if (err) 883 if (err)
873 return err; 884 return err;
@@ -891,6 +902,11 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
891 int err; 902 int err;
892 u32 speed = 0; 903 u32 speed = 0;
893 904
 905 	/* Can't query fan when the card is off */
906 if ((adev->flags & AMD_IS_PX) &&
907 (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
908 return -EINVAL;
909
894 if (adev->powerplay.pp_funcs->get_fan_speed_percent) { 910 if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
895 err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); 911 err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
896 if (err) 912 if (err)
@@ -910,6 +926,11 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
910 int err; 926 int err;
911 u32 speed = 0; 927 u32 speed = 0;
912 928
 929 	/* Can't query fan when the card is off */
930 if ((adev->flags & AMD_IS_PX) &&
931 (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
932 return -EINVAL;
933
913 if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { 934 if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
914 err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); 935 err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
915 if (err) 936 if (err)
@@ -919,6 +940,175 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
919 return sprintf(buf, "%i\n", speed); 940 return sprintf(buf, "%i\n", speed);
920} 941}
921 942
943static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
944 struct device_attribute *attr,
945 char *buf)
946{
947 struct amdgpu_device *adev = dev_get_drvdata(dev);
948 struct drm_device *ddev = adev->ddev;
949 u32 vddgfx;
950 int r, size = sizeof(vddgfx);
951
952 /* Can't get voltage when the card is off */
953 if ((adev->flags & AMD_IS_PX) &&
954 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
955 return -EINVAL;
956
957 /* sanity check PP is enabled */
958 if (!(adev->powerplay.pp_funcs &&
959 adev->powerplay.pp_funcs->read_sensor))
960 return -EINVAL;
961
962 /* get the voltage */
963 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
964 (void *)&vddgfx, &size);
965 if (r)
966 return r;
967
968 return snprintf(buf, PAGE_SIZE, "%d\n", vddgfx);
969}
970
971static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev,
972 struct device_attribute *attr,
973 char *buf)
974{
975 return snprintf(buf, PAGE_SIZE, "vddgfx\n");
976}
977
978static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
979 struct device_attribute *attr,
980 char *buf)
981{
982 struct amdgpu_device *adev = dev_get_drvdata(dev);
983 struct drm_device *ddev = adev->ddev;
984 u32 vddnb;
985 int r, size = sizeof(vddnb);
986
987 /* only APUs have vddnb */
 988 	if (!(adev->flags & AMD_IS_APU))
989 return -EINVAL;
990
991 /* Can't get voltage when the card is off */
992 if ((adev->flags & AMD_IS_PX) &&
993 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
994 return -EINVAL;
995
996 /* sanity check PP is enabled */
997 if (!(adev->powerplay.pp_funcs &&
998 adev->powerplay.pp_funcs->read_sensor))
999 return -EINVAL;
1000
1001 /* get the voltage */
1002 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
1003 (void *)&vddnb, &size);
1004 if (r)
1005 return r;
1006
1007 return snprintf(buf, PAGE_SIZE, "%d\n", vddnb);
1008}
1009
1010static ssize_t amdgpu_hwmon_show_vddnb_label(struct device *dev,
1011 struct device_attribute *attr,
1012 char *buf)
1013{
1014 return snprintf(buf, PAGE_SIZE, "vddnb\n");
1015}
1016
1017static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
1018 struct device_attribute *attr,
1019 char *buf)
1020{
1021 struct amdgpu_device *adev = dev_get_drvdata(dev);
1022 struct drm_device *ddev = adev->ddev;
1023 struct pp_gpu_power query = {0};
1024 int r, size = sizeof(query);
1025 unsigned uw;
1026
1027 /* Can't get power when the card is off */
1028 if ((adev->flags & AMD_IS_PX) &&
1029 (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
1030 return -EINVAL;
1031
1032 /* sanity check PP is enabled */
1033 if (!(adev->powerplay.pp_funcs &&
1034 adev->powerplay.pp_funcs->read_sensor))
1035 return -EINVAL;
1036
1037 	/* get the power */
1038 r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
1039 (void *)&query, &size);
1040 if (r)
1041 return r;
1042
1043 /* convert to microwatts */
1044 uw = (query.average_gpu_power >> 8) * 1000000;
1045
1046 return snprintf(buf, PAGE_SIZE, "%u\n", uw);
1047}
1048
1049static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
1050 struct device_attribute *attr,
1051 char *buf)
1052{
1053 return sprintf(buf, "%i\n", 0);
1054}
1055
1056static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
1057 struct device_attribute *attr,
1058 char *buf)
1059{
1060 struct amdgpu_device *adev = dev_get_drvdata(dev);
1061 uint32_t limit = 0;
1062
1063 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
1064 adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
1065 return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
1066 } else {
1067 return snprintf(buf, PAGE_SIZE, "\n");
1068 }
1069}
1070
1071static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
1072 struct device_attribute *attr,
1073 char *buf)
1074{
1075 struct amdgpu_device *adev = dev_get_drvdata(dev);
1076 uint32_t limit = 0;
1077
1078 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
1079 adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
1080 return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
1081 } else {
1082 return snprintf(buf, PAGE_SIZE, "\n");
1083 }
1084}
1085
1086
1087static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
1088 struct device_attribute *attr,
1089 const char *buf,
1090 size_t count)
1091{
1092 struct amdgpu_device *adev = dev_get_drvdata(dev);
1093 int err;
1094 u32 value;
1095
1096 err = kstrtou32(buf, 10, &value);
1097 if (err)
1098 return err;
1099
1100 value = value / 1000000; /* convert to Watt */
1101 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
1102 err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
1103 if (err)
1104 return err;
1105 } else {
1106 return -EINVAL;
1107 }
1108
1109 return count;
1110}
1111
922static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); 1112static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
923static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); 1113static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
924static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); 1114static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@@ -927,6 +1117,14 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_
927static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); 1117static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
928static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0); 1118static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0);
929static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0); 1119static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0);
1120static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0);
1121static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0);
1122static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0);
1123static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, NULL, 0);
1124static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 0);
1125static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0);
1126static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0);
1127static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0);
930 1128
931static struct attribute *hwmon_attributes[] = { 1129static struct attribute *hwmon_attributes[] = {
932 &sensor_dev_attr_temp1_input.dev_attr.attr, 1130 &sensor_dev_attr_temp1_input.dev_attr.attr,
@@ -937,6 +1135,14 @@ static struct attribute *hwmon_attributes[] = {
937 &sensor_dev_attr_pwm1_min.dev_attr.attr, 1135 &sensor_dev_attr_pwm1_min.dev_attr.attr,
938 &sensor_dev_attr_pwm1_max.dev_attr.attr, 1136 &sensor_dev_attr_pwm1_max.dev_attr.attr,
939 &sensor_dev_attr_fan1_input.dev_attr.attr, 1137 &sensor_dev_attr_fan1_input.dev_attr.attr,
1138 &sensor_dev_attr_in0_input.dev_attr.attr,
1139 &sensor_dev_attr_in0_label.dev_attr.attr,
1140 &sensor_dev_attr_in1_input.dev_attr.attr,
1141 &sensor_dev_attr_in1_label.dev_attr.attr,
1142 &sensor_dev_attr_power1_average.dev_attr.attr,
1143 &sensor_dev_attr_power1_cap_max.dev_attr.attr,
1144 &sensor_dev_attr_power1_cap_min.dev_attr.attr,
1145 &sensor_dev_attr_power1_cap.dev_attr.attr,
940 NULL 1146 NULL
941}; 1147};
942 1148
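The new in0/in1 and power1 entries above follow the standard hwmon sysfs ABI; amdgpu_hwmon_show_power_avg() reports microwatts and the cap files use microwatts as well. A minimal userspace sketch, assuming hwmon0 happens to be the amdgpu instance (the index varies per system and should really be resolved via the hwmon "name" attribute):

	#include <stdio.h>

	int main(void)
	{
		/* hypothetical path; look up the real hwmonN for the amdgpu device */
		FILE *f = fopen("/sys/class/hwmon/hwmon0/power1_average", "r");
		unsigned long uw;

		if (!f) {
			perror("open power1_average");
			return 1;
		}
		if (fscanf(f, "%lu", &uw) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);
		/* value is in microwatts per amdgpu_hwmon_show_power_avg() */
		printf("GPU average power: %lu.%06lu W\n", uw / 1000000, uw % 1000000);
		return 0;
	}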
@@ -947,9 +1153,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
947 struct amdgpu_device *adev = dev_get_drvdata(dev); 1153 struct amdgpu_device *adev = dev_get_drvdata(dev);
948 umode_t effective_mode = attr->mode; 1154 umode_t effective_mode = attr->mode;
949 1155
950 /* no skipping for powerplay */ 1156 /* handle non-powerplay limitations */
951 if (adev->powerplay.cgs_device) 1157 if (!adev->powerplay.pp_handle) {
952 return effective_mode; 1158 /* Skip fan attributes if fan is not present */
1159 if (adev->pm.no_fan &&
1160 (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
1161 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
1162 attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
1163 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
1164 return 0;
1165 /* requires powerplay */
1166 if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
1167 return 0;
1168 }
953 1169
954 /* Skip limit attributes if DPM is not enabled */ 1170 /* Skip limit attributes if DPM is not enabled */
955 if (!adev->pm.dpm_enabled && 1171 if (!adev->pm.dpm_enabled &&
@@ -961,14 +1177,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
961 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) 1177 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
962 return 0; 1178 return 0;
963 1179
964 /* Skip fan attributes if fan is not present */
965 if (adev->pm.no_fan &&
966 (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
967 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
968 attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
969 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
970 return 0;
971
972 /* mask fan attributes if we have no bindings for this asic to expose */ 1180 /* mask fan attributes if we have no bindings for this asic to expose */
973 if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && 1181 if ((!adev->powerplay.pp_funcs->get_fan_speed_percent &&
974 attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ 1182 attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */
@@ -982,6 +1190,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
982 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ 1190 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */
983 effective_mode &= ~S_IWUSR; 1191 effective_mode &= ~S_IWUSR;
984 1192
1193 if ((adev->flags & AMD_IS_APU) &&
1194 (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
1195 	     attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr ||
1196 attr == &sensor_dev_attr_power1_cap.dev_attr.attr))
1197 return 0;
1198
985 /* hide max/min values if we can't both query and manage the fan */ 1199 /* hide max/min values if we can't both query and manage the fan */
986 if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && 1200 if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
987 !adev->powerplay.pp_funcs->get_fan_speed_percent) && 1201 !adev->powerplay.pp_funcs->get_fan_speed_percent) &&
@@ -989,8 +1203,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
989 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) 1203 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
990 return 0; 1204 return 0;
991 1205
992 /* requires powerplay */ 1206 /* only APUs have vddnb */
993 if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) 1207 if (!(adev->flags & AMD_IS_APU) &&
1208 (attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
1209 attr == &sensor_dev_attr_in1_label.dev_attr.attr))
994 return 0; 1210 return 0;
995 1211
996 return effective_mode; 1212 return effective_mode;
@@ -1013,13 +1229,15 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
1013 pm.dpm.thermal.work); 1229 pm.dpm.thermal.work);
1014 /* switch to the thermal state */ 1230 /* switch to the thermal state */
1015 enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL; 1231 enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL;
1232 int temp, size = sizeof(temp);
1016 1233
1017 if (!adev->pm.dpm_enabled) 1234 if (!adev->pm.dpm_enabled)
1018 return; 1235 return;
1019 1236
1020 if (adev->powerplay.pp_funcs->get_temperature) { 1237 if (adev->powerplay.pp_funcs &&
1021 int temp = amdgpu_dpm_get_temperature(adev); 1238 adev->powerplay.pp_funcs->read_sensor &&
1022 1239 !amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
1240 (void *)&temp, &size)) {
1023 if (temp < adev->pm.dpm.thermal.min_temp) 1241 if (temp < adev->pm.dpm.thermal.min_temp)
1024 /* switch back the user state */ 1242 /* switch back the user state */
1025 dpm_state = adev->pm.dpm.user_state; 1243 dpm_state = adev->pm.dpm.user_state;
@@ -1319,9 +1537,6 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
1319 if (adev->pm.dpm_enabled == 0) 1537 if (adev->pm.dpm_enabled == 0)
1320 return 0; 1538 return 0;
1321 1539
1322 if (adev->powerplay.pp_funcs->get_temperature == NULL)
1323 return 0;
1324
1325 adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, 1540 adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
1326 DRIVER_NAME, adev, 1541 DRIVER_NAME, adev,
1327 hwmon_groups); 1542 hwmon_groups);
@@ -1391,20 +1606,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
1391 return ret; 1606 return ret;
1392 } 1607 }
1393 ret = device_create_file(adev->dev, 1608 ret = device_create_file(adev->dev,
1394 &dev_attr_pp_gfx_power_profile); 1609 &dev_attr_pp_power_profile_mode);
1395 if (ret) { 1610 if (ret) {
1396 DRM_ERROR("failed to create device file " 1611 DRM_ERROR("failed to create device file "
1397 "pp_gfx_power_profile\n"); 1612 "pp_power_profile_mode\n");
1398 return ret; 1613 return ret;
1399 } 1614 }
1400 ret = device_create_file(adev->dev, 1615 ret = device_create_file(adev->dev,
1401 &dev_attr_pp_compute_power_profile); 1616 &dev_attr_pp_od_clk_voltage);
1402 if (ret) { 1617 if (ret) {
1403 DRM_ERROR("failed to create device file " 1618 DRM_ERROR("failed to create device file "
1404 "pp_compute_power_profile\n"); 1619 "pp_od_clk_voltage\n");
1405 return ret; 1620 return ret;
1406 } 1621 }
1407
1408 ret = amdgpu_debugfs_pm_init(adev); 1622 ret = amdgpu_debugfs_pm_init(adev);
1409 if (ret) { 1623 if (ret) {
1410 DRM_ERROR("Failed to register debugfs file for dpm!\n"); 1624 DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1437,9 +1651,9 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
1437 device_remove_file(adev->dev, &dev_attr_pp_sclk_od); 1651 device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
1438 device_remove_file(adev->dev, &dev_attr_pp_mclk_od); 1652 device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
1439 device_remove_file(adev->dev, 1653 device_remove_file(adev->dev,
1440 &dev_attr_pp_gfx_power_profile); 1654 &dev_attr_pp_power_profile_mode);
1441 device_remove_file(adev->dev, 1655 device_remove_file(adev->dev,
1442 &dev_attr_pp_compute_power_profile); 1656 &dev_attr_pp_od_clk_voltage);
1443} 1657}
1444 1658
1445void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) 1659void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -1462,7 +1676,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
1462 } 1676 }
1463 1677
1464 if (adev->powerplay.pp_funcs->dispatch_tasks) { 1678 if (adev->powerplay.pp_funcs->dispatch_tasks) {
1465 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL); 1679 amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
1466 } else { 1680 } else {
1467 mutex_lock(&adev->pm.mutex); 1681 mutex_lock(&adev->pm.mutex);
1468 adev->pm.dpm.new_active_crtcs = 0; 1682 adev->pm.dpm.new_active_crtcs = 0;
@@ -1512,6 +1726,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
1512 seq_printf(m, "\t%u MHz (MCLK)\n", value/100); 1726 seq_printf(m, "\t%u MHz (MCLK)\n", value/100);
1513 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size)) 1727 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size))
1514 seq_printf(m, "\t%u MHz (SCLK)\n", value/100); 1728 seq_printf(m, "\t%u MHz (SCLK)\n", value/100);
1729 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, (void *)&value, &size))
1730 seq_printf(m, "\t%u MHz (PSTATE_SCLK)\n", value/100);
1731 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, (void *)&value, &size))
1732 seq_printf(m, "\t%u MHz (PSTATE_MCLK)\n", value/100);
1515 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size)) 1733 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size))
1516 seq_printf(m, "\t%u mV (VDDGFX)\n", value); 1734 seq_printf(m, "\t%u mV (VDDGFX)\n", value);
1517 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) 1735 if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
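The pp_od_clk_voltage store handler added above parses a leading command character ('s' sclk edit, 'm' mclk edit, 'r' restore defaults, 'c' commit) followed by whitespace-separated integers that are passed to odn_edit_dpm_table(). A minimal userspace sketch, assuming card0 is the amdgpu device and purely illustrative level/clock/voltage values that the driver may well reject:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/class/drm/card0/device/pp_od_clk_voltage";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* "s <level> <clock MHz> <voltage mV>" edits one sclk table entry */
		if (write(fd, "s 1 600 900\n", 12) < 0)
			perror("write sclk");
		/* "c" asks the driver to commit the edited table */
		if (write(fd, "c\n", 2) < 0)
			perror("write commit");
		close(fd);
		return 0;
	}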
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
deleted file mode 100644
index 5f5aa5fddc16..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ /dev/null
@@ -1,290 +0,0 @@
1/*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25#include "atom.h"
26#include "amdgpu.h"
27#include "amd_shared.h"
28#include <linux/module.h>
29#include <linux/moduleparam.h>
30#include "amdgpu_pm.h"
31#include <drm/amdgpu_drm.h>
32#include "amdgpu_powerplay.h"
33#include "si_dpm.h"
34#include "cik_dpm.h"
35#include "vi_dpm.h"
36
37static int amdgpu_pp_early_init(void *handle)
38{
39 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
40 struct amd_powerplay *amd_pp;
41 int ret = 0;
42
43 amd_pp = &(adev->powerplay);
44 amd_pp->pp_handle = (void *)adev;
45
46 switch (adev->asic_type) {
47 case CHIP_POLARIS11:
48 case CHIP_POLARIS10:
49 case CHIP_POLARIS12:
50 case CHIP_TONGA:
51 case CHIP_FIJI:
52 case CHIP_TOPAZ:
53 case CHIP_CARRIZO:
54 case CHIP_STONEY:
55 case CHIP_VEGA10:
56 case CHIP_RAVEN:
57 amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
58 amd_pp->ip_funcs = &pp_ip_funcs;
59 amd_pp->pp_funcs = &pp_dpm_funcs;
60 break;
 61	/* These chips don't have powerplay implementations */
62#ifdef CONFIG_DRM_AMDGPU_SI
63 case CHIP_TAHITI:
64 case CHIP_PITCAIRN:
65 case CHIP_VERDE:
66 case CHIP_OLAND:
67 case CHIP_HAINAN:
68 amd_pp->ip_funcs = &si_dpm_ip_funcs;
69 amd_pp->pp_funcs = &si_dpm_funcs;
70 break;
71#endif
72#ifdef CONFIG_DRM_AMDGPU_CIK
73 case CHIP_BONAIRE:
74 case CHIP_HAWAII:
75 if (amdgpu_dpm == -1) {
76 amd_pp->ip_funcs = &ci_dpm_ip_funcs;
77 amd_pp->pp_funcs = &ci_dpm_funcs;
78 } else {
79 amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
80 amd_pp->ip_funcs = &pp_ip_funcs;
81 amd_pp->pp_funcs = &pp_dpm_funcs;
82 }
83 break;
84 case CHIP_KABINI:
85 case CHIP_MULLINS:
86 case CHIP_KAVERI:
87 amd_pp->ip_funcs = &kv_dpm_ip_funcs;
88 amd_pp->pp_funcs = &kv_dpm_funcs;
89 break;
90#endif
91 default:
92 ret = -EINVAL;
93 break;
94 }
95
96 if (adev->powerplay.ip_funcs->early_init)
97 ret = adev->powerplay.ip_funcs->early_init(
98 amd_pp->cgs_device ? amd_pp->cgs_device :
99 amd_pp->pp_handle);
100
101 return ret;
102}
103
104
105static int amdgpu_pp_late_init(void *handle)
106{
107 int ret = 0;
108 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
109
110 if (adev->powerplay.ip_funcs->late_init)
111 ret = adev->powerplay.ip_funcs->late_init(
112 adev->powerplay.pp_handle);
113
114 return ret;
115}
116
117static int amdgpu_pp_sw_init(void *handle)
118{
119 int ret = 0;
120 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
121
122 if (adev->powerplay.ip_funcs->sw_init)
123 ret = adev->powerplay.ip_funcs->sw_init(
124 adev->powerplay.pp_handle);
125
126 return ret;
127}
128
129static int amdgpu_pp_sw_fini(void *handle)
130{
131 int ret = 0;
132 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
133
134 if (adev->powerplay.ip_funcs->sw_fini)
135 ret = adev->powerplay.ip_funcs->sw_fini(
136 adev->powerplay.pp_handle);
137 if (ret)
138 return ret;
139
140 return ret;
141}
142
143static int amdgpu_pp_hw_init(void *handle)
144{
145 int ret = 0;
146 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
147
148 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
149 amdgpu_ucode_init_bo(adev);
150
151 if (adev->powerplay.ip_funcs->hw_init)
152 ret = adev->powerplay.ip_funcs->hw_init(
153 adev->powerplay.pp_handle);
154
155 return ret;
156}
157
158static int amdgpu_pp_hw_fini(void *handle)
159{
160 int ret = 0;
161 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
162
163 if (adev->powerplay.ip_funcs->hw_fini)
164 ret = adev->powerplay.ip_funcs->hw_fini(
165 adev->powerplay.pp_handle);
166
167 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
168 amdgpu_ucode_fini_bo(adev);
169
170 return ret;
171}
172
173static void amdgpu_pp_late_fini(void *handle)
174{
175 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
176
177 if (adev->powerplay.ip_funcs->late_fini)
178 adev->powerplay.ip_funcs->late_fini(
179 adev->powerplay.pp_handle);
180
181 if (adev->powerplay.cgs_device)
182 amdgpu_cgs_destroy_device(adev->powerplay.cgs_device);
183}
184
185static int amdgpu_pp_suspend(void *handle)
186{
187 int ret = 0;
188 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
189
190 if (adev->powerplay.ip_funcs->suspend)
191 ret = adev->powerplay.ip_funcs->suspend(
192 adev->powerplay.pp_handle);
193 return ret;
194}
195
196static int amdgpu_pp_resume(void *handle)
197{
198 int ret = 0;
199 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
200
201 if (adev->powerplay.ip_funcs->resume)
202 ret = adev->powerplay.ip_funcs->resume(
203 adev->powerplay.pp_handle);
204 return ret;
205}
206
207static int amdgpu_pp_set_clockgating_state(void *handle,
208 enum amd_clockgating_state state)
209{
210 int ret = 0;
211 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
212
213 if (adev->powerplay.ip_funcs->set_clockgating_state)
214 ret = adev->powerplay.ip_funcs->set_clockgating_state(
215 adev->powerplay.pp_handle, state);
216 return ret;
217}
218
219static int amdgpu_pp_set_powergating_state(void *handle,
220 enum amd_powergating_state state)
221{
222 int ret = 0;
223 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
224
225 if (adev->powerplay.ip_funcs->set_powergating_state)
226 ret = adev->powerplay.ip_funcs->set_powergating_state(
227 adev->powerplay.pp_handle, state);
228 return ret;
229}
230
231
232static bool amdgpu_pp_is_idle(void *handle)
233{
234 bool ret = true;
235 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
236
237 if (adev->powerplay.ip_funcs->is_idle)
238 ret = adev->powerplay.ip_funcs->is_idle(
239 adev->powerplay.pp_handle);
240 return ret;
241}
242
243static int amdgpu_pp_wait_for_idle(void *handle)
244{
245 int ret = 0;
246 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
247
248 if (adev->powerplay.ip_funcs->wait_for_idle)
249 ret = adev->powerplay.ip_funcs->wait_for_idle(
250 adev->powerplay.pp_handle);
251 return ret;
252}
253
254static int amdgpu_pp_soft_reset(void *handle)
255{
256 int ret = 0;
257 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
258
259 if (adev->powerplay.ip_funcs->soft_reset)
260 ret = adev->powerplay.ip_funcs->soft_reset(
261 adev->powerplay.pp_handle);
262 return ret;
263}
264
265static const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
266 .name = "amdgpu_powerplay",
267 .early_init = amdgpu_pp_early_init,
268 .late_init = amdgpu_pp_late_init,
269 .sw_init = amdgpu_pp_sw_init,
270 .sw_fini = amdgpu_pp_sw_fini,
271 .hw_init = amdgpu_pp_hw_init,
272 .hw_fini = amdgpu_pp_hw_fini,
273 .late_fini = amdgpu_pp_late_fini,
274 .suspend = amdgpu_pp_suspend,
275 .resume = amdgpu_pp_resume,
276 .is_idle = amdgpu_pp_is_idle,
277 .wait_for_idle = amdgpu_pp_wait_for_idle,
278 .soft_reset = amdgpu_pp_soft_reset,
279 .set_clockgating_state = amdgpu_pp_set_clockgating_state,
280 .set_powergating_state = amdgpu_pp_set_powergating_state,
281};
282
283const struct amdgpu_ip_block_version amdgpu_pp_ip_block =
284{
285 .type = AMD_IP_BLOCK_TYPE_SMC,
286 .major = 1,
287 .minor = 0,
288 .rev = 0,
289 .funcs = &amdgpu_pp_ip_funcs,
290};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index ae9c106979d7..4b584cb75bf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -26,9 +26,12 @@
26#include <drm/drmP.h> 26#include <drm/drmP.h>
27 27
28#include "amdgpu.h" 28#include "amdgpu.h"
29#include "amdgpu_display.h"
29#include <drm/amdgpu_drm.h> 30#include <drm/amdgpu_drm.h>
30#include <linux/dma-buf.h> 31#include <linux/dma-buf.h>
31 32
33static const struct dma_buf_ops amdgpu_dmabuf_ops;
34
32struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) 35struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
33{ 36{
34 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 37 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -102,59 +105,95 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
102 int ret; 105 int ret;
103 106
104 ww_mutex_lock(&resv->lock, NULL); 107 ww_mutex_lock(&resv->lock, NULL);
105 ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, 108 ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
106 AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo); 109 AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
107 ww_mutex_unlock(&resv->lock); 110 resv, &bo);
108 if (ret) 111 if (ret)
109 return ERR_PTR(ret); 112 goto error;
110 113
111 bo->prime_shared_count = 1; 114 bo->tbo.sg = sg;
115 bo->tbo.ttm->sg = sg;
116 bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
117 bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
118 if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
119 bo->prime_shared_count = 1;
120
121 ww_mutex_unlock(&resv->lock);
112 return &bo->gem_base; 122 return &bo->gem_base;
123
124error:
125 ww_mutex_unlock(&resv->lock);
126 return ERR_PTR(ret);
113} 127}
114 128
115int amdgpu_gem_prime_pin(struct drm_gem_object *obj) 129static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
130 struct device *target_dev,
131 struct dma_buf_attachment *attach)
116{ 132{
133 struct drm_gem_object *obj = dma_buf->priv;
117 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 134 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
118 long ret = 0; 135 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
119 136 long r;
120 ret = amdgpu_bo_reserve(bo, false); 137
121 if (unlikely(ret != 0)) 138 r = drm_gem_map_attach(dma_buf, target_dev, attach);
122 return ret; 139 if (r)
123 140 return r;
124 /* 141
125 * Wait for all shared fences to complete before we switch to future 142 r = amdgpu_bo_reserve(bo, false);
126 * use of exclusive fence on this prime shared bo. 143 if (unlikely(r != 0))
127 */ 144 goto error_detach;
128 ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, 145
129 MAX_SCHEDULE_TIMEOUT); 146
130 if (unlikely(ret < 0)) { 147 if (attach->dev->driver != adev->dev->driver) {
131 DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); 148 /*
132 amdgpu_bo_unreserve(bo); 149 * Wait for all shared fences to complete before we switch to future
133 return ret; 150 * use of exclusive fence on this prime shared bo.
151 */
152 r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
153 true, false,
154 MAX_SCHEDULE_TIMEOUT);
155 if (unlikely(r < 0)) {
156 DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
157 goto error_unreserve;
158 }
134 } 159 }
135 160
136 /* pin buffer into GTT */ 161 /* pin buffer into GTT */
137 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); 162 r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
138 if (likely(ret == 0)) 163 if (r)
164 goto error_unreserve;
165
166 if (attach->dev->driver != adev->dev->driver)
139 bo->prime_shared_count++; 167 bo->prime_shared_count++;
140 168
169error_unreserve:
141 amdgpu_bo_unreserve(bo); 170 amdgpu_bo_unreserve(bo);
142 return ret; 171
172error_detach:
173 if (r)
174 drm_gem_map_detach(dma_buf, attach);
175 return r;
143} 176}
144 177
145void amdgpu_gem_prime_unpin(struct drm_gem_object *obj) 178static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
179 struct dma_buf_attachment *attach)
146{ 180{
181 struct drm_gem_object *obj = dma_buf->priv;
147 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); 182 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
183 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
148 int ret = 0; 184 int ret = 0;
149 185
150 ret = amdgpu_bo_reserve(bo, true); 186 ret = amdgpu_bo_reserve(bo, true);
151 if (unlikely(ret != 0)) 187 if (unlikely(ret != 0))
152 return; 188 goto error;
153 189
154 amdgpu_bo_unpin(bo); 190 amdgpu_bo_unpin(bo);
155 if (bo->prime_shared_count) 191 if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
156 bo->prime_shared_count--; 192 bo->prime_shared_count--;
157 amdgpu_bo_unreserve(bo); 193 amdgpu_bo_unreserve(bo);
194
195error:
196 drm_gem_map_detach(dma_buf, attach);
158} 197}
159 198
160struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) 199struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
@@ -164,6 +203,50 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
164 return bo->tbo.resv; 203 return bo->tbo.resv;
165} 204}
166 205
206static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
207 enum dma_data_direction direction)
208{
209 struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
210 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
211 struct ttm_operation_ctx ctx = { true, false };
212 u32 domain = amdgpu_display_framebuffer_domains(adev);
213 int ret;
214 bool reads = (direction == DMA_BIDIRECTIONAL ||
215 direction == DMA_FROM_DEVICE);
216
217 if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
218 return 0;
219
220 /* move to gtt */
221 ret = amdgpu_bo_reserve(bo, false);
222 if (unlikely(ret != 0))
223 return ret;
224
225 if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
226 amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
227 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
228 }
229
230 amdgpu_bo_unreserve(bo);
231 return ret;
232}
233
234static const struct dma_buf_ops amdgpu_dmabuf_ops = {
235 .attach = amdgpu_gem_map_attach,
236 .detach = amdgpu_gem_map_detach,
237 .map_dma_buf = drm_gem_map_dma_buf,
238 .unmap_dma_buf = drm_gem_unmap_dma_buf,
239 .release = drm_gem_dmabuf_release,
240 .begin_cpu_access = amdgpu_gem_begin_cpu_access,
241 .map = drm_gem_dmabuf_kmap,
242 .map_atomic = drm_gem_dmabuf_kmap_atomic,
243 .unmap = drm_gem_dmabuf_kunmap,
244 .unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
245 .mmap = drm_gem_dmabuf_mmap,
246 .vmap = drm_gem_dmabuf_vmap,
247 .vunmap = drm_gem_dmabuf_vunmap,
248};
249
167struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, 250struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
168 struct drm_gem_object *gobj, 251 struct drm_gem_object *gobj,
169 int flags) 252 int flags)
@@ -176,7 +259,30 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
176 return ERR_PTR(-EPERM); 259 return ERR_PTR(-EPERM);
177 260
178 buf = drm_gem_prime_export(dev, gobj, flags); 261 buf = drm_gem_prime_export(dev, gobj, flags);
179 if (!IS_ERR(buf)) 262 if (!IS_ERR(buf)) {
180 buf->file->f_mapping = dev->anon_inode->i_mapping; 263 buf->file->f_mapping = dev->anon_inode->i_mapping;
264 buf->ops = &amdgpu_dmabuf_ops;
265 }
266
181 return buf; 267 return buf;
182} 268}
269
270struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
271 struct dma_buf *dma_buf)
272{
273 struct drm_gem_object *obj;
274
275 if (dma_buf->ops == &amdgpu_dmabuf_ops) {
276 obj = dma_buf->priv;
277 if (obj->dev == dev) {
278 /*
 279 		 * Importing dmabuf exported from our own gem increases
280 * refcount on gem itself instead of f_count of dmabuf.
281 */
282 drm_gem_object_get(obj);
283 return obj;
284 }
285 }
286
287 return drm_gem_prime_import(dev, dma_buf);
288}
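With amdgpu_gem_prime_export() installing amdgpu_dmabuf_ops and amdgpu_gem_prime_import() comparing dma_buf->ops against it, re-importing a buffer on the same device now just takes a GEM reference instead of building a new sg-backed BO. A hedged userspace sketch with libdrm (xf86drm.h) that exercises that round trip; fd and handle are assumed to come from earlier amdgpu allocation calls:

	#include <stdint.h>
	#include <stdio.h>
	#include <xf86drm.h>

	int reimport_self(int fd, uint32_t handle)
	{
		int prime_fd;
		uint32_t handle2;

		/* export the GEM handle as a dma-buf fd */
		if (drmPrimeHandleToFD(fd, handle, DRM_CLOEXEC, &prime_fd))
			return -1;
		/* importing on the same device hits the self-import fast path */
		if (drmPrimeFDToHandle(fd, prime_fd, &handle2))
			return -1;
		printf("original %u, reimported %u\n", handle, handle2);
		return 0;
	}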
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 2157d4509e84..c7d43e064fc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -51,29 +51,11 @@ static int psp_sw_init(void *handle)
51 51
52 switch (adev->asic_type) { 52 switch (adev->asic_type) {
53 case CHIP_VEGA10: 53 case CHIP_VEGA10:
54 psp->init_microcode = psp_v3_1_init_microcode; 54 case CHIP_VEGA12:
55 psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv; 55 psp_v3_1_set_psp_funcs(psp);
56 psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos;
57 psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
58 psp->ring_init = psp_v3_1_ring_init;
59 psp->ring_create = psp_v3_1_ring_create;
60 psp->ring_stop = psp_v3_1_ring_stop;
61 psp->ring_destroy = psp_v3_1_ring_destroy;
62 psp->cmd_submit = psp_v3_1_cmd_submit;
63 psp->compare_sram_data = psp_v3_1_compare_sram_data;
64 psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
65 psp->mode1_reset = psp_v3_1_mode1_reset;
66 break; 56 break;
67 case CHIP_RAVEN: 57 case CHIP_RAVEN:
68 psp->init_microcode = psp_v10_0_init_microcode; 58 psp_v10_0_set_psp_funcs(psp);
69 psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
70 psp->ring_init = psp_v10_0_ring_init;
71 psp->ring_create = psp_v10_0_ring_create;
72 psp->ring_stop = psp_v10_0_ring_stop;
73 psp->ring_destroy = psp_v10_0_ring_destroy;
74 psp->cmd_submit = psp_v10_0_cmd_submit;
75 psp->compare_sram_data = psp_v10_0_compare_sram_data;
76 psp->mode1_reset = psp_v10_0_mode1_reset;
77 break; 59 break;
78 default: 60 default:
79 return -EINVAL; 61 return -EINVAL;
@@ -81,6 +63,9 @@ static int psp_sw_init(void *handle)
81 63
82 psp->adev = adev; 64 psp->adev = adev;
83 65
66 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
67 return 0;
68
84 ret = psp_init_microcode(psp); 69 ret = psp_init_microcode(psp);
85 if (ret) { 70 if (ret) {
86 DRM_ERROR("Failed to load psp firmware!\n"); 71 DRM_ERROR("Failed to load psp firmware!\n");
@@ -94,6 +79,9 @@ static int psp_sw_fini(void *handle)
94{ 79{
95 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 80 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
96 81
82 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
83 return 0;
84
97 release_firmware(adev->psp.sos_fw); 85 release_firmware(adev->psp.sos_fw);
98 adev->psp.sos_fw = NULL; 86 adev->psp.sos_fw = NULL;
99 release_firmware(adev->psp.asd_fw); 87 release_firmware(adev->psp.asd_fw);
@@ -472,6 +460,9 @@ static int psp_suspend(void *handle)
472 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 460 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
473 struct psp_context *psp = &adev->psp; 461 struct psp_context *psp = &adev->psp;
474 462
463 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
464 return 0;
465
475 ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); 466 ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
476 if (ret) { 467 if (ret) {
477 DRM_ERROR("PSP ring stop failed\n"); 468 DRM_ERROR("PSP ring stop failed\n");
@@ -512,19 +503,11 @@ failed:
512 return ret; 503 return ret;
513} 504}
514 505
515static bool psp_check_reset(void* handle) 506int psp_gpu_reset(struct amdgpu_device *adev)
516{ 507{
517 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 508 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
518 509 return 0;
519 if (adev->flags & AMD_IS_APU)
520 return true;
521
522 return false;
523}
524 510
525static int psp_reset(void* handle)
526{
527 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
528 return psp_mode1_reset(&adev->psp); 511 return psp_mode1_reset(&adev->psp);
529} 512}
530 513
@@ -571,9 +554,9 @@ const struct amd_ip_funcs psp_ip_funcs = {
571 .suspend = psp_suspend, 554 .suspend = psp_suspend,
572 .resume = psp_resume, 555 .resume = psp_resume,
573 .is_idle = NULL, 556 .is_idle = NULL,
574 .check_soft_reset = psp_check_reset, 557 .check_soft_reset = NULL,
575 .wait_for_idle = NULL, 558 .wait_for_idle = NULL,
576 .soft_reset = psp_reset, 559 .soft_reset = NULL,
577 .set_clockgating_state = psp_set_clockgating_state, 560 .set_clockgating_state = psp_set_clockgating_state,
578 .set_powergating_state = psp_set_powergating_state, 561 .set_powergating_state = psp_set_powergating_state,
579}; 562};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index ce4654550416..129209686848 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -33,6 +33,8 @@
33#define PSP_ASD_SHARED_MEM_SIZE 0x4000 33#define PSP_ASD_SHARED_MEM_SIZE 0x4000
34#define PSP_1_MEG 0x100000 34#define PSP_1_MEG 0x100000
35 35
36struct psp_context;
37
36enum psp_ring_type 38enum psp_ring_type
37{ 39{
38 PSP_RING_TYPE__INVALID = 0, 40 PSP_RING_TYPE__INVALID = 0,
@@ -53,12 +55,8 @@ struct psp_ring
53 uint32_t ring_size; 55 uint32_t ring_size;
54}; 56};
55 57
56struct psp_context 58struct psp_funcs
57{ 59{
58 struct amdgpu_device *adev;
59 struct psp_ring km_ring;
60 struct psp_gfx_cmd_resp *cmd;
61
62 int (*init_microcode)(struct psp_context *psp); 60 int (*init_microcode)(struct psp_context *psp);
63 int (*bootloader_load_sysdrv)(struct psp_context *psp); 61 int (*bootloader_load_sysdrv)(struct psp_context *psp);
64 int (*bootloader_load_sos)(struct psp_context *psp); 62 int (*bootloader_load_sos)(struct psp_context *psp);
@@ -77,6 +75,15 @@ struct psp_context
77 enum AMDGPU_UCODE_ID ucode_type); 75 enum AMDGPU_UCODE_ID ucode_type);
78 bool (*smu_reload_quirk)(struct psp_context *psp); 76 bool (*smu_reload_quirk)(struct psp_context *psp);
79 int (*mode1_reset)(struct psp_context *psp); 77 int (*mode1_reset)(struct psp_context *psp);
78};
79
80struct psp_context
81{
82 struct amdgpu_device *adev;
83 struct psp_ring km_ring;
84 struct psp_gfx_cmd_resp *cmd;
85
86 const struct psp_funcs *funcs;
80 87
81 /* fence buffer */ 88 /* fence buffer */
82 struct amdgpu_bo *fw_pri_bo; 89 struct amdgpu_bo *fw_pri_bo;
@@ -123,25 +130,25 @@ struct amdgpu_psp_funcs {
123 enum AMDGPU_UCODE_ID); 130 enum AMDGPU_UCODE_ID);
124}; 131};
125 132
126#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) 133#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
127#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) 134#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
128#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type)) 135#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
129#define psp_ring_stop(psp, type) (psp)->ring_stop((psp), (type)) 136#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
130#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type))) 137#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
131#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ 138#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
132 (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) 139 (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
133#define psp_compare_sram_data(psp, ucode, type) \ 140#define psp_compare_sram_data(psp, ucode, type) \
134 (psp)->compare_sram_data((psp), (ucode), (type)) 141 (psp)->funcs->compare_sram_data((psp), (ucode), (type))
135#define psp_init_microcode(psp) \ 142#define psp_init_microcode(psp) \
136 ((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0) 143 ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
137#define psp_bootloader_load_sysdrv(psp) \ 144#define psp_bootloader_load_sysdrv(psp) \
138 ((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0) 145 ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
139#define psp_bootloader_load_sos(psp) \ 146#define psp_bootloader_load_sos(psp) \
140 ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0) 147 ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
141#define psp_smu_reload_quirk(psp) \ 148#define psp_smu_reload_quirk(psp) \
142 ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false) 149 ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
143#define psp_mode1_reset(psp) \ 150#define psp_mode1_reset(psp) \
144 ((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false) 151 ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
145 152
146extern const struct amd_ip_funcs psp_ip_funcs; 153extern const struct amd_ip_funcs psp_ip_funcs;
147 154
@@ -151,4 +158,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
151 158
152extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; 159extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
153 160
161int psp_gpu_reset(struct amdgpu_device *adev);
162
154#endif 163#endif
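The psp_sw_init() hunk earlier replaces the long per-ASIC callback assignments with a single psp_v3_1_set_psp_funcs()/psp_v10_0_set_psp_funcs() call, and psp.h now routes every psp_*() macro through the const psp_funcs table. The backend files are not part of this section, so the following is only an illustrative sketch of how such a setter presumably looks, reusing the callback names visible in the removed psp_sw_init() code:

	static const struct psp_funcs psp_v3_1_funcs = {
		.init_microcode = psp_v3_1_init_microcode,
		.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
		.bootloader_load_sos = psp_v3_1_bootloader_load_sos,
		.prep_cmd_buf = psp_v3_1_prep_cmd_buf,
		.ring_init = psp_v3_1_ring_init,
		.ring_create = psp_v3_1_ring_create,
		.ring_stop = psp_v3_1_ring_stop,
		.ring_destroy = psp_v3_1_ring_destroy,
		.cmd_submit = psp_v3_1_cmd_submit,
		.compare_sram_data = psp_v3_1_compare_sram_data,
		.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
		.mode1_reset = psp_v3_1_mode1_reset,
	};

	void psp_v3_1_set_psp_funcs(struct psp_context *psp)
	{
		psp->funcs = &psp_v3_1_funcs;
	}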
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13044e66dcaf..d5f526f38e50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -360,6 +360,9 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
360 360
361 amdgpu_debugfs_ring_fini(ring); 361 amdgpu_debugfs_ring_fini(ring);
362 362
363 dma_fence_put(ring->vmid_wait);
364 ring->vmid_wait = NULL;
365
363 ring->adev->rings[ring->idx] = NULL; 366 ring->adev->rings[ring->idx] = NULL;
364} 367}
365 368
@@ -481,7 +484,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
481 result = 0; 484 result = 0;
482 485
483 if (*pos < 12) { 486 if (*pos < 12) {
484 early[0] = amdgpu_ring_get_rptr(ring); 487 early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
485 early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; 488 early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
486 early[2] = ring->wptr & ring->buf_mask; 489 early[2] = ring->wptr & ring->buf_mask;
487 for (i = *pos / 4; i < 3 && size; i++) { 490 for (i = *pos / 4; i < 3 && size; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 102dad3edf6a..1a5911882657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -26,6 +26,7 @@
26 26
27#include <drm/amdgpu_drm.h> 27#include <drm/amdgpu_drm.h>
28#include <drm/gpu_scheduler.h> 28#include <drm/gpu_scheduler.h>
29#include <drm/drm_print.h>
29 30
30/* max number of rings */ 31/* max number of rings */
31#define AMDGPU_MAX_RINGS 18 32#define AMDGPU_MAX_RINGS 18
@@ -35,8 +36,9 @@
35#define AMDGPU_MAX_UVD_ENC_RINGS 2 36#define AMDGPU_MAX_UVD_ENC_RINGS 2
36 37
37/* some special values for the owner field */ 38/* some special values for the owner field */
38#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) 39#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
39#define AMDGPU_FENCE_OWNER_VM ((void*)1ul) 40#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
41#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
40 42
41#define AMDGPU_FENCE_FLAG_64BIT (1 << 0) 43#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
42#define AMDGPU_FENCE_FLAG_INT (1 << 1) 44#define AMDGPU_FENCE_FLAG_INT (1 << 1)
@@ -128,7 +130,6 @@ struct amdgpu_ring_funcs {
128 void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, 130 void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
129 uint64_t pd_addr); 131 uint64_t pd_addr);
130 void (*emit_hdp_flush)(struct amdgpu_ring *ring); 132 void (*emit_hdp_flush)(struct amdgpu_ring *ring);
131 void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
132 void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid, 133 void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
133 uint32_t gds_base, uint32_t gds_size, 134 uint32_t gds_base, uint32_t gds_size,
134 uint32_t gws_base, uint32_t gws_size, 135 uint32_t gws_base, uint32_t gws_size,
@@ -151,6 +152,8 @@ struct amdgpu_ring_funcs {
151 void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); 152 void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
152 void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); 153 void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
153 void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); 154 void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
155 void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
156 uint32_t val, uint32_t mask);
154 void (*emit_tmz)(struct amdgpu_ring *ring, bool start); 157 void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
155 /* priority functions */ 158 /* priority functions */
156 void (*set_priority) (struct amdgpu_ring *ring, 159 void (*set_priority) (struct amdgpu_ring *ring,
@@ -195,6 +198,7 @@ struct amdgpu_ring {
195 u64 cond_exe_gpu_addr; 198 u64 cond_exe_gpu_addr;
196 volatile u32 *cond_exe_cpu_addr; 199 volatile u32 *cond_exe_cpu_addr;
197 unsigned vm_inv_eng; 200 unsigned vm_inv_eng;
201 struct dma_fence *vmid_wait;
198 bool has_compute_vm_bug; 202 bool has_compute_vm_bug;
199 203
200 atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX]; 204 atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 3144400435b7..fb1667b35daa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -63,21 +63,27 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
63 for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) 63 for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
64 INIT_LIST_HEAD(&sa_manager->flist[i]); 64 INIT_LIST_HEAD(&sa_manager->flist[i]);
65 65
66 r = amdgpu_bo_create(adev, size, align, true, domain, 66 r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
67 0, NULL, NULL, 0, &sa_manager->bo); 67 &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
68 if (r) { 68 if (r) {
69 dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); 69 dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
70 return r; 70 return r;
71 } 71 }
72 72
73 memset(sa_manager->cpu_ptr, 0, sa_manager->size);
73 return r; 74 return r;
74} 75}
75 76
76void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, 77void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
77 struct amdgpu_sa_manager *sa_manager) 78 struct amdgpu_sa_manager *sa_manager)
78{ 79{
79 struct amdgpu_sa_bo *sa_bo, *tmp; 80 struct amdgpu_sa_bo *sa_bo, *tmp;
80 81
82 if (sa_manager->bo == NULL) {
83 dev_err(adev->dev, "no bo for sa manager\n");
84 return;
85 }
86
81 if (!list_empty(&sa_manager->olist)) { 87 if (!list_empty(&sa_manager->olist)) {
82 sa_manager->hole = &sa_manager->olist, 88 sa_manager->hole = &sa_manager->olist,
83 amdgpu_sa_bo_try_free(sa_manager); 89 amdgpu_sa_bo_try_free(sa_manager);
@@ -88,55 +94,9 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
88 list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) { 94 list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
89 amdgpu_sa_bo_remove_locked(sa_bo); 95 amdgpu_sa_bo_remove_locked(sa_bo);
90 } 96 }
91 amdgpu_bo_unref(&sa_manager->bo);
92 sa_manager->size = 0;
93}
94
95int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
96 struct amdgpu_sa_manager *sa_manager)
97{
98 int r;
99
100 if (sa_manager->bo == NULL) {
101 dev_err(adev->dev, "no bo for sa manager\n");
102 return -EINVAL;
103 }
104 97
105 /* map the buffer */ 98 amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
106 r = amdgpu_bo_reserve(sa_manager->bo, false); 99 sa_manager->size = 0;
107 if (r) {
108 dev_err(adev->dev, "(%d) failed to reserve manager bo\n", r);
109 return r;
110 }
111 r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr);
112 if (r) {
113 amdgpu_bo_unreserve(sa_manager->bo);
114 dev_err(adev->dev, "(%d) failed to pin manager bo\n", r);
115 return r;
116 }
117 r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
118 memset(sa_manager->cpu_ptr, 0, sa_manager->size);
119 amdgpu_bo_unreserve(sa_manager->bo);
120 return r;
121}
122
123int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
124 struct amdgpu_sa_manager *sa_manager)
125{
126 int r;
127
128 if (sa_manager->bo == NULL) {
129 dev_err(adev->dev, "no bo for sa manager\n");
130 return -EINVAL;
131 }
132
133 r = amdgpu_bo_reserve(sa_manager->bo, true);
134 if (!r) {
135 amdgpu_bo_kunmap(sa_manager->bo);
136 amdgpu_bo_unpin(sa_manager->bo);
137 amdgpu_bo_unreserve(sa_manager->bo);
138 }
139 return r;
140} 100}
141 101
142static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) 102static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
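The start/suspend helpers above could be dropped because amdgpu_bo_create_kernel() already creates, pins and CPU-maps the backing BO in one call, with amdgpu_bo_free_kernel() as its counterpart. A minimal sketch of that pairing, assuming a valid adev and hypothetical my_* variables:

	struct amdgpu_bo *my_bo;
	u64 my_gpu_addr;
	void *my_cpu_ptr;
	int r;

	/* create + pin + kmap in one step, as the SA manager now does */
	r = amdgpu_bo_create_kernel(adev, size, align, AMDGPU_GEM_DOMAIN_GTT,
				    &my_bo, &my_gpu_addr, &my_cpu_ptr);
	if (r)
		return r;

	/* ... use the pinned, CPU-visible buffer ... */

	/* unmap + unpin + free in one step */
	amdgpu_bo_free_kernel(&my_bo, &my_gpu_addr, &my_cpu_ptr);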
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index df65c66dc956..2d6f5ec77a68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -31,6 +31,7 @@
31#include <drm/drmP.h> 31#include <drm/drmP.h>
32#include "amdgpu.h" 32#include "amdgpu.h"
33#include "amdgpu_trace.h" 33#include "amdgpu_trace.h"
34#include "amdgpu_amdkfd.h"
34 35
35struct amdgpu_sync_entry { 36struct amdgpu_sync_entry {
36 struct hlist_node node; 37 struct hlist_node node;
@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
85 */ 86 */
86static void *amdgpu_sync_get_owner(struct dma_fence *f) 87static void *amdgpu_sync_get_owner(struct dma_fence *f)
87{ 88{
88 struct drm_sched_fence *s_fence = to_drm_sched_fence(f); 89 struct drm_sched_fence *s_fence;
90 struct amdgpu_amdkfd_fence *kfd_fence;
91
92 if (!f)
93 return AMDGPU_FENCE_OWNER_UNDEFINED;
89 94
95 s_fence = to_drm_sched_fence(f);
90 if (s_fence) 96 if (s_fence)
91 return s_fence->owner; 97 return s_fence->owner;
92 98
99 kfd_fence = to_amdgpu_amdkfd_fence(f);
100 if (kfd_fence)
101 return AMDGPU_FENCE_OWNER_KFD;
102
93 return AMDGPU_FENCE_OWNER_UNDEFINED; 103 return AMDGPU_FENCE_OWNER_UNDEFINED;
94} 104}
95 105
@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
204 for (i = 0; i < flist->shared_count; ++i) { 214 for (i = 0; i < flist->shared_count; ++i) {
205 f = rcu_dereference_protected(flist->shared[i], 215 f = rcu_dereference_protected(flist->shared[i],
206 reservation_object_held(resv)); 216 reservation_object_held(resv));
217 /* We only want to trigger KFD eviction fences on
218 * evict or move jobs. Skip KFD fences otherwise.
219 */
220 fence_owner = amdgpu_sync_get_owner(f);
221 if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
222 owner != AMDGPU_FENCE_OWNER_UNDEFINED)
223 continue;
224
207 if (amdgpu_sync_same_dev(adev, f)) { 225 if (amdgpu_sync_same_dev(adev, f)) {
208 /* VM updates are only interesting 226 /* VM updates are only interesting
209 * for other VM updates and moves. 227 * for other VM updates and moves.
210 */ 228 */
211 fence_owner = amdgpu_sync_get_owner(f);
212 if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && 229 if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
213 (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && 230 (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
214 ((owner == AMDGPU_FENCE_OWNER_VM) != 231 ((owner == AMDGPU_FENCE_OWNER_VM) !=
@@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
305 return NULL; 322 return NULL;
306} 323}
307 324
325/**
326 * amdgpu_sync_clone - clone a sync object
327 *
328 * @source: sync object to clone
329 * @clone: pointer to destination sync object
330 *
331 * Adds references to all unsignaled fences in @source to @clone. Also
332 * removes signaled fences from @source while at it.
333 */
334int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
335{
336 struct amdgpu_sync_entry *e;
337 struct hlist_node *tmp;
338 struct dma_fence *f;
339 int i, r;
340
341 hash_for_each_safe(source->fences, i, tmp, e, node) {
342 f = e->fence;
343 if (!dma_fence_is_signaled(f)) {
344 r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
345 if (r)
346 return r;
347 } else {
348 hash_del(&e->node);
349 dma_fence_put(f);
350 kmem_cache_free(amdgpu_sync_slab, e);
351 }
352 }
353
354 dma_fence_put(clone->last_vm_update);
355 clone->last_vm_update = dma_fence_get(source->last_vm_update);
356
357 return 0;
358}
359
308int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) 360int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
309{ 361{
310 struct amdgpu_sync_entry *e; 362 struct amdgpu_sync_entry *e;
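amdgpu_sync_clone() above copies references to the still-pending fences and prunes signaled ones from the source as a side effect. A hedged usage sketch, assuming the sync objects are set up with the existing amdgpu_sync_create()/amdgpu_sync_free() helpers and that job is a hypothetical amdgpu_job:

	struct amdgpu_sync snapshot;
	int r;

	amdgpu_sync_create(&snapshot);

	/* take over every fence in job->sync that has not signaled yet */
	r = amdgpu_sync_clone(&job->sync, &snapshot);
	if (r) {
		amdgpu_sync_free(&snapshot);
		return r;
	}

	/* ... wait on or hand off the snapshot ... */
	amdgpu_sync_free(&snapshot);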
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 7aba38d5c9df..10cf23a57f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
50struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, 50struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
51 struct amdgpu_ring *ring); 51 struct amdgpu_ring *ring);
52struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); 52struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
53int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
53int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); 54int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
54void amdgpu_sync_free(struct amdgpu_sync *sync); 55void amdgpu_sync_free(struct amdgpu_sync *sync);
55int amdgpu_sync_init(void); 56int amdgpu_sync_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index ed8c3739015b..2dbe87591f81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
42 /* Number of tests = 42 /* Number of tests =
43 * (Total GTT - IB pool - writeback page - ring buffers) / test size 43 * (Total GTT - IB pool - writeback page - ring buffers) / test size
44 */ 44 */
45 n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; 45 n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
46 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 46 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
47 if (adev->rings[i]) 47 if (adev->rings[i])
48 n -= adev->rings[i]->ring_size; 48 n -= adev->rings[i]->ring_size;
@@ -59,9 +59,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
59 goto out_cleanup; 59 goto out_cleanup;
60 } 60 }
61 61
62 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, 62 r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
63 AMDGPU_GEM_DOMAIN_VRAM, 0, 63 ttm_bo_type_kernel, NULL, &vram_obj);
64 NULL, NULL, 0, &vram_obj);
65 if (r) { 64 if (r) {
66 DRM_ERROR("Failed to create VRAM object\n"); 65 DRM_ERROR("Failed to create VRAM object\n");
67 goto out_cleanup; 66 goto out_cleanup;
@@ -80,9 +79,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
80 void **vram_start, **vram_end; 79 void **vram_start, **vram_end;
81 struct dma_fence *fence = NULL; 80 struct dma_fence *fence = NULL;
82 81
83 r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, 82 r = amdgpu_bo_create(adev, size, PAGE_SIZE,
84 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 83 AMDGPU_GEM_DOMAIN_GTT, 0,
85 NULL, 0, gtt_obj + i); 84 ttm_bo_type_kernel, NULL, gtt_obj + i);
86 if (r) { 85 if (r) {
87 DRM_ERROR("Failed to create GTT object %d\n", i); 86 DRM_ERROR("Failed to create GTT object %d\n", i);
88 goto out_lclean; 87 goto out_lclean;
@@ -142,10 +141,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
142 "0x%16llx/0x%16llx)\n", 141 "0x%16llx/0x%16llx)\n",
143 i, *vram_start, gart_start, 142 i, *vram_start, gart_start,
144 (unsigned long long) 143 (unsigned long long)
145 (gart_addr - adev->mc.gart_start + 144 (gart_addr - adev->gmc.gart_start +
146 (void*)gart_start - gtt_map), 145 (void*)gart_start - gtt_map),
147 (unsigned long long) 146 (unsigned long long)
148 (vram_addr - adev->mc.vram_start + 147 (vram_addr - adev->gmc.vram_start +
149 (void*)gart_start - gtt_map)); 148 (void*)gart_start - gtt_map));
150 amdgpu_bo_kunmap(vram_obj); 149 amdgpu_bo_kunmap(vram_obj);
151 goto out_lclean_unpin; 150 goto out_lclean_unpin;
@@ -187,10 +186,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
187 "0x%16llx/0x%16llx)\n", 186 "0x%16llx/0x%16llx)\n",
188 i, *gart_start, vram_start, 187 i, *gart_start, vram_start,
189 (unsigned long long) 188 (unsigned long long)
190 (vram_addr - adev->mc.vram_start + 189 (vram_addr - adev->gmc.vram_start +
191 (void*)vram_start - vram_map), 190 (void*)vram_start - vram_map),
192 (unsigned long long) 191 (unsigned long long)
193 (gart_addr - adev->mc.gart_start + 192 (gart_addr - adev->gmc.gart_start +
194 (void*)vram_start - vram_map)); 193 (void*)vram_start - vram_map));
195 amdgpu_bo_kunmap(gtt_obj[i]); 194 amdgpu_bo_kunmap(gtt_obj[i]);
196 goto out_lclean_unpin; 195 goto out_lclean_unpin;
@@ -200,7 +199,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
200 amdgpu_bo_kunmap(gtt_obj[i]); 199 amdgpu_bo_kunmap(gtt_obj[i]);
201 200
202 DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", 201 DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
203 gart_addr - adev->mc.gart_start); 202 gart_addr - adev->gmc.gart_start);
204 continue; 203 continue;
205 204
206out_lclean_unpin: 205out_lclean_unpin:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index cace7a93fc94..532263ab6e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -86,7 +86,7 @@ TRACE_EVENT(amdgpu_iv,
86 __field(unsigned, vmid_src) 86 __field(unsigned, vmid_src)
87 __field(uint64_t, timestamp) 87 __field(uint64_t, timestamp)
88 __field(unsigned, timestamp_src) 88 __field(unsigned, timestamp_src)
89 __field(unsigned, pas_id) 89 __field(unsigned, pasid)
90 __array(unsigned, src_data, 4) 90 __array(unsigned, src_data, 4)
91 ), 91 ),
92 TP_fast_assign( 92 TP_fast_assign(
@@ -97,16 +97,16 @@ TRACE_EVENT(amdgpu_iv,
97 __entry->vmid_src = iv->vmid_src; 97 __entry->vmid_src = iv->vmid_src;
98 __entry->timestamp = iv->timestamp; 98 __entry->timestamp = iv->timestamp;
99 __entry->timestamp_src = iv->timestamp_src; 99 __entry->timestamp_src = iv->timestamp_src;
100 __entry->pas_id = iv->pas_id; 100 __entry->pasid = iv->pasid;
101 __entry->src_data[0] = iv->src_data[0]; 101 __entry->src_data[0] = iv->src_data[0];
102 __entry->src_data[1] = iv->src_data[1]; 102 __entry->src_data[1] = iv->src_data[1];
103 __entry->src_data[2] = iv->src_data[2]; 103 __entry->src_data[2] = iv->src_data[2];
104 __entry->src_data[3] = iv->src_data[3]; 104 __entry->src_data[3] = iv->src_data[3];
105 ), 105 ),
106 TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n", 106 TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",
107 __entry->client_id, __entry->src_id, 107 __entry->client_id, __entry->src_id,
108 __entry->ring_id, __entry->vmid, 108 __entry->ring_id, __entry->vmid,
109 __entry->timestamp, __entry->pas_id, 109 __entry->timestamp, __entry->pasid,
110 __entry->src_data[0], __entry->src_data[1], 110 __entry->src_data[0], __entry->src_data[1],
111 __entry->src_data[2], __entry->src_data[3]) 111 __entry->src_data[2], __entry->src_data[3])
112); 112);
@@ -217,7 +217,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
217 struct amdgpu_job *job), 217 struct amdgpu_job *job),
218 TP_ARGS(vm, ring, job), 218 TP_ARGS(vm, ring, job),
219 TP_STRUCT__entry( 219 TP_STRUCT__entry(
220 __field(struct amdgpu_vm *, vm) 220 __field(u32, pasid)
221 __field(u32, ring) 221 __field(u32, ring)
222 __field(u32, vmid) 222 __field(u32, vmid)
223 __field(u32, vm_hub) 223 __field(u32, vm_hub)
@@ -226,15 +226,15 @@ TRACE_EVENT(amdgpu_vm_grab_id,
226 ), 226 ),
227 227
228 TP_fast_assign( 228 TP_fast_assign(
229 __entry->vm = vm; 229 __entry->pasid = vm->pasid;
230 __entry->ring = ring->idx; 230 __entry->ring = ring->idx;
231 __entry->vmid = job->vmid; 231 __entry->vmid = job->vmid;
232 __entry->vm_hub = ring->funcs->vmhub, 232 __entry->vm_hub = ring->funcs->vmhub,
233 __entry->pd_addr = job->vm_pd_addr; 233 __entry->pd_addr = job->vm_pd_addr;
234 __entry->needs_flush = job->vm_needs_flush; 234 __entry->needs_flush = job->vm_needs_flush;
235 ), 235 ),
236 TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", 236 TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
237 __entry->vm, __entry->ring, __entry->vmid, 237 __entry->pasid, __entry->ring, __entry->vmid,
238 __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) 238 __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
239); 239);
240 240
@@ -378,6 +378,28 @@ TRACE_EVENT(amdgpu_vm_flush,
378 __entry->vm_hub,__entry->pd_addr) 378 __entry->vm_hub,__entry->pd_addr)
379); 379);
380 380
381DECLARE_EVENT_CLASS(amdgpu_pasid,
382 TP_PROTO(unsigned pasid),
383 TP_ARGS(pasid),
384 TP_STRUCT__entry(
385 __field(unsigned, pasid)
386 ),
387 TP_fast_assign(
388 __entry->pasid = pasid;
389 ),
390 TP_printk("pasid=%u", __entry->pasid)
391);
392
393DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_allocated,
394 TP_PROTO(unsigned pasid),
395 TP_ARGS(pasid)
396);
397
398DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
399 TP_PROTO(unsigned pasid),
400 TP_ARGS(pasid)
401);
402
381TRACE_EVENT(amdgpu_bo_list_set, 403TRACE_EVENT(amdgpu_bo_list_set,
382 TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), 404 TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
383 TP_ARGS(list, bo), 405 TP_ARGS(list, bo),
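The amdgpu_pasid event class above expands into two trace points, trace_amdgpu_pasid_allocated() and trace_amdgpu_pasid_freed(). A hedged sketch of the call sites an ID manager would add — my_allocate_pasid()/my_free_pasid() are placeholders, not existing helpers:

	unsigned int pasid;

	pasid = my_allocate_pasid();		/* placeholder allocator */
	if (pasid)
		trace_amdgpu_pasid_allocated(pasid);

	/* ... the pasid is used by a VM for its lifetime ... */

	trace_amdgpu_pasid_freed(pasid);
	my_free_pasid(pasid);			/* placeholder */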
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e4bb435e614b..205da3ff9cd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -46,6 +46,7 @@
46#include "amdgpu.h" 46#include "amdgpu.h"
47#include "amdgpu_object.h" 47#include "amdgpu_object.h"
48#include "amdgpu_trace.h" 48#include "amdgpu_trace.h"
49#include "amdgpu_amdkfd.h"
49#include "bif/bif_4_1_d.h" 50#include "bif/bif_4_1_d.h"
50 51
51#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) 52#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -161,7 +162,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
161 break; 162 break;
162 case TTM_PL_TT: 163 case TTM_PL_TT:
163 man->func = &amdgpu_gtt_mgr_func; 164 man->func = &amdgpu_gtt_mgr_func;
164 man->gpu_offset = adev->mc.gart_start; 165 man->gpu_offset = adev->gmc.gart_start;
165 man->available_caching = TTM_PL_MASK_CACHING; 166 man->available_caching = TTM_PL_MASK_CACHING;
166 man->default_caching = TTM_PL_FLAG_CACHED; 167 man->default_caching = TTM_PL_FLAG_CACHED;
167 man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA; 168 man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
@@ -169,7 +170,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
169 case TTM_PL_VRAM: 170 case TTM_PL_VRAM:
170 /* "On-card" video ram */ 171 /* "On-card" video ram */
171 man->func = &amdgpu_vram_mgr_func; 172 man->func = &amdgpu_vram_mgr_func;
172 man->gpu_offset = adev->mc.vram_start; 173 man->gpu_offset = adev->gmc.vram_start;
173 man->flags = TTM_MEMTYPE_FLAG_FIXED | 174 man->flags = TTM_MEMTYPE_FLAG_FIXED |
174 TTM_MEMTYPE_FLAG_MAPPABLE; 175 TTM_MEMTYPE_FLAG_MAPPABLE;
175 man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; 176 man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
@@ -203,6 +204,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
203 .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM 204 .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
204 }; 205 };
205 206
207 if (bo->type == ttm_bo_type_sg) {
208 placement->num_placement = 0;
209 placement->num_busy_placement = 0;
210 return;
211 }
212
206 if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { 213 if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
207 placement->placement = &placements; 214 placement->placement = &placements;
208 placement->busy_placement = &placements; 215 placement->busy_placement = &placements;
@@ -213,13 +220,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
213 abo = ttm_to_amdgpu_bo(bo); 220 abo = ttm_to_amdgpu_bo(bo);
214 switch (bo->mem.mem_type) { 221 switch (bo->mem.mem_type) {
215 case TTM_PL_VRAM: 222 case TTM_PL_VRAM:
216 if (adev->mman.buffer_funcs && 223 if (!adev->mman.buffer_funcs_enabled) {
217 adev->mman.buffer_funcs_ring &&
218 adev->mman.buffer_funcs_ring->ready == false) {
219 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); 224 amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
220 } else if (adev->mc.visible_vram_size < adev->mc.real_vram_size && 225 } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
221 !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { 226 !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
222 unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; 227 unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
223 struct drm_mm_node *node = bo->mem.mm_node; 228 struct drm_mm_node *node = bo->mem.mm_node;
224 unsigned long pages_left; 229 unsigned long pages_left;
225 230
@@ -260,6 +265,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
260{ 265{
261 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); 266 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
262 267
268 /*
269 * Don't verify access for KFD BOs. They don't have a GEM
270 * object associated with them.
271 */
272 if (abo->kfd_bo)
273 return 0;
274
263 if (amdgpu_ttm_tt_get_usermm(bo->ttm)) 275 if (amdgpu_ttm_tt_get_usermm(bo->ttm))
264 return -EPERM; 276 return -EPERM;
265 return drm_vma_node_verify_access(&abo->gem_base.vma_node, 277 return drm_vma_node_verify_access(&abo->gem_base.vma_node,
@@ -331,7 +343,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
331 const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * 343 const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
332 AMDGPU_GPU_PAGE_SIZE); 344 AMDGPU_GPU_PAGE_SIZE);
333 345
334 if (!ring->ready) { 346 if (!adev->mman.buffer_funcs_enabled) {
335 DRM_ERROR("Trying to move memory with ring turned off.\n"); 347 DRM_ERROR("Trying to move memory with ring turned off.\n");
336 return -EINVAL; 348 return -EINVAL;
337 } 349 }
@@ -577,12 +589,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
577 amdgpu_move_null(bo, new_mem); 589 amdgpu_move_null(bo, new_mem);
578 return 0; 590 return 0;
579 } 591 }
580 if (adev->mman.buffer_funcs == NULL || 592
581 adev->mman.buffer_funcs_ring == NULL || 593 if (!adev->mman.buffer_funcs_enabled)
582 !adev->mman.buffer_funcs_ring->ready) {
583 /* use memcpy */
584 goto memcpy; 594 goto memcpy;
585 }
586 595
587 if (old_mem->mem_type == TTM_PL_VRAM && 596 if (old_mem->mem_type == TTM_PL_VRAM &&
588 new_mem->mem_type == TTM_PL_SYSTEM) { 597 new_mem->mem_type == TTM_PL_SYSTEM) {
@@ -621,6 +630,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
621{ 630{
622 struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; 631 struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
623 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); 632 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
633 struct drm_mm_node *mm_node = mem->mm_node;
624 634
625 mem->bus.addr = NULL; 635 mem->bus.addr = NULL;
626 mem->bus.offset = 0; 636 mem->bus.offset = 0;
@@ -638,9 +648,18 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
638 case TTM_PL_VRAM: 648 case TTM_PL_VRAM:
639 mem->bus.offset = mem->start << PAGE_SHIFT; 649 mem->bus.offset = mem->start << PAGE_SHIFT;
640 /* check if it's visible */ 650 /* check if it's visible */
641 if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size) 651 if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
642 return -EINVAL; 652 return -EINVAL;
643 mem->bus.base = adev->mc.aper_base; 653 /* Only physically contiguous buffers apply. In a contiguous
654 * buffer, size of the first mm_node would match the number of
655 * pages in ttm_mem_reg.
656 */
657 if (adev->mman.aper_base_kaddr &&
658 (mm_node->size == mem->num_pages))
659 mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
660 mem->bus.offset;
661
662 mem->bus.base = adev->gmc.aper_base;
644 mem->bus.is_iomem = true; 663 mem->bus.is_iomem = true;
645 break; 664 break;
646 default: 665 default:
@@ -674,7 +693,6 @@ struct amdgpu_ttm_gup_task_list {
674 693
675struct amdgpu_ttm_tt { 694struct amdgpu_ttm_tt {
676 struct ttm_dma_tt ttm; 695 struct ttm_dma_tt ttm;
677 struct amdgpu_device *adev;
678 u64 offset; 696 u64 offset;
679 uint64_t userptr; 697 uint64_t userptr;
680 struct mm_struct *usermm; 698 struct mm_struct *usermm;
@@ -832,6 +850,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
832static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, 850static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
833 struct ttm_mem_reg *bo_mem) 851 struct ttm_mem_reg *bo_mem)
834{ 852{
853 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
835 struct amdgpu_ttm_tt *gtt = (void*)ttm; 854 struct amdgpu_ttm_tt *gtt = (void*)ttm;
836 uint64_t flags; 855 uint64_t flags;
837 int r = 0; 856 int r = 0;
@@ -858,9 +877,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
858 return 0; 877 return 0;
859 } 878 }
860 879
861 flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); 880 flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
862 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; 881 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
863 r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, 882 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
864 ttm->pages, gtt->ttm.dma_address, flags); 883 ttm->pages, gtt->ttm.dma_address, flags);
865 884
866 if (r) 885 if (r)
@@ -891,7 +910,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
891 placement.num_busy_placement = 1; 910 placement.num_busy_placement = 1;
892 placement.busy_placement = &placements; 911 placement.busy_placement = &placements;
893 placements.fpfn = 0; 912 placements.fpfn = 0;
894 placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; 913 placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
895 placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | 914 placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
896 TTM_PL_FLAG_TT; 915 TTM_PL_FLAG_TT;
897 916
@@ -937,6 +956,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
937 956
938static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) 957static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
939{ 958{
959 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
940 struct amdgpu_ttm_tt *gtt = (void *)ttm; 960 struct amdgpu_ttm_tt *gtt = (void *)ttm;
941 int r; 961 int r;
942 962
@@ -947,7 +967,7 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
947 return 0; 967 return 0;
948 968
949 /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ 969 /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
950 r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); 970 r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
951 if (r) 971 if (r)
952 DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", 972 DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
953 gtt->ttm.ttm.num_pages, gtt->offset); 973 gtt->ttm.ttm.num_pages, gtt->offset);
@@ -968,22 +988,20 @@ static struct ttm_backend_func amdgpu_backend_func = {
968 .destroy = &amdgpu_ttm_backend_destroy, 988 .destroy = &amdgpu_ttm_backend_destroy,
969}; 989};
970 990
971static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, 991static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
972 unsigned long size, uint32_t page_flags, 992 uint32_t page_flags)
973 struct page *dummy_read_page)
974{ 993{
975 struct amdgpu_device *adev; 994 struct amdgpu_device *adev;
976 struct amdgpu_ttm_tt *gtt; 995 struct amdgpu_ttm_tt *gtt;
977 996
978 adev = amdgpu_ttm_adev(bdev); 997 adev = amdgpu_ttm_adev(bo->bdev);
979 998
980 gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); 999 gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
981 if (gtt == NULL) { 1000 if (gtt == NULL) {
982 return NULL; 1001 return NULL;
983 } 1002 }
984 gtt->ttm.ttm.func = &amdgpu_backend_func; 1003 gtt->ttm.ttm.func = &amdgpu_backend_func;
985 gtt->adev = adev; 1004 if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
986 if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
987 kfree(gtt); 1005 kfree(gtt);
988 return NULL; 1006 return NULL;
989 } 1007 }
@@ -997,9 +1015,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
997 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1015 struct amdgpu_ttm_tt *gtt = (void *)ttm;
998 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); 1016 bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
999 1017
1000 if (ttm->state != tt_unpopulated)
1001 return 0;
1002
1003 if (gtt && gtt->userptr) { 1018 if (gtt && gtt->userptr) {
1004 ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); 1019 ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1005 if (!ttm->sg) 1020 if (!ttm->sg)
@@ -1012,13 +1027,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
1012 1027
1013 if (slave && ttm->sg) { 1028 if (slave && ttm->sg) {
1014 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, 1029 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
1015 gtt->ttm.dma_address, ttm->num_pages); 1030 gtt->ttm.dma_address,
1031 ttm->num_pages);
1016 ttm->state = tt_unbound; 1032 ttm->state = tt_unbound;
1017 return 0; 1033 return 0;
1018 } 1034 }
1019 1035
1020#ifdef CONFIG_SWIOTLB 1036#ifdef CONFIG_SWIOTLB
1021 if (swiotlb_nr_tbl()) { 1037 if (adev->need_swiotlb && swiotlb_nr_tbl()) {
1022 return ttm_dma_populate(&gtt->ttm, adev->dev, ctx); 1038 return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
1023 } 1039 }
1024#endif 1040#endif
@@ -1045,7 +1061,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
1045 adev = amdgpu_ttm_adev(ttm->bdev); 1061 adev = amdgpu_ttm_adev(ttm->bdev);
1046 1062
1047#ifdef CONFIG_SWIOTLB 1063#ifdef CONFIG_SWIOTLB
1048 if (swiotlb_nr_tbl()) { 1064 if (adev->need_swiotlb && swiotlb_nr_tbl()) {
1049 ttm_dma_unpopulate(&gtt->ttm, adev->dev); 1065 ttm_dma_unpopulate(&gtt->ttm, adev->dev);
1050 return; 1066 return;
1051 } 1067 }
@@ -1170,6 +1186,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
1170{ 1186{
1171 unsigned long num_pages = bo->mem.num_pages; 1187 unsigned long num_pages = bo->mem.num_pages;
1172 struct drm_mm_node *node = bo->mem.mm_node; 1188 struct drm_mm_node *node = bo->mem.mm_node;
1189 struct reservation_object_list *flist;
1190 struct dma_fence *f;
1191 int i;
1192
1193 /* If bo is a KFD BO, check if the bo belongs to the current process.
1194 * If true, then return false as any KFD process needs all its BOs to
1195 * be resident to run successfully
1196 */
1197 flist = reservation_object_get_list(bo->resv);
1198 if (flist) {
1199 for (i = 0; i < flist->shared_count; ++i) {
1200 f = rcu_dereference_protected(flist->shared[i],
1201 reservation_object_held(bo->resv));
1202 if (amdkfd_fence_check_mm(f, current->mm))
1203 return false;
1204 }
1205 }
1173 1206
1174 switch (bo->mem.mem_type) { 1207 switch (bo->mem.mem_type) {
1175 case TTM_PL_TT: 1208 case TTM_PL_TT:
@@ -1212,7 +1245,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1212 nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); 1245 nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
1213 pos = (nodes->start << PAGE_SHIFT) + offset; 1246 pos = (nodes->start << PAGE_SHIFT) + offset;
1214 1247
1215 while (len && pos < adev->mc.mc_vram_size) { 1248 while (len && pos < adev->gmc.mc_vram_size) {
1216 uint64_t aligned_pos = pos & ~(uint64_t)3; 1249 uint64_t aligned_pos = pos & ~(uint64_t)3;
1217 uint32_t bytes = 4 - (pos & 3); 1250 uint32_t bytes = 4 - (pos & 3);
1218 uint32_t shift = (pos & 3) * 8; 1251 uint32_t shift = (pos & 3) * 8;
@@ -1298,7 +1331,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1298 struct ttm_operation_ctx ctx = { false, false }; 1331 struct ttm_operation_ctx ctx = { false, false };
1299 int r = 0; 1332 int r = 0;
1300 int i; 1333 int i;
1301 u64 vram_size = adev->mc.visible_vram_size; 1334 u64 vram_size = adev->gmc.visible_vram_size;
1302 u64 offset = adev->fw_vram_usage.start_offset; 1335 u64 offset = adev->fw_vram_usage.start_offset;
1303 u64 size = adev->fw_vram_usage.size; 1336 u64 size = adev->fw_vram_usage.size;
1304 struct amdgpu_bo *bo; 1337 struct amdgpu_bo *bo;
@@ -1309,11 +1342,12 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
1309 if (adev->fw_vram_usage.size > 0 && 1342 if (adev->fw_vram_usage.size > 0 &&
1310 adev->fw_vram_usage.size <= vram_size) { 1343 adev->fw_vram_usage.size <= vram_size) {
1311 1344
1312 r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, 1345 r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
1313 PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 1346 AMDGPU_GEM_DOMAIN_VRAM,
1314 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 1347 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1315 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, 1348 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1316 &adev->fw_vram_usage.reserved_bo); 1349 ttm_bo_type_kernel, NULL,
1350 &adev->fw_vram_usage.reserved_bo);
1317 if (r) 1351 if (r)
1318 goto error_create; 1352 goto error_create;
1319 1353
@@ -1387,8 +1421,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1387 return r; 1421 return r;
1388 } 1422 }
1389 adev->mman.initialized = true; 1423 adev->mman.initialized = true;
1424
1425 /* We opt to avoid OOM on system pages allocations */
1426 adev->mman.bdev.no_retry = true;
1427
1390 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, 1428 r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
1391 adev->mc.real_vram_size >> PAGE_SHIFT); 1429 adev->gmc.real_vram_size >> PAGE_SHIFT);
1392 if (r) { 1430 if (r) {
1393 DRM_ERROR("Failed initializing VRAM heap.\n"); 1431 DRM_ERROR("Failed initializing VRAM heap.\n");
1394 return r; 1432 return r;
@@ -1397,11 +1435,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1397 /* Reduce size of CPU-visible VRAM if requested */ 1435 /* Reduce size of CPU-visible VRAM if requested */
1398 vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024; 1436 vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
1399 if (amdgpu_vis_vram_limit > 0 && 1437 if (amdgpu_vis_vram_limit > 0 &&
1400 vis_vram_limit <= adev->mc.visible_vram_size) 1438 vis_vram_limit <= adev->gmc.visible_vram_size)
1401 adev->mc.visible_vram_size = vis_vram_limit; 1439 adev->gmc.visible_vram_size = vis_vram_limit;
1402 1440
1403 /* Change the size here instead of the init above so only lpfn is affected */ 1441 /* Change the size here instead of the init above so only lpfn is affected */
1404 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 1442 amdgpu_ttm_set_buffer_funcs_status(adev, false);
1443#ifdef CONFIG_64BIT
1444 adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
1445 adev->gmc.visible_vram_size);
1446#endif
1405 1447
1406 /* 1448 /*
1407 *The reserved vram for firmware must be pinned to the specified 1449 *The reserved vram for firmware must be pinned to the specified
@@ -1412,21 +1454,21 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
1412 return r; 1454 return r;
1413 } 1455 }
1414 1456
1415 r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, 1457 r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
1416 AMDGPU_GEM_DOMAIN_VRAM, 1458 AMDGPU_GEM_DOMAIN_VRAM,
1417 &adev->stolen_vga_memory, 1459 &adev->stolen_vga_memory,
1418 NULL, NULL); 1460 NULL, NULL);
1419 if (r) 1461 if (r)
1420 return r; 1462 return r;
1421 DRM_INFO("amdgpu: %uM of VRAM memory ready\n", 1463 DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1422 (unsigned) (adev->mc.real_vram_size / (1024 * 1024))); 1464 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1423 1465
1424 if (amdgpu_gtt_size == -1) { 1466 if (amdgpu_gtt_size == -1) {
1425 struct sysinfo si; 1467 struct sysinfo si;
1426 1468
1427 si_meminfo(&si); 1469 si_meminfo(&si);
1428 gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), 1470 gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
1429 adev->mc.mc_vram_size), 1471 adev->gmc.mc_vram_size),
1430 ((uint64_t)si.totalram * si.mem_unit * 3/4)); 1472 ((uint64_t)si.totalram * si.mem_unit * 3/4));
1431 } 1473 }
1432 else 1474 else
@@ -1494,6 +1536,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
1494 amdgpu_ttm_debugfs_fini(adev); 1536 amdgpu_ttm_debugfs_fini(adev);
1495 amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); 1537 amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
1496 amdgpu_ttm_fw_reserve_vram_fini(adev); 1538 amdgpu_ttm_fw_reserve_vram_fini(adev);
1539 if (adev->mman.aper_base_kaddr)
1540 iounmap(adev->mman.aper_base_kaddr);
1541 adev->mman.aper_base_kaddr = NULL;
1497 1542
1498 ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); 1543 ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
1499 ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); 1544 ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
@@ -1509,18 +1554,30 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
1509 DRM_INFO("amdgpu: ttm finalized\n"); 1554 DRM_INFO("amdgpu: ttm finalized\n");
1510} 1555}
1511 1556
1512/* this should only be called at bootup or when userspace 1557/**
1513 * isn't running */ 1558 * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
1514void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size) 1559 *
1560 * @adev: amdgpu_device pointer
1561 * @enable: true when we can use buffer functions.
1562 *
1563 * Enable/disable use of buffer functions during suspend/resume. This should
1564 * only be called at bootup or when userspace isn't running.
1565 */
1566void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
1515{ 1567{
1516 struct ttm_mem_type_manager *man; 1568 struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
1569 uint64_t size;
1517 1570
1518 if (!adev->mman.initialized) 1571 if (!adev->mman.initialized || adev->in_gpu_reset)
1519 return; 1572 return;
1520 1573
1521 man = &adev->mman.bdev.man[TTM_PL_VRAM];
1522 /* this just adjusts TTM size idea, which sets lpfn to the correct value */ 1574 /* this just adjusts TTM size idea, which sets lpfn to the correct value */
1575 if (enable)
1576 size = adev->gmc.real_vram_size;
1577 else
1578 size = adev->gmc.visible_vram_size;
1523 man->size = size >> PAGE_SHIFT; 1579 man->size = size >> PAGE_SHIFT;
1580 adev->mman.buffer_funcs_enabled = enable;
1524} 1581}
1525 1582
1526int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) 1583int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -1559,7 +1616,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
1559 BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < 1616 BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
1560 AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); 1617 AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
1561 1618
1562 *addr = adev->mc.gart_start; 1619 *addr = adev->gmc.gart_start;
1563 *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 1620 *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
1564 AMDGPU_GPU_PAGE_SIZE; 1621 AMDGPU_GPU_PAGE_SIZE;
1565 1622
@@ -1619,6 +1676,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
1619 unsigned i; 1676 unsigned i;
1620 int r; 1677 int r;
1621 1678
1679 if (direct_submit && !ring->ready) {
1680 DRM_ERROR("Trying to move memory with ring turned off.\n");
1681 return -EINVAL;
1682 }
1683
1622 max_bytes = adev->mman.buffer_funcs->copy_max_bytes; 1684 max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
1623 num_loops = DIV_ROUND_UP(byte_count, max_bytes); 1685 num_loops = DIV_ROUND_UP(byte_count, max_bytes);
1624 num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; 1686 num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
@@ -1677,13 +1739,12 @@ error_free:
1677} 1739}
1678 1740
1679int amdgpu_fill_buffer(struct amdgpu_bo *bo, 1741int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1680 uint64_t src_data, 1742 uint32_t src_data,
1681 struct reservation_object *resv, 1743 struct reservation_object *resv,
1682 struct dma_fence **fence) 1744 struct dma_fence **fence)
1683{ 1745{
1684 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 1746 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
1685 uint32_t max_bytes = 8 * 1747 uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
1686 adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
1687 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; 1748 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
1688 1749
1689 struct drm_mm_node *mm_node; 1750 struct drm_mm_node *mm_node;
@@ -1693,7 +1754,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1693 struct amdgpu_job *job; 1754 struct amdgpu_job *job;
1694 int r; 1755 int r;
1695 1756
1696 if (!ring->ready) { 1757 if (!adev->mman.buffer_funcs_enabled) {
1697 DRM_ERROR("Trying to clear memory with ring turned off.\n"); 1758 DRM_ERROR("Trying to clear memory with ring turned off.\n");
1698 return -EINVAL; 1759 return -EINVAL;
1699 } 1760 }
@@ -1714,9 +1775,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1714 num_pages -= mm_node->size; 1775 num_pages -= mm_node->size;
1715 ++mm_node; 1776 ++mm_node;
1716 } 1777 }
1717 1778 num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
1718 /* num of dwords for each SDMA_OP_PTEPDE cmd */
1719 num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
1720 1779
1721 /* for IB padding */ 1780 /* for IB padding */
1722 num_dw += 64; 1781 num_dw += 64;
@@ -1741,16 +1800,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
1741 uint32_t byte_count = mm_node->size << PAGE_SHIFT; 1800 uint32_t byte_count = mm_node->size << PAGE_SHIFT;
1742 uint64_t dst_addr; 1801 uint64_t dst_addr;
1743 1802
1744 WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
1745
1746 dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); 1803 dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
1747 while (byte_count) { 1804 while (byte_count) {
1748 uint32_t cur_size_in_bytes = min(byte_count, max_bytes); 1805 uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
1749 1806
1750 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], 1807 amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
1751 dst_addr, 0, 1808 dst_addr, cur_size_in_bytes);
1752 cur_size_in_bytes >> 3, 0,
1753 src_data);
1754 1809
1755 dst_addr += cur_size_in_bytes; 1810 dst_addr += cur_size_in_bytes;
1756 byte_count -= cur_size_in_bytes; 1811 byte_count -= cur_size_in_bytes;
@@ -1811,14 +1866,14 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
1811 if (size & 0x3 || *pos & 0x3) 1866 if (size & 0x3 || *pos & 0x3)
1812 return -EINVAL; 1867 return -EINVAL;
1813 1868
1814 if (*pos >= adev->mc.mc_vram_size) 1869 if (*pos >= adev->gmc.mc_vram_size)
1815 return -ENXIO; 1870 return -ENXIO;
1816 1871
1817 while (size) { 1872 while (size) {
1818 unsigned long flags; 1873 unsigned long flags;
1819 uint32_t value; 1874 uint32_t value;
1820 1875
1821 if (*pos >= adev->mc.mc_vram_size) 1876 if (*pos >= adev->gmc.mc_vram_size)
1822 return result; 1877 return result;
1823 1878
1824 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 1879 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
@@ -1850,14 +1905,14 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
1850 if (size & 0x3 || *pos & 0x3) 1905 if (size & 0x3 || *pos & 0x3)
1851 return -EINVAL; 1906 return -EINVAL;
1852 1907
1853 if (*pos >= adev->mc.mc_vram_size) 1908 if (*pos >= adev->gmc.mc_vram_size)
1854 return -ENXIO; 1909 return -ENXIO;
1855 1910
1856 while (size) { 1911 while (size) {
1857 unsigned long flags; 1912 unsigned long flags;
1858 uint32_t value; 1913 uint32_t value;
1859 1914
1860 if (*pos >= adev->mc.mc_vram_size) 1915 if (*pos >= adev->gmc.mc_vram_size)
1861 return result; 1916 return result;
1862 1917
1863 r = get_user(value, (uint32_t *)buf); 1918 r = get_user(value, (uint32_t *)buf);
@@ -1935,38 +1990,98 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
1935 1990
1936#endif 1991#endif
1937 1992
1938static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf, 1993static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
1939 size_t size, loff_t *pos) 1994 size_t size, loff_t *pos)
1940{ 1995{
1941 struct amdgpu_device *adev = file_inode(f)->i_private; 1996 struct amdgpu_device *adev = file_inode(f)->i_private;
1942 int r;
1943 uint64_t phys;
1944 struct iommu_domain *dom; 1997 struct iommu_domain *dom;
1998 ssize_t result = 0;
1999 int r;
1945 2000
1946 // always return 8 bytes 2001 dom = iommu_get_domain_for_dev(adev->dev);
1947 if (size != 8)
1948 return -EINVAL;
1949 2002
1950 // only accept page addresses 2003 while (size) {
1951 if (*pos & 0xFFF) 2004 phys_addr_t addr = *pos & PAGE_MASK;
1952 return -EINVAL; 2005 loff_t off = *pos & ~PAGE_MASK;
2006 size_t bytes = PAGE_SIZE - off;
2007 unsigned long pfn;
2008 struct page *p;
2009 void *ptr;
2010
2011 bytes = bytes < size ? bytes : size;
2012
2013 addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
2014
2015 pfn = addr >> PAGE_SHIFT;
2016 if (!pfn_valid(pfn))
2017 return -EPERM;
2018
2019 p = pfn_to_page(pfn);
2020 if (p->mapping != adev->mman.bdev.dev_mapping)
2021 return -EPERM;
2022
2023 ptr = kmap(p);
2024 r = copy_to_user(buf, ptr + off, bytes);
2025 kunmap(p);
2026 if (r)
2027 return -EFAULT;
2028
2029 size -= bytes;
2030 *pos += bytes;
2031 result += bytes;
2032 }
2033
2034 return result;
2035}
2036
2037static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
2038 size_t size, loff_t *pos)
2039{
2040 struct amdgpu_device *adev = file_inode(f)->i_private;
2041 struct iommu_domain *dom;
2042 ssize_t result = 0;
2043 int r;
1953 2044
1954 dom = iommu_get_domain_for_dev(adev->dev); 2045 dom = iommu_get_domain_for_dev(adev->dev);
1955 if (dom)
1956 phys = iommu_iova_to_phys(dom, *pos);
1957 else
1958 phys = *pos;
1959 2046
1960 r = copy_to_user(buf, &phys, 8); 2047 while (size) {
1961 if (r) 2048 phys_addr_t addr = *pos & PAGE_MASK;
1962 return -EFAULT; 2049 loff_t off = *pos & ~PAGE_MASK;
2050 size_t bytes = PAGE_SIZE - off;
2051 unsigned long pfn;
2052 struct page *p;
2053 void *ptr;
2054
2055 bytes = bytes < size ? bytes : size;
2056
2057 addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
1963 2058
1964 return 8; 2059 pfn = addr >> PAGE_SHIFT;
2060 if (!pfn_valid(pfn))
2061 return -EPERM;
2062
2063 p = pfn_to_page(pfn);
2064 if (p->mapping != adev->mman.bdev.dev_mapping)
2065 return -EPERM;
2066
2067 ptr = kmap(p);
2068 r = copy_from_user(ptr + off, buf, bytes);
2069 kunmap(p);
2070 if (r)
2071 return -EFAULT;
2072
2073 size -= bytes;
2074 *pos += bytes;
2075 result += bytes;
2076 }
2077
2078 return result;
1965} 2079}
1966 2080
1967static const struct file_operations amdgpu_ttm_iova_fops = { 2081static const struct file_operations amdgpu_ttm_iomem_fops = {
1968 .owner = THIS_MODULE, 2082 .owner = THIS_MODULE,
1969 .read = amdgpu_iova_to_phys_read, 2083 .read = amdgpu_iomem_read,
2084 .write = amdgpu_iomem_write,
1970 .llseek = default_llseek 2085 .llseek = default_llseek
1971}; 2086};
1972 2087
@@ -1979,7 +2094,7 @@ static const struct {
1979#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS 2094#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
1980 { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT }, 2095 { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
1981#endif 2096#endif
1982 { "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM }, 2097 { "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM },
1983}; 2098};
1984 2099
1985#endif 2100#endif
@@ -2001,16 +2116,16 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
2001 if (IS_ERR(ent)) 2116 if (IS_ERR(ent))
2002 return PTR_ERR(ent); 2117 return PTR_ERR(ent);
2003 if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM) 2118 if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
2004 i_size_write(ent->d_inode, adev->mc.mc_vram_size); 2119 i_size_write(ent->d_inode, adev->gmc.mc_vram_size);
2005 else if (ttm_debugfs_entries[count].domain == TTM_PL_TT) 2120 else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
2006 i_size_write(ent->d_inode, adev->mc.gart_size); 2121 i_size_write(ent->d_inode, adev->gmc.gart_size);
2007 adev->mman.debugfs_entries[count] = ent; 2122 adev->mman.debugfs_entries[count] = ent;
2008 } 2123 }
2009 2124
2010 count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); 2125 count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
2011 2126
2012#ifdef CONFIG_SWIOTLB 2127#ifdef CONFIG_SWIOTLB
2013 if (!swiotlb_nr_tbl()) 2128 if (!(adev->need_swiotlb && swiotlb_nr_tbl()))
2014 --count; 2129 --count;
2015#endif 2130#endif
2016 2131
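amdgpu_ttm_set_buffer_funcs_status() above is meant to be flipped by whichever IP block owns the copy engine once its rings are proven working, and cleared again before they go down; buffer moves and fills then fall back to the CPU path. A hedged sketch of that call pattern — the my_sdma_* hooks are placeholders, not the real SDMA code:

static int my_sdma_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* ... start the SDMA rings and run their ring tests ... */

	/* SDMA may now be used for TTM buffer moves and clears */
	amdgpu_ttm_set_buffer_funcs_status(adev, true);
	return 0;
}

static int my_sdma_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* fall back to memcpy-based moves before the rings disappear */
	amdgpu_ttm_set_buffer_funcs_status(adev, false);

	/* ... stop the SDMA rings ... */
	return 0;
}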
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 167856f6080f..6ea7de863041 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -44,6 +44,7 @@ struct amdgpu_mman {
44 struct ttm_bo_device bdev; 44 struct ttm_bo_device bdev;
45 bool mem_global_referenced; 45 bool mem_global_referenced;
46 bool initialized; 46 bool initialized;
47 void __iomem *aper_base_kaddr;
47 48
48#if defined(CONFIG_DEBUG_FS) 49#if defined(CONFIG_DEBUG_FS)
49 struct dentry *debugfs_entries[8]; 50 struct dentry *debugfs_entries[8];
@@ -52,6 +53,7 @@ struct amdgpu_mman {
52 /* buffer handling */ 53 /* buffer handling */
53 const struct amdgpu_buffer_funcs *buffer_funcs; 54 const struct amdgpu_buffer_funcs *buffer_funcs;
54 struct amdgpu_ring *buffer_funcs_ring; 55 struct amdgpu_ring *buffer_funcs_ring;
56 bool buffer_funcs_enabled;
55 57
56 struct mutex gtt_window_lock; 58 struct mutex gtt_window_lock;
57 /* Scheduler entity for buffer moves */ 59 /* Scheduler entity for buffer moves */
@@ -74,6 +76,11 @@ int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
74uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); 76uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
75uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); 77uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
76 78
79int amdgpu_ttm_init(struct amdgpu_device *adev);
80void amdgpu_ttm_fini(struct amdgpu_device *adev);
81void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
82 bool enable);
83
77int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, 84int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
78 uint64_t dst_offset, uint32_t byte_count, 85 uint64_t dst_offset, uint32_t byte_count,
79 struct reservation_object *resv, 86 struct reservation_object *resv,
@@ -86,7 +93,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
86 struct reservation_object *resv, 93 struct reservation_object *resv,
87 struct dma_fence **f); 94 struct dma_fence **f);
88int amdgpu_fill_buffer(struct amdgpu_bo *bo, 95int amdgpu_fill_buffer(struct amdgpu_bo *bo,
89 uint64_t src_data, 96 uint32_t src_data,
90 struct reservation_object *resv, 97 struct reservation_object *resv,
91 struct dma_fence **fence); 98 struct dma_fence **fence);
92 99
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 474f88fbafce..5916cc25e28b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -271,12 +271,13 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
271 return AMDGPU_FW_LOAD_SMU; 271 return AMDGPU_FW_LOAD_SMU;
272 case CHIP_VEGA10: 272 case CHIP_VEGA10:
273 case CHIP_RAVEN: 273 case CHIP_RAVEN:
274 case CHIP_VEGA12:
274 if (!load_type) 275 if (!load_type)
275 return AMDGPU_FW_LOAD_DIRECT; 276 return AMDGPU_FW_LOAD_DIRECT;
276 else 277 else
277 return AMDGPU_FW_LOAD_PSP; 278 return AMDGPU_FW_LOAD_PSP;
278 default: 279 default:
279 DRM_ERROR("Unknow firmware load type\n"); 280 DRM_ERROR("Unknown firmware load type\n");
280 } 281 }
281 282
282 return AMDGPU_FW_LOAD_DIRECT; 283 return AMDGPU_FW_LOAD_DIRECT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b2eae86bf906..627542b22ae4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -68,6 +68,7 @@
68#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" 68#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
69 69
70#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" 70#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
71#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
71 72
72#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00) 73#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
73#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00) 74#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
@@ -110,6 +111,7 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS11);
110MODULE_FIRMWARE(FIRMWARE_POLARIS12); 111MODULE_FIRMWARE(FIRMWARE_POLARIS12);
111 112
112MODULE_FIRMWARE(FIRMWARE_VEGA10); 113MODULE_FIRMWARE(FIRMWARE_VEGA10);
114MODULE_FIRMWARE(FIRMWARE_VEGA12);
113 115
114static void amdgpu_uvd_idle_work_handler(struct work_struct *work); 116static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
115 117
@@ -161,11 +163,14 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
161 case CHIP_POLARIS11: 163 case CHIP_POLARIS11:
162 fw_name = FIRMWARE_POLARIS11; 164 fw_name = FIRMWARE_POLARIS11;
163 break; 165 break;
166 case CHIP_POLARIS12:
167 fw_name = FIRMWARE_POLARIS12;
168 break;
164 case CHIP_VEGA10: 169 case CHIP_VEGA10:
165 fw_name = FIRMWARE_VEGA10; 170 fw_name = FIRMWARE_VEGA10;
166 break; 171 break;
167 case CHIP_POLARIS12: 172 case CHIP_VEGA12:
168 fw_name = FIRMWARE_POLARIS12; 173 fw_name = FIRMWARE_VEGA12;
169 break; 174 break;
170 default: 175 default:
171 return -EINVAL; 176 return -EINVAL;
@@ -299,12 +304,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
299 304
300 cancel_delayed_work_sync(&adev->uvd.idle_work); 305 cancel_delayed_work_sync(&adev->uvd.idle_work);
301 306
302 for (i = 0; i < adev->uvd.max_handles; ++i) 307 /* only valid for physical mode */
303 if (atomic_read(&adev->uvd.handles[i])) 308 if (adev->asic_type < CHIP_POLARIS10) {
304 break; 309 for (i = 0; i < adev->uvd.max_handles; ++i)
310 if (atomic_read(&adev->uvd.handles[i]))
311 break;
305 312
306 if (i == AMDGPU_MAX_UVD_HANDLES) 313 if (i == adev->uvd.max_handles)
307 return 0; 314 return 0;
315 }
308 316
309 size = amdgpu_bo_size(adev->uvd.vcpu_bo); 317 size = amdgpu_bo_size(adev->uvd.vcpu_bo);
310 ptr = adev->uvd.cpu_addr; 318 ptr = adev->uvd.cpu_addr;
@@ -952,37 +960,28 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
952static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, 960static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
953 bool direct, struct dma_fence **fence) 961 bool direct, struct dma_fence **fence)
954{ 962{
955 struct ttm_operation_ctx ctx = { true, false }; 963 struct amdgpu_device *adev = ring->adev;
956 struct ttm_validate_buffer tv; 964 struct dma_fence *f = NULL;
957 struct ww_acquire_ctx ticket;
958 struct list_head head;
959 struct amdgpu_job *job; 965 struct amdgpu_job *job;
960 struct amdgpu_ib *ib; 966 struct amdgpu_ib *ib;
961 struct dma_fence *f = NULL;
962 struct amdgpu_device *adev = ring->adev;
963 uint64_t addr;
964 uint32_t data[4]; 967 uint32_t data[4];
965 int i, r; 968 uint64_t addr;
966 969 long r;
967 memset(&tv, 0, sizeof(tv)); 970 int i;
968 tv.bo = &bo->tbo;
969
970 INIT_LIST_HEAD(&head);
971 list_add(&tv.head, &head);
972 971
973 r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); 972 amdgpu_bo_kunmap(bo);
974 if (r) 973 amdgpu_bo_unpin(bo);
975 return r;
976 974
977 if (!ring->adev->uvd.address_64_bit) { 975 if (!ring->adev->uvd.address_64_bit) {
976 struct ttm_operation_ctx ctx = { true, false };
977
978 amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); 978 amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
979 amdgpu_uvd_force_into_uvd_segment(bo); 979 amdgpu_uvd_force_into_uvd_segment(bo);
980 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
981 if (r)
982 goto err;
980 } 983 }
981 984
982 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
983 if (r)
984 goto err;
985
986 r = amdgpu_job_alloc_with_ib(adev, 64, &job); 985 r = amdgpu_job_alloc_with_ib(adev, 64, &job);
987 if (r) 986 if (r)
988 goto err; 987 goto err;
@@ -1014,6 +1013,14 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
1014 ib->length_dw = 16; 1013 ib->length_dw = 16;
1015 1014
1016 if (direct) { 1015 if (direct) {
1016 r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
1017 true, false,
1018 msecs_to_jiffies(10));
1019 if (r == 0)
1020 r = -ETIMEDOUT;
1021 if (r < 0)
1022 goto err_free;
1023
1017 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); 1024 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
1018 job->fence = dma_fence_get(f); 1025 job->fence = dma_fence_get(f);
1019 if (r) 1026 if (r)
@@ -1021,17 +1028,23 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
1021 1028
1022 amdgpu_job_free(job); 1029 amdgpu_job_free(job);
1023 } else { 1030 } else {
1031 r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
1032 AMDGPU_FENCE_OWNER_UNDEFINED, false);
1033 if (r)
1034 goto err_free;
1035
1024 r = amdgpu_job_submit(job, ring, &adev->uvd.entity, 1036 r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
1025 AMDGPU_FENCE_OWNER_UNDEFINED, &f); 1037 AMDGPU_FENCE_OWNER_UNDEFINED, &f);
1026 if (r) 1038 if (r)
1027 goto err_free; 1039 goto err_free;
1028 } 1040 }
1029 1041
1030 ttm_eu_fence_buffer_objects(&ticket, &head, f); 1042 amdgpu_bo_fence(bo, f, false);
1043 amdgpu_bo_unreserve(bo);
1044 amdgpu_bo_unref(&bo);
1031 1045
1032 if (fence) 1046 if (fence)
1033 *fence = dma_fence_get(f); 1047 *fence = dma_fence_get(f);
1034 amdgpu_bo_unref(&bo);
1035 dma_fence_put(f); 1048 dma_fence_put(f);
1036 1049
1037 return 0; 1050 return 0;
@@ -1040,7 +1053,8 @@ err_free:
1040 amdgpu_job_free(job); 1053 amdgpu_job_free(job);
1041 1054
1042err: 1055err:
1043 ttm_eu_backoff_reservation(&ticket, &head); 1056 amdgpu_bo_unreserve(bo);
1057 amdgpu_bo_unref(&bo);
1044 return r; 1058 return r;
1045} 1059}
1046 1060
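A note on the reworked amdgpu_uvd_send_msg() above: the message BO now arrives already reserved and is unreserved/unreferenced on every exit path, and the direct path first waits up to 10 ms on the BO's reservation object before scheduling the IB. The standalone C sketch below only illustrates the return-value convention being normalized there (positive = time left, 0 = timed out, negative = error); wait_remaining() is a hypothetical stand-in for a jiffies-based wait such as reservation_object_wait_timeout_rcu().

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-in for a jiffies-based wait: returns the time
 * remaining (> 0), 0 on timeout, or a negative errno. */
static long wait_remaining(long budget, long needed)
{
	if (needed < 0)
		return -EINVAL;		/* error from the wait itself */
	return budget > needed ? budget - needed : 0;
}

/* Normalize that convention to a plain errno, as the direct path does:
 * map "0 remaining" to -ETIMEDOUT and pass real errors through. */
static int wait_or_timeout(long budget, long needed)
{
	long r = wait_remaining(budget, needed);

	if (r == 0)
		r = -ETIMEDOUT;
	return r < 0 ? (int)r : 0;
}

int main(void)
{
	printf("fast fence: %d\n", wait_or_timeout(10, 3));	/* 0 */
	printf("slow fence: %d\n", wait_or_timeout(10, 20));	/* -ETIMEDOUT */
	printf("wait error: %d\n", wait_or_timeout(10, -1));	/* -EINVAL */
	return 0;
}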
@@ -1051,31 +1065,16 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
1051 struct dma_fence **fence) 1065 struct dma_fence **fence)
1052{ 1066{
1053 struct amdgpu_device *adev = ring->adev; 1067 struct amdgpu_device *adev = ring->adev;
1054 struct amdgpu_bo *bo; 1068 struct amdgpu_bo *bo = NULL;
1055 uint32_t *msg; 1069 uint32_t *msg;
1056 int r, i; 1070 int r, i;
1057 1071
1058 r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, 1072 r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
1059 AMDGPU_GEM_DOMAIN_VRAM, 1073 AMDGPU_GEM_DOMAIN_VRAM,
1060 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 1074 &bo, NULL, (void **)&msg);
1061 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1062 NULL, NULL, 0, &bo);
1063 if (r) 1075 if (r)
1064 return r; 1076 return r;
1065 1077
1066 r = amdgpu_bo_reserve(bo, false);
1067 if (r) {
1068 amdgpu_bo_unref(&bo);
1069 return r;
1070 }
1071
1072 r = amdgpu_bo_kmap(bo, (void **)&msg);
1073 if (r) {
1074 amdgpu_bo_unreserve(bo);
1075 amdgpu_bo_unref(&bo);
1076 return r;
1077 }
1078
1079 /* stitch together an UVD create msg */ 1078 /* stitch together an UVD create msg */
1080 msg[0] = cpu_to_le32(0x00000de4); 1079 msg[0] = cpu_to_le32(0x00000de4);
1081 msg[1] = cpu_to_le32(0x00000000); 1080 msg[1] = cpu_to_le32(0x00000000);
@@ -1091,9 +1090,6 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
1091 for (i = 11; i < 1024; ++i) 1090 for (i = 11; i < 1024; ++i)
1092 msg[i] = cpu_to_le32(0x0); 1091 msg[i] = cpu_to_le32(0x0);
1093 1092
1094 amdgpu_bo_kunmap(bo);
1095 amdgpu_bo_unreserve(bo);
1096
1097 return amdgpu_uvd_send_msg(ring, bo, true, fence); 1093 return amdgpu_uvd_send_msg(ring, bo, true, fence);
1098} 1094}
1099 1095
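The create/destroy message helpers above replace the open-coded create, reserve and kmap sequence with amdgpu_bo_create_reserved(), so the rollback logic lives in one place instead of being repeated at every call site. A minimal standalone sketch of that pattern follows; create_obj(), reserve_obj() and map_obj() are hypothetical placeholders, not driver APIs.

#include <stdio.h>
#include <stdlib.h>

struct obj { void *cpu_addr; int reserved; };

/* Placeholder primitives standing in for BO create/reserve/kmap. */
static int create_obj(size_t size, struct obj **out)
{
	(void)size;
	*out = calloc(1, sizeof(**out));
	return *out ? 0 : -1;
}
static int reserve_obj(struct obj *o) { o->reserved = 1; return 0; }
static void unreserve_obj(struct obj *o) { o->reserved = 0; }
static int map_obj(struct obj *o, size_t size, void **cpu)
{
	o->cpu_addr = malloc(size);
	if (!o->cpu_addr)
		return -1;
	*cpu = o->cpu_addr;
	return 0;
}
static void destroy_obj(struct obj **o)
{
	free((*o)->cpu_addr);
	free(*o);
	*o = NULL;
}

/* One helper that hands back a created, reserved and mapped object,
 * undoing every earlier step if a later one fails - the same shape as
 * the amdgpu_bo_create_reserved() call in the hunks above. */
static int create_obj_reserved(size_t size, struct obj **out, void **cpu)
{
	int r = create_obj(size, out);

	if (r)
		return r;
	r = reserve_obj(*out);
	if (r)
		goto err_destroy;
	r = map_obj(*out, size, cpu);
	if (r)
		goto err_unreserve;
	return 0;

err_unreserve:
	unreserve_obj(*out);
err_destroy:
	destroy_obj(out);
	return r;
}

int main(void)
{
	struct obj *o;
	void *cpu;

	if (create_obj_reserved(1024, &o, &cpu))
		return 1;
	printf("object ready, mapped at %p\n", cpu);
	unreserve_obj(o);
	destroy_obj(&o);
	return 0;
}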
@@ -1101,31 +1097,16 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
1101 bool direct, struct dma_fence **fence) 1097 bool direct, struct dma_fence **fence)
1102{ 1098{
1103 struct amdgpu_device *adev = ring->adev; 1099 struct amdgpu_device *adev = ring->adev;
1104 struct amdgpu_bo *bo; 1100 struct amdgpu_bo *bo = NULL;
1105 uint32_t *msg; 1101 uint32_t *msg;
1106 int r, i; 1102 int r, i;
1107 1103
1108 r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, 1104 r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
1109 AMDGPU_GEM_DOMAIN_VRAM, 1105 AMDGPU_GEM_DOMAIN_VRAM,
1110 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 1106 &bo, NULL, (void **)&msg);
1111 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1112 NULL, NULL, 0, &bo);
1113 if (r) 1107 if (r)
1114 return r; 1108 return r;
1115 1109
1116 r = amdgpu_bo_reserve(bo, false);
1117 if (r) {
1118 amdgpu_bo_unref(&bo);
1119 return r;
1120 }
1121
1122 r = amdgpu_bo_kmap(bo, (void **)&msg);
1123 if (r) {
1124 amdgpu_bo_unreserve(bo);
1125 amdgpu_bo_unref(&bo);
1126 return r;
1127 }
1128
1129 /* stitch together an UVD destroy msg */ 1110 /* stitch together an UVD destroy msg */
1130 msg[0] = cpu_to_le32(0x00000de4); 1111 msg[0] = cpu_to_le32(0x00000de4);
1131 msg[1] = cpu_to_le32(0x00000002); 1112 msg[1] = cpu_to_le32(0x00000002);
@@ -1134,9 +1115,6 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
1134 for (i = 4; i < 1024; ++i) 1115 for (i = 4; i < 1024; ++i)
1135 msg[i] = cpu_to_le32(0x0); 1116 msg[i] = cpu_to_le32(0x0);
1136 1117
1137 amdgpu_bo_kunmap(bo);
1138 amdgpu_bo_unreserve(bo);
1139
1140 return amdgpu_uvd_send_msg(ring, bo, direct, fence); 1118 return amdgpu_uvd_send_msg(ring, bo, direct, fence);
1141} 1119}
1142 1120
@@ -1146,9 +1124,6 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
1146 container_of(work, struct amdgpu_device, uvd.idle_work.work); 1124 container_of(work, struct amdgpu_device, uvd.idle_work.work);
1147 unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); 1125 unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
1148 1126
1149 if (amdgpu_sriov_vf(adev))
1150 return;
1151
1152 if (fences == 0) { 1127 if (fences == 0) {
1153 if (adev->pm.dpm_enabled) { 1128 if (adev->pm.dpm_enabled) {
1154 amdgpu_dpm_enable_uvd(adev, false); 1129 amdgpu_dpm_enable_uvd(adev, false);
@@ -1168,11 +1143,12 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
1168void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) 1143void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
1169{ 1144{
1170 struct amdgpu_device *adev = ring->adev; 1145 struct amdgpu_device *adev = ring->adev;
1171 bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); 1146 bool set_clocks;
1172 1147
1173 if (amdgpu_sriov_vf(adev)) 1148 if (amdgpu_sriov_vf(adev))
1174 return; 1149 return;
1175 1150
1151 set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
1176 if (set_clocks) { 1152 if (set_clocks) {
1177 if (adev->pm.dpm_enabled) { 1153 if (adev->pm.dpm_enabled) {
1178 amdgpu_dpm_enable_uvd(adev, true); 1154 amdgpu_dpm_enable_uvd(adev, true);
@@ -1188,7 +1164,8 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
1188 1164
1189void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) 1165void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
1190{ 1166{
1191 schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); 1167 if (!amdgpu_sriov_vf(ring->adev))
1168 schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
1192} 1169}
1193 1170
1194/** 1171/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index d274ae535530..a33804bd3314 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -55,6 +55,7 @@
55#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" 55#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
56 56
57#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" 57#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
58#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
58 59
59#ifdef CONFIG_DRM_AMDGPU_CIK 60#ifdef CONFIG_DRM_AMDGPU_CIK
60MODULE_FIRMWARE(FIRMWARE_BONAIRE); 61MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -72,6 +73,7 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS11);
72MODULE_FIRMWARE(FIRMWARE_POLARIS12); 73MODULE_FIRMWARE(FIRMWARE_POLARIS12);
73 74
74MODULE_FIRMWARE(FIRMWARE_VEGA10); 75MODULE_FIRMWARE(FIRMWARE_VEGA10);
76MODULE_FIRMWARE(FIRMWARE_VEGA12);
75 77
76static void amdgpu_vce_idle_work_handler(struct work_struct *work); 78static void amdgpu_vce_idle_work_handler(struct work_struct *work);
77 79
@@ -127,11 +129,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
127 case CHIP_POLARIS11: 129 case CHIP_POLARIS11:
128 fw_name = FIRMWARE_POLARIS11; 130 fw_name = FIRMWARE_POLARIS11;
129 break; 131 break;
132 case CHIP_POLARIS12:
133 fw_name = FIRMWARE_POLARIS12;
134 break;
130 case CHIP_VEGA10: 135 case CHIP_VEGA10:
131 fw_name = FIRMWARE_VEGA10; 136 fw_name = FIRMWARE_VEGA10;
132 break; 137 break;
133 case CHIP_POLARIS12: 138 case CHIP_VEGA12:
134 fw_name = FIRMWARE_POLARIS12; 139 fw_name = FIRMWARE_VEGA12;
135 break; 140 break;
136 141
137 default: 142 default:
@@ -300,9 +305,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
300 container_of(work, struct amdgpu_device, vce.idle_work.work); 305 container_of(work, struct amdgpu_device, vce.idle_work.work);
301 unsigned i, count = 0; 306 unsigned i, count = 0;
302 307
303 if (amdgpu_sriov_vf(adev))
304 return;
305
306 for (i = 0; i < adev->vce.num_rings; i++) 308 for (i = 0; i < adev->vce.num_rings; i++)
307 count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); 309 count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
308 310
@@ -362,7 +364,8 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
362 */ 364 */
363void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring) 365void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
364{ 366{
365 schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); 367 if (!amdgpu_sriov_vf(ring->adev))
368 schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
366} 369}
367 370
368/** 371/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 0fd378ae92c3..71781267ee4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -30,6 +30,8 @@
30#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0) 30#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
31#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1) 31#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
32 32
33#define AMDGPU_VCE_FW_53_45 ((53 << 24) | (45 << 16))
34
33struct amdgpu_vce { 35struct amdgpu_vce {
34 struct amdgpu_bo *vcpu_bo; 36 struct amdgpu_bo *vcpu_bo;
35 uint64_t gpu_addr; 37 uint64_t gpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 837962118dbc..58e495330b38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -270,34 +270,17 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
270 return r; 270 return r;
271} 271}
272 272
273static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, 273static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
274 bool direct, struct dma_fence **fence) 274 struct amdgpu_bo *bo, bool direct,
275 struct dma_fence **fence)
275{ 276{
276 struct ttm_operation_ctx ctx = { true, false }; 277 struct amdgpu_device *adev = ring->adev;
277 struct ttm_validate_buffer tv; 278 struct dma_fence *f = NULL;
278 struct ww_acquire_ctx ticket;
279 struct list_head head;
280 struct amdgpu_job *job; 279 struct amdgpu_job *job;
281 struct amdgpu_ib *ib; 280 struct amdgpu_ib *ib;
282 struct dma_fence *f = NULL;
283 struct amdgpu_device *adev = ring->adev;
284 uint64_t addr; 281 uint64_t addr;
285 int i, r; 282 int i, r;
286 283
287 memset(&tv, 0, sizeof(tv));
288 tv.bo = &bo->tbo;
289
290 INIT_LIST_HEAD(&head);
291 list_add(&tv.head, &head);
292
293 r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
294 if (r)
295 return r;
296
297 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
298 if (r)
299 goto err;
300
301 r = amdgpu_job_alloc_with_ib(adev, 64, &job); 284 r = amdgpu_job_alloc_with_ib(adev, 64, &job);
302 if (r) 285 if (r)
303 goto err; 286 goto err;
@@ -330,11 +313,12 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *b
330 goto err_free; 313 goto err_free;
331 } 314 }
332 315
333 ttm_eu_fence_buffer_objects(&ticket, &head, f); 316 amdgpu_bo_fence(bo, f, false);
317 amdgpu_bo_unreserve(bo);
318 amdgpu_bo_unref(&bo);
334 319
335 if (fence) 320 if (fence)
336 *fence = dma_fence_get(f); 321 *fence = dma_fence_get(f);
337 amdgpu_bo_unref(&bo);
338 dma_fence_put(f); 322 dma_fence_put(f);
339 323
340 return 0; 324 return 0;
@@ -343,7 +327,8 @@ err_free:
343 amdgpu_job_free(job); 327 amdgpu_job_free(job);
344 328
345err: 329err:
346 ttm_eu_backoff_reservation(&ticket, &head); 330 amdgpu_bo_unreserve(bo);
331 amdgpu_bo_unref(&bo);
347 return r; 332 return r;
348} 333}
349 334
@@ -351,31 +336,16 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
351 struct dma_fence **fence) 336 struct dma_fence **fence)
352{ 337{
353 struct amdgpu_device *adev = ring->adev; 338 struct amdgpu_device *adev = ring->adev;
354 struct amdgpu_bo *bo; 339 struct amdgpu_bo *bo = NULL;
355 uint32_t *msg; 340 uint32_t *msg;
356 int r, i; 341 int r, i;
357 342
358 r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, 343 r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
359 AMDGPU_GEM_DOMAIN_VRAM, 344 AMDGPU_GEM_DOMAIN_VRAM,
360 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 345 &bo, NULL, (void **)&msg);
361 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
362 NULL, NULL, 0, &bo);
363 if (r) 346 if (r)
364 return r; 347 return r;
365 348
366 r = amdgpu_bo_reserve(bo, false);
367 if (r) {
368 amdgpu_bo_unref(&bo);
369 return r;
370 }
371
372 r = amdgpu_bo_kmap(bo, (void **)&msg);
373 if (r) {
374 amdgpu_bo_unreserve(bo);
375 amdgpu_bo_unref(&bo);
376 return r;
377 }
378
379 msg[0] = cpu_to_le32(0x00000028); 349 msg[0] = cpu_to_le32(0x00000028);
380 msg[1] = cpu_to_le32(0x00000038); 350 msg[1] = cpu_to_le32(0x00000038);
381 msg[2] = cpu_to_le32(0x00000001); 351 msg[2] = cpu_to_le32(0x00000001);
@@ -393,9 +363,6 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
393 for (i = 14; i < 1024; ++i) 363 for (i = 14; i < 1024; ++i)
394 msg[i] = cpu_to_le32(0x0); 364 msg[i] = cpu_to_le32(0x0);
395 365
396 amdgpu_bo_kunmap(bo);
397 amdgpu_bo_unreserve(bo);
398
399 return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); 366 return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
400} 367}
401 368
@@ -403,31 +370,16 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
403 bool direct, struct dma_fence **fence) 370 bool direct, struct dma_fence **fence)
404{ 371{
405 struct amdgpu_device *adev = ring->adev; 372 struct amdgpu_device *adev = ring->adev;
406 struct amdgpu_bo *bo; 373 struct amdgpu_bo *bo = NULL;
407 uint32_t *msg; 374 uint32_t *msg;
408 int r, i; 375 int r, i;
409 376
410 r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, 377 r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
411 AMDGPU_GEM_DOMAIN_VRAM, 378 AMDGPU_GEM_DOMAIN_VRAM,
412 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | 379 &bo, NULL, (void **)&msg);
413 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
414 NULL, NULL, 0, &bo);
415 if (r) 380 if (r)
416 return r; 381 return r;
417 382
418 r = amdgpu_bo_reserve(bo, false);
419 if (r) {
420 amdgpu_bo_unref(&bo);
421 return r;
422 }
423
424 r = amdgpu_bo_kmap(bo, (void **)&msg);
425 if (r) {
426 amdgpu_bo_unreserve(bo);
427 amdgpu_bo_unref(&bo);
428 return r;
429 }
430
431 msg[0] = cpu_to_le32(0x00000028); 383 msg[0] = cpu_to_le32(0x00000028);
432 msg[1] = cpu_to_le32(0x00000018); 384 msg[1] = cpu_to_le32(0x00000018);
433 msg[2] = cpu_to_le32(0x00000000); 385 msg[2] = cpu_to_le32(0x00000000);
@@ -437,9 +389,6 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
437 for (i = 6; i < 1024; ++i) 389 for (i = 6; i < 1024; ++i)
438 msg[i] = cpu_to_le32(0x0); 390 msg[i] = cpu_to_le32(0x0);
439 391
440 amdgpu_bo_kunmap(bo);
441 amdgpu_bo_unreserve(bo);
442
443 return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); 392 return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
444} 393}
445 394
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e7dfb7b44b4b..21adb1b6e5cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,7 +22,21 @@
22 */ 22 */
23 23
24#include "amdgpu.h" 24#include "amdgpu.h"
25#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ 25#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
26#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
27#define MAX_KIQ_REG_TRY 20
28
29uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
30{
31 uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
32
33 addr -= AMDGPU_VA_RESERVED_SIZE;
34
35 if (addr >= AMDGPU_VA_HOLE_START)
36 addr |= AMDGPU_VA_HOLE_END;
37
38 return addr;
39}
26 40
27bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) 41bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
28{ 42{
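The new amdgpu_csa_vaddr() above places the CSA just below the top of the VM address range and, when that address lands beyond the start of the unaddressable hole of the 48-bit space, ORs in the high bits so the result stays canonical. The standalone sketch below works through that computation with illustrative constants (4 KiB GPU pages, an assumed 1 MiB reserved area, a 48-bit hole); the real values come from the driver headers.

#include <stdint.h>
#include <stdio.h>

/* Illustrative constants; the driver takes these from its headers. */
#define GPU_PAGE_SHIFT		12
#define VA_RESERVED_SIZE	(1ULL << 20)		/* assumed 1 MiB */
#define VA_HOLE_START		0x0000800000000000ULL	/* 48-bit hole   */
#define VA_HOLE_END		0xffff800000000000ULL

static uint64_t csa_vaddr(uint64_t max_pfn)
{
	/* Top of the VM range minus the reserved area at the very end. */
	uint64_t addr = (max_pfn << GPU_PAGE_SHIFT) - VA_RESERVED_SIZE;

	/* Addresses past the hole start must be sign-extended so they
	 * fall into the canonical upper half. */
	if (addr >= VA_HOLE_START)
		addr |= VA_HOLE_END;

	return addr;
}

int main(void)
{
	uint64_t max_pfn = 1ULL << 36;	/* 48-bit VA space, 4 KiB pages */

	printf("CSA at 0x%016llx\n", (unsigned long long)csa_vaddr(max_pfn));
	return 0;
}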
@@ -55,14 +69,14 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
55 69
56/* 70/*
57 * amdgpu_map_static_csa should be called during amdgpu_vm_init 71 * amdgpu_map_static_csa should be called during amdgpu_vm_init
58 * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE" 72 * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
59 * to this VM, and each command submission of GFX should use this virtual 73 * submission of GFX should use this virtual address within META_DATA init
60 * address within META_DATA init package to support SRIOV gfx preemption. 74 * package to support SRIOV gfx preemption.
61 */ 75 */
62
63int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, 76int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
64 struct amdgpu_bo_va **bo_va) 77 struct amdgpu_bo_va **bo_va)
65{ 78{
79 uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
66 struct ww_acquire_ctx ticket; 80 struct ww_acquire_ctx ticket;
67 struct list_head list; 81 struct list_head list;
68 struct amdgpu_bo_list_entry pd; 82 struct amdgpu_bo_list_entry pd;
@@ -90,7 +104,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
90 return -ENOMEM; 104 return -ENOMEM;
91 } 105 }
92 106
93 r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR, 107 r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
94 AMDGPU_CSA_SIZE); 108 AMDGPU_CSA_SIZE);
95 if (r) { 109 if (r) {
96 DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); 110 DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
@@ -99,7 +113,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
99 return r; 113 return r;
100 } 114 }
101 115
102 r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE, 116 r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
103 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | 117 AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
104 AMDGPU_PTE_EXECUTABLE); 118 AMDGPU_PTE_EXECUTABLE);
105 119
@@ -125,9 +139,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
125 139
126uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) 140uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
127{ 141{
128 signed long r; 142 signed long r, cnt = 0;
129 unsigned long flags; 143 unsigned long flags;
130 uint32_t val, seq; 144 uint32_t seq;
131 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 145 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
132 struct amdgpu_ring *ring = &kiq->ring; 146 struct amdgpu_ring *ring = &kiq->ring;
133 147
@@ -141,18 +155,39 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
141 spin_unlock_irqrestore(&kiq->ring_lock, flags); 155 spin_unlock_irqrestore(&kiq->ring_lock, flags);
142 156
143 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 157 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
144 if (r < 1) { 158
145 DRM_ERROR("wait for kiq fence error: %ld\n", r); 159 /* don't wait anymore for gpu reset case because this way may
146 return ~0; 160 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
161 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
162 * never return if we keep waiting in virt_kiq_rreg, which causes
163 * gpu_recover() to hang there.
164 *
165 * also don't wait anymore for IRQ context
166 * */
167 if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
168 goto failed_kiq_read;
169
170 if (in_interrupt())
171 might_sleep();
172
173 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
174 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
175 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
147 } 176 }
148 val = adev->wb.wb[adev->virt.reg_val_offs];
149 177
150 return val; 178 if (cnt > MAX_KIQ_REG_TRY)
179 goto failed_kiq_read;
180
181 return adev->wb.wb[adev->virt.reg_val_offs];
182
183failed_kiq_read:
184 pr_err("failed to read reg:%x\n", reg);
185 return ~0;
151} 186}
152 187
153void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) 188void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
154{ 189{
155 signed long r; 190 signed long r, cnt = 0;
156 unsigned long flags; 191 unsigned long flags;
157 uint32_t seq; 192 uint32_t seq;
158 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 193 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -168,8 +203,34 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
168 spin_unlock_irqrestore(&kiq->ring_lock, flags); 203 spin_unlock_irqrestore(&kiq->ring_lock, flags);
169 204
170 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 205 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
171 if (r < 1) 206
172 DRM_ERROR("wait for kiq fence error: %ld\n", r); 207 /* don't wait anymore for gpu reset case because this way may
208 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
209 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
210 * never return if we keep waiting in virt_kiq_rreg, which causes
211 * gpu_recover() to hang there.
212 *
213 * also don't wait anymore for IRQ context
214 * */
215 if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
216 goto failed_kiq_write;
217
218 if (in_interrupt())
219 might_sleep();
220
221 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
222
223 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
224 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
225 }
226
227 if (cnt > MAX_KIQ_REG_TRY)
228 goto failed_kiq_write;
229
230 return;
231
232failed_kiq_write:
233 pr_err("failed to write reg:%x\n", reg);
173} 234}
174 235
175/** 236/**
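The KIQ register accessors above no longer wait unboundedly: they poll once, bail out immediately during a GPU reset or from interrupt context, and otherwise retry up to MAX_KIQ_REG_TRY times with a short sleep in between. The self-contained sketch below reproduces that bounded-retry shape; the constants mirror the diff, but poll_fence() and sleep_ms() are simulated placeholders rather than driver calls.

#include <stdbool.h>
#include <stdio.h>

#define MAX_KIQ_REG_TRY			20
#define MAX_KIQ_REG_BAILOUT_INTERVAL	5	/* ms between retries */

/* Simulated fence poll: succeeds only after a few attempts. */
static long poll_fence(int *attempts)
{
	return ++(*attempts) >= 4 ? 1 : 0;
}

static void sleep_ms(int ms) { (void)ms; /* placeholder for msleep() */ }

/* Returns 0 on success, -1 if the caller must not sleep or the retry
 * budget is exhausted - the same structure as the KIQ read/write paths. */
static int kiq_access(bool in_reset_or_irq)
{
	int attempts = 0;
	long r = poll_fence(&attempts);
	int cnt = 0;

	if (r < 1 && in_reset_or_irq)
		return -1;		/* never block reset or IRQ context */

	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		sleep_ms(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = poll_fence(&attempts);
	}

	return cnt > MAX_KIQ_REG_TRY ? -1 : 0;
}

int main(void)
{
	printf("normal context: %d\n", kiq_access(false));	/* 0  */
	printf("irq/reset:      %d\n", kiq_access(true));	/* -1 */
	return 0;
}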
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 6a83425aa9ed..880ac113a3a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -251,8 +251,7 @@ struct amdgpu_virt {
251 uint32_t gim_feature; 251 uint32_t gim_feature;
252}; 252};
253 253
254#define AMDGPU_CSA_SIZE (8 * 1024) 254#define AMDGPU_CSA_SIZE (8 * 1024)
255#define AMDGPU_CSA_VADDR (AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE)
256 255
257#define amdgpu_sriov_enabled(adev) \ 256#define amdgpu_sriov_enabled(adev) \
258((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) 257((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@@ -279,6 +278,8 @@ static inline bool is_virtual_machine(void)
279} 278}
280 279
281struct amdgpu_vm; 280struct amdgpu_vm;
281
282uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
282bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); 283bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
283int amdgpu_allocate_static_csa(struct amdgpu_device *adev); 284int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
284int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, 285int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5afbc5e714d0..da55a78d7380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -32,6 +32,7 @@
32#include <drm/amdgpu_drm.h> 32#include <drm/amdgpu_drm.h>
33#include "amdgpu.h" 33#include "amdgpu.h"
34#include "amdgpu_trace.h" 34#include "amdgpu_trace.h"
35#include "amdgpu_amdkfd.h"
35 36
36/* 37/*
37 * GPUVM 38 * GPUVM
@@ -75,7 +76,8 @@ struct amdgpu_pte_update_params {
75 /* indirect buffer to fill with commands */ 76 /* indirect buffer to fill with commands */
76 struct amdgpu_ib *ib; 77 struct amdgpu_ib *ib;
77 /* Function which actually does the update */ 78 /* Function which actually does the update */
78 void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, 79 void (*func)(struct amdgpu_pte_update_params *params,
80 struct amdgpu_bo *bo, uint64_t pe,
79 uint64_t addr, unsigned count, uint32_t incr, 81 uint64_t addr, unsigned count, uint32_t incr,
80 uint64_t flags); 82 uint64_t flags);
81 /* The next two are used during VM update by CPU 83 /* The next two are used during VM update by CPU
@@ -257,6 +259,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
257} 259}
258 260
259/** 261/**
262 * amdgpu_vm_clear_bo - initially clear the PDs/PTs
263 *
264 * @adev: amdgpu_device pointer
265 * @bo: BO to clear
266 * @level: level this BO is at
267 *
268 * Root PD needs to be reserved when calling this.
269 */
270static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
271 struct amdgpu_vm *vm, struct amdgpu_bo *bo,
272 unsigned level, bool pte_support_ats)
273{
274 struct ttm_operation_ctx ctx = { true, false };
275 struct dma_fence *fence = NULL;
276 unsigned entries, ats_entries;
277 struct amdgpu_ring *ring;
278 struct amdgpu_job *job;
279 uint64_t addr;
280 int r;
281
282 addr = amdgpu_bo_gpu_offset(bo);
283 entries = amdgpu_bo_size(bo) / 8;
284
285 if (pte_support_ats) {
286 if (level == adev->vm_manager.root_level) {
287 ats_entries = amdgpu_vm_level_shift(adev, level);
288 ats_entries += AMDGPU_GPU_PAGE_SHIFT;
289 ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
290 ats_entries = min(ats_entries, entries);
291 entries -= ats_entries;
292 } else {
293 ats_entries = entries;
294 entries = 0;
295 }
296 } else {
297 ats_entries = 0;
298 }
299
300 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
301
302 r = reservation_object_reserve_shared(bo->tbo.resv);
303 if (r)
304 return r;
305
306 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
307 if (r)
308 goto error;
309
310 r = amdgpu_job_alloc_with_ib(adev, 64, &job);
311 if (r)
312 goto error;
313
314 if (ats_entries) {
315 uint64_t ats_value;
316
317 ats_value = AMDGPU_PTE_DEFAULT_ATC;
318 if (level != AMDGPU_VM_PTB)
319 ats_value |= AMDGPU_PDE_PTE;
320
321 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
322 ats_entries, 0, ats_value);
323 addr += ats_entries * 8;
324 }
325
326 if (entries)
327 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
328 entries, 0, 0);
329
330 amdgpu_ring_pad_ib(ring, &job->ibs[0]);
331
332 WARN_ON(job->ibs[0].length_dw > 64);
333 r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
334 AMDGPU_FENCE_OWNER_UNDEFINED, false);
335 if (r)
336 goto error_free;
337
338 r = amdgpu_job_submit(job, ring, &vm->entity,
339 AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
340 if (r)
341 goto error_free;
342
343 amdgpu_bo_fence(bo, fence, true);
344 dma_fence_put(fence);
345
346 if (bo->shadow)
347 return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
348 level, pte_support_ats);
349
350 return 0;
351
352error_free:
353 amdgpu_job_free(job);
354
355error:
356 return r;
357}
358
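The new amdgpu_vm_clear_bo() above splits a page directory or table into two runs of entries: at the root level, everything covering addresses below the start of the VA hole is initialized with the ATC default entry and the remainder is cleared to zero, while lower levels are either fully ATS or cleared normally. The root-level split arithmetic is sketched below with illustrative parameters (three 9-bit lower levels, 4 KiB pages, hole at the 47-bit boundary); the real shifts come from the VM manager.

#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SHIFT	12
#define VA_HOLE_START	0x0000800000000000ULL	/* illustrative */

/* How many of the root PD's entries cover addresses below the hole;
 * level_shift is the number of index bits handled below this level. */
static unsigned root_ats_entries(unsigned level_shift, unsigned entries)
{
	unsigned shift = level_shift + GPU_PAGE_SHIFT;
	uint64_t ats = VA_HOLE_START >> shift;

	return ats < entries ? (unsigned)ats : entries;
}

int main(void)
{
	/* Example: a 512-entry root PD with three 512-entry levels below. */
	unsigned entries = 512, level_shift = 9 * 3;
	unsigned ats = root_ats_entries(level_shift, entries);

	printf("%u entries get the ATC default, %u are cleared to zero\n",
	       ats, entries - ats);
	return 0;
}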
359/**
260 * amdgpu_vm_alloc_levels - allocate the PD/PT levels 360 * amdgpu_vm_alloc_levels - allocate the PD/PT levels
261 * 361 *
262 * @adev: amdgpu_device pointer 362 * @adev: amdgpu_device pointer
@@ -270,13 +370,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
270 struct amdgpu_vm *vm, 370 struct amdgpu_vm *vm,
271 struct amdgpu_vm_pt *parent, 371 struct amdgpu_vm_pt *parent,
272 uint64_t saddr, uint64_t eaddr, 372 uint64_t saddr, uint64_t eaddr,
273 unsigned level) 373 unsigned level, bool ats)
274{ 374{
275 unsigned shift = amdgpu_vm_level_shift(adev, level); 375 unsigned shift = amdgpu_vm_level_shift(adev, level);
276 unsigned pt_idx, from, to; 376 unsigned pt_idx, from, to;
277 int r;
278 u64 flags; 377 u64 flags;
279 uint64_t init_value = 0; 378 int r;
280 379
281 if (!parent->entries) { 380 if (!parent->entries) {
282 unsigned num_entries = amdgpu_vm_num_entries(adev, level); 381 unsigned num_entries = amdgpu_vm_num_entries(adev, level);
@@ -299,21 +398,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
299 saddr = saddr & ((1 << shift) - 1); 398 saddr = saddr & ((1 << shift) - 1);
300 eaddr = eaddr & ((1 << shift) - 1); 399 eaddr = eaddr & ((1 << shift) - 1);
301 400
302 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 401 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
303 AMDGPU_GEM_CREATE_VRAM_CLEARED;
304 if (vm->use_cpu_for_update) 402 if (vm->use_cpu_for_update)
305 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 403 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
306 else 404 else
307 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 405 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
308 AMDGPU_GEM_CREATE_SHADOW); 406 AMDGPU_GEM_CREATE_SHADOW);
309 407
310 if (vm->pte_support_ats) {
311 init_value = AMDGPU_PTE_DEFAULT_ATC;
312 if (level != AMDGPU_VM_PTB)
313 init_value |= AMDGPU_PDE_PTE;
314
315 }
316
317 /* walk over the address space and allocate the page tables */ 408 /* walk over the address space and allocate the page tables */
318 for (pt_idx = from; pt_idx <= to; ++pt_idx) { 409 for (pt_idx = from; pt_idx <= to; ++pt_idx) {
319 struct reservation_object *resv = vm->root.base.bo->tbo.resv; 410 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
@@ -323,16 +414,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
323 if (!entry->base.bo) { 414 if (!entry->base.bo) {
324 r = amdgpu_bo_create(adev, 415 r = amdgpu_bo_create(adev,
325 amdgpu_vm_bo_size(adev, level), 416 amdgpu_vm_bo_size(adev, level),
326 AMDGPU_GPU_PAGE_SIZE, true, 417 AMDGPU_GPU_PAGE_SIZE,
327 AMDGPU_GEM_DOMAIN_VRAM, 418 AMDGPU_GEM_DOMAIN_VRAM, flags,
328 flags, 419 ttm_bo_type_kernel, resv, &pt);
329 NULL, resv, init_value, &pt);
330 if (r) 420 if (r)
331 return r; 421 return r;
332 422
423 r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
424 if (r) {
425 amdgpu_bo_unref(&pt->shadow);
426 amdgpu_bo_unref(&pt);
427 return r;
428 }
429
333 if (vm->use_cpu_for_update) { 430 if (vm->use_cpu_for_update) {
334 r = amdgpu_bo_kmap(pt, NULL); 431 r = amdgpu_bo_kmap(pt, NULL);
335 if (r) { 432 if (r) {
433 amdgpu_bo_unref(&pt->shadow);
336 amdgpu_bo_unref(&pt); 434 amdgpu_bo_unref(&pt);
337 return r; 435 return r;
338 } 436 }
@@ -356,7 +454,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
356 uint64_t sub_eaddr = (pt_idx == to) ? eaddr : 454 uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
357 ((1 << shift) - 1); 455 ((1 << shift) - 1);
358 r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, 456 r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
359 sub_eaddr, level); 457 sub_eaddr, level, ats);
360 if (r) 458 if (r)
361 return r; 459 return r;
362 } 460 }
@@ -379,26 +477,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
379 struct amdgpu_vm *vm, 477 struct amdgpu_vm *vm,
380 uint64_t saddr, uint64_t size) 478 uint64_t saddr, uint64_t size)
381{ 479{
382 uint64_t last_pfn;
383 uint64_t eaddr; 480 uint64_t eaddr;
481 bool ats = false;
384 482
385 /* validate the parameters */ 483 /* validate the parameters */
386 if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) 484 if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
387 return -EINVAL; 485 return -EINVAL;
388 486
389 eaddr = saddr + size - 1; 487 eaddr = saddr + size - 1;
390 last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; 488
391 if (last_pfn >= adev->vm_manager.max_pfn) { 489 if (vm->pte_support_ats)
392 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", 490 ats = saddr < AMDGPU_VA_HOLE_START;
393 last_pfn, adev->vm_manager.max_pfn);
394 return -EINVAL;
395 }
396 491
397 saddr /= AMDGPU_GPU_PAGE_SIZE; 492 saddr /= AMDGPU_GPU_PAGE_SIZE;
398 eaddr /= AMDGPU_GPU_PAGE_SIZE; 493 eaddr /= AMDGPU_GPU_PAGE_SIZE;
399 494
495 if (eaddr >= adev->vm_manager.max_pfn) {
496 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
497 eaddr, adev->vm_manager.max_pfn);
498 return -EINVAL;
499 }
500
400 return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 501 return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
401 adev->vm_manager.root_level); 502 adev->vm_manager.root_level, ats);
402} 503}
403 504
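In the reworked amdgpu_vm_alloc_pts() above, the ATS flag is derived from whether the range starts below the VA hole, and the end address is checked against max_pfn only after the byte addresses have been converted to page frame numbers. A small standalone sketch of that validation order, using assumed constants, is:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SIZE	4096ULL
#define GPU_PAGE_MASK	(GPU_PAGE_SIZE - 1)
#define VA_HOLE_START	0x0000800000000000ULL	/* illustrative */

static int check_range(uint64_t saddr, uint64_t size, uint64_t max_pfn,
		       bool ats_supported)
{
	uint64_t eaddr;
	bool ats = false;

	/* both ends must be page aligned */
	if ((saddr & GPU_PAGE_MASK) || (size & GPU_PAGE_MASK))
		return -1;

	eaddr = saddr + size - 1;
	if (ats_supported)
		ats = saddr < VA_HOLE_START;	/* low range gets ATS PTEs */

	/* compare in page frame numbers, after the conversion */
	saddr /= GPU_PAGE_SIZE;
	eaddr /= GPU_PAGE_SIZE;
	if (eaddr >= max_pfn)
		return -1;

	printf("range ok, ats=%d, pfns %llu..%llu\n", ats,
	       (unsigned long long)saddr, (unsigned long long)eaddr);
	return 0;
}

int main(void)
{
	return check_range(0x100000, 0x200000, 1ULL << 36, true);
}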
404/** 505/**
@@ -465,7 +566,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
465 566
466static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) 567static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
467{ 568{
468 return (adev->mc.real_vram_size == adev->mc.visible_vram_size); 569 return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
469} 570}
470 571
471/** 572/**
@@ -491,14 +592,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
491 id->oa_base != job->oa_base || 592 id->oa_base != job->oa_base ||
492 id->oa_size != job->oa_size); 593 id->oa_size != job->oa_size);
493 bool vm_flush_needed = job->vm_needs_flush; 594 bool vm_flush_needed = job->vm_needs_flush;
595 bool pasid_mapping_needed = id->pasid != job->pasid ||
596 !id->pasid_mapping ||
597 !dma_fence_is_signaled(id->pasid_mapping);
598 struct dma_fence *fence = NULL;
494 unsigned patch_offset = 0; 599 unsigned patch_offset = 0;
495 int r; 600 int r;
496 601
497 if (amdgpu_vmid_had_gpu_reset(adev, id)) { 602 if (amdgpu_vmid_had_gpu_reset(adev, id)) {
498 gds_switch_needed = true; 603 gds_switch_needed = true;
499 vm_flush_needed = true; 604 vm_flush_needed = true;
605 pasid_mapping_needed = true;
500 } 606 }
501 607
608 gds_switch_needed &= !!ring->funcs->emit_gds_switch;
609 vm_flush_needed &= !!ring->funcs->emit_vm_flush;
610 pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
611 ring->funcs->emit_wreg;
612
502 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) 613 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
503 return 0; 614 return 0;
504 615
@@ -508,23 +619,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
508 if (need_pipe_sync) 619 if (need_pipe_sync)
509 amdgpu_ring_emit_pipeline_sync(ring); 620 amdgpu_ring_emit_pipeline_sync(ring);
510 621
511 if (ring->funcs->emit_vm_flush && vm_flush_needed) { 622 if (vm_flush_needed) {
512 struct dma_fence *fence;
513
514 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); 623 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
515 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); 624 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
625 }
626
627 if (pasid_mapping_needed)
628 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
516 629
630 if (vm_flush_needed || pasid_mapping_needed) {
517 r = amdgpu_fence_emit(ring, &fence); 631 r = amdgpu_fence_emit(ring, &fence);
518 if (r) 632 if (r)
519 return r; 633 return r;
634 }
520 635
636 if (vm_flush_needed) {
521 mutex_lock(&id_mgr->lock); 637 mutex_lock(&id_mgr->lock);
522 dma_fence_put(id->last_flush); 638 dma_fence_put(id->last_flush);
523 id->last_flush = fence; 639 id->last_flush = dma_fence_get(fence);
524 id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); 640 id->current_gpu_reset_count =
641 atomic_read(&adev->gpu_reset_counter);
525 mutex_unlock(&id_mgr->lock); 642 mutex_unlock(&id_mgr->lock);
526 } 643 }
527 644
645 if (pasid_mapping_needed) {
646 id->pasid = job->pasid;
647 dma_fence_put(id->pasid_mapping);
648 id->pasid_mapping = dma_fence_get(fence);
649 }
650 dma_fence_put(fence);
651
528 if (ring->funcs->emit_gds_switch && gds_switch_needed) { 652 if (ring->funcs->emit_gds_switch && gds_switch_needed) {
529 id->gds_base = job->gds_base; 653 id->gds_base = job->gds_base;
530 id->gds_size = job->gds_size; 654 id->gds_size = job->gds_size;
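The flush path above now emits at most one fence and then takes a separate reference for each consumer (id->last_flush and id->pasid_mapping) before dropping the local one. The standalone sketch below models that reference discipline with a trivial refcounted object; fence_get() and fence_put() are stand-ins for dma_fence_get()/dma_fence_put(), not the real implementations.

#include <stdio.h>
#include <stdlib.h>

struct fence { int refs; };

static struct fence *fence_new(void)
{
	struct fence *f = malloc(sizeof(*f));

	f->refs = 1;		/* emitter holds the initial reference */
	return f;
}
static struct fence *fence_get(struct fence *f) { f->refs++; return f; }
static void fence_put(struct fence *f)
{
	if (f && --f->refs == 0)
		free(f);
}

int main(void)
{
	struct fence *last_flush = NULL, *pasid_mapping = NULL;
	struct fence *f = fence_new();

	/* Each long-lived holder takes its own reference... */
	fence_put(last_flush);
	last_flush = fence_get(f);

	fence_put(pasid_mapping);
	pasid_mapping = fence_get(f);

	/* ...and the local reference from emitting the fence is dropped. */
	fence_put(f);

	printf("outstanding references: %d\n", last_flush->refs);	/* 2 */

	fence_put(pasid_mapping);
	fence_put(last_flush);
	return 0;
}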
@@ -578,6 +702,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
578 * amdgpu_vm_do_set_ptes - helper to call the right asic function 702 * amdgpu_vm_do_set_ptes - helper to call the right asic function
579 * 703 *
580 * @params: see amdgpu_pte_update_params definition 704 * @params: see amdgpu_pte_update_params definition
705 * @bo: PD/PT to update
581 * @pe: addr of the page entry 706 * @pe: addr of the page entry
582 * @addr: dst addr to write into pe 707 * @addr: dst addr to write into pe
583 * @count: number of page entries to update 708 * @count: number of page entries to update
@@ -588,10 +713,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
588 * to setup the page table using the DMA. 713 * to setup the page table using the DMA.
589 */ 714 */
590static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, 715static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
716 struct amdgpu_bo *bo,
591 uint64_t pe, uint64_t addr, 717 uint64_t pe, uint64_t addr,
592 unsigned count, uint32_t incr, 718 unsigned count, uint32_t incr,
593 uint64_t flags) 719 uint64_t flags)
594{ 720{
721 pe += amdgpu_bo_gpu_offset(bo);
595 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); 722 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
596 723
597 if (count < 3) { 724 if (count < 3) {
@@ -608,6 +735,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
608 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART 735 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
609 * 736 *
610 * @params: see amdgpu_pte_update_params definition 737 * @params: see amdgpu_pte_update_params definition
738 * @bo: PD/PT to update
611 * @pe: addr of the page entry 739 * @pe: addr of the page entry
612 * @addr: dst addr to write into pe 740 * @addr: dst addr to write into pe
613 * @count: number of page entries to update 741 * @count: number of page entries to update
@@ -617,13 +745,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
617 * Traces the parameters and calls the DMA function to copy the PTEs. 745 * Traces the parameters and calls the DMA function to copy the PTEs.
618 */ 746 */
619static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, 747static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
748 struct amdgpu_bo *bo,
620 uint64_t pe, uint64_t addr, 749 uint64_t pe, uint64_t addr,
621 unsigned count, uint32_t incr, 750 unsigned count, uint32_t incr,
622 uint64_t flags) 751 uint64_t flags)
623{ 752{
624 uint64_t src = (params->src + (addr >> 12) * 8); 753 uint64_t src = (params->src + (addr >> 12) * 8);
625 754
626 755 pe += amdgpu_bo_gpu_offset(bo);
627 trace_amdgpu_vm_copy_ptes(pe, src, count); 756 trace_amdgpu_vm_copy_ptes(pe, src, count);
628 757
629 amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); 758 amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
@@ -657,6 +786,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
657 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU 786 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
658 * 787 *
659 * @params: see amdgpu_pte_update_params definition 788 * @params: see amdgpu_pte_update_params definition
789 * @bo: PD/PT to update
660 * @pe: kmap addr of the page entry 790 * @pe: kmap addr of the page entry
661 * @addr: dst addr to write into pe 791 * @addr: dst addr to write into pe
662 * @count: number of page entries to update 792 * @count: number of page entries to update
@@ -666,6 +796,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
666 * Write count number of PT/PD entries directly. 796 * Write count number of PT/PD entries directly.
667 */ 797 */
668static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, 798static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
799 struct amdgpu_bo *bo,
669 uint64_t pe, uint64_t addr, 800 uint64_t pe, uint64_t addr,
670 unsigned count, uint32_t incr, 801 unsigned count, uint32_t incr,
671 uint64_t flags) 802 uint64_t flags)
@@ -673,14 +804,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
673 unsigned int i; 804 unsigned int i;
674 uint64_t value; 805 uint64_t value;
675 806
807 pe += (unsigned long)amdgpu_bo_kptr(bo);
808
676 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); 809 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
677 810
678 for (i = 0; i < count; i++) { 811 for (i = 0; i < count; i++) {
679 value = params->pages_addr ? 812 value = params->pages_addr ?
680 amdgpu_vm_map_gart(params->pages_addr, addr) : 813 amdgpu_vm_map_gart(params->pages_addr, addr) :
681 addr; 814 addr;
682 amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, 815 amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
683 i, value, flags); 816 i, value, flags);
684 addr += incr; 817 addr += incr;
685 } 818 }
686} 819}
@@ -714,8 +847,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
714 struct amdgpu_vm_pt *parent, 847 struct amdgpu_vm_pt *parent,
715 struct amdgpu_vm_pt *entry) 848 struct amdgpu_vm_pt *entry)
716{ 849{
717 struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; 850 struct amdgpu_bo *bo = parent->base.bo, *pbo;
718 uint64_t pd_addr, shadow_addr = 0;
719 uint64_t pde, pt, flags; 851 uint64_t pde, pt, flags;
720 unsigned level; 852 unsigned level;
721 853
@@ -723,29 +855,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
723 if (entry->huge) 855 if (entry->huge)
724 return; 856 return;
725 857
726 if (vm->use_cpu_for_update) { 858 for (level = 0, pbo = bo->parent; pbo; ++level)
727 pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
728 } else {
729 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
730 shadow = parent->base.bo->shadow;
731 if (shadow)
732 shadow_addr = amdgpu_bo_gpu_offset(shadow);
733 }
734
735 for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
736 pbo = pbo->parent; 859 pbo = pbo->parent;
737 860
738 level += params->adev->vm_manager.root_level; 861 level += params->adev->vm_manager.root_level;
739 pt = amdgpu_bo_gpu_offset(bo); 862 pt = amdgpu_bo_gpu_offset(entry->base.bo);
740 flags = AMDGPU_PTE_VALID; 863 flags = AMDGPU_PTE_VALID;
741 amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); 864 amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
742 if (shadow) { 865 pde = (entry - parent->entries) * 8;
743 pde = shadow_addr + (entry - parent->entries) * 8; 866 if (bo->shadow)
744 params->func(params, pde, pt, 1, 0, flags); 867 params->func(params, bo->shadow, pde, pt, 1, 0, flags);
745 } 868 params->func(params, bo, pde, pt, 1, 0, flags);
746
747 pde = pd_addr + (entry - parent->entries) * 8;
748 params->func(params, pde, pt, 1, 0, flags);
749} 869}
750 870
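After the change above, amdgpu_vm_update_pde() no longer computes GPU or CPU base addresses itself: the entry offset is passed as a relative pe, each update callback adds the right base for the BO it is handed, and the shadow copy is kept in sync simply by invoking the callback a second time. A compact standalone sketch of that callback shape, with a plain array standing in for the PD and its shadow, is:

#include <stdint.h>
#include <stdio.h>

struct pd { uint64_t entries[4]; };

/* The callback receives the target object plus an offset relative to
 * it and resolves the final location itself, mirroring how the set/copy
 * callbacks in the diff add the BO's GPU or kernel-mapped base to pe. */
typedef void (*update_func)(struct pd *pd, uint64_t pe, uint64_t value);

static void cpu_set_entry(struct pd *pd, uint64_t pe, uint64_t value)
{
	pd->entries[pe / 8] = value;	/* pe is a byte offset */
}

static void update_pde(struct pd *pd, struct pd *shadow,
		       unsigned idx, uint64_t value, update_func func)
{
	uint64_t pe = (uint64_t)idx * 8;

	if (shadow)
		func(shadow, pe, value);	/* keep the shadow in sync */
	func(pd, pe, value);
}

int main(void)
{
	struct pd pd = { {0} }, shadow = { {0} };

	update_pde(&pd, &shadow, 2, 0xdeadbeef, cpu_set_entry);
	printf("pd[2]=0x%llx shadow[2]=0x%llx\n",
	       (unsigned long long)pd.entries[2],
	       (unsigned long long)shadow.entries[2]);
	return 0;
}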
751/* 871/*
@@ -856,7 +976,7 @@ restart:
856 if (vm->use_cpu_for_update) { 976 if (vm->use_cpu_for_update) {
857 /* Flush HDP */ 977 /* Flush HDP */
858 mb(); 978 mb();
859 amdgpu_gart_flush_gpu_tlb(adev, 0); 979 amdgpu_asic_flush_hdp(adev, NULL);
860 } else if (params.ib->length_dw == 0) { 980 } else if (params.ib->length_dw == 0) {
861 amdgpu_job_free(job); 981 amdgpu_job_free(job);
862 } else { 982 } else {
@@ -870,11 +990,6 @@ restart:
870 amdgpu_ring_pad_ib(ring, params.ib); 990 amdgpu_ring_pad_ib(ring, params.ib);
871 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, 991 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
872 AMDGPU_FENCE_OWNER_VM, false); 992 AMDGPU_FENCE_OWNER_VM, false);
873 if (root->shadow)
874 amdgpu_sync_resv(adev, &job->sync,
875 root->shadow->tbo.resv,
876 AMDGPU_FENCE_OWNER_VM, false);
877
878 WARN_ON(params.ib->length_dw > ndw); 993 WARN_ON(params.ib->length_dw > ndw);
879 r = amdgpu_job_submit(job, ring, &vm->entity, 994 r = amdgpu_job_submit(job, ring, &vm->entity,
880 AMDGPU_FENCE_OWNER_VM, &fence); 995 AMDGPU_FENCE_OWNER_VM, &fence);
@@ -946,7 +1061,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
946 unsigned nptes, uint64_t dst, 1061 unsigned nptes, uint64_t dst,
947 uint64_t flags) 1062 uint64_t flags)
948{ 1063{
949 uint64_t pd_addr, pde; 1064 uint64_t pde;
950 1065
951 /* In the case of a mixed PT the PDE must point to it*/ 1066 /* In the case of a mixed PT the PDE must point to it*/
952 if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && 1067 if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
@@ -967,21 +1082,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
967 } 1082 }
968 1083
969 entry->huge = true; 1084 entry->huge = true;
970 amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0, 1085 amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
971 &dst, &flags);
972 1086
973 if (p->func == amdgpu_vm_cpu_set_ptes) { 1087 pde = (entry - parent->entries) * 8;
974 pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); 1088 if (parent->base.bo->shadow)
975 } else { 1089 p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
976 if (parent->base.bo->shadow) { 1090 p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
977 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
978 pde = pd_addr + (entry - parent->entries) * 8;
979 p->func(p, pde, dst, 1, 0, flags);
980 }
981 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
982 }
983 pde = pd_addr + (entry - parent->entries) * 8;
984 p->func(p, pde, dst, 1, 0, flags);
985} 1091}
986 1092
987/** 1093/**
@@ -1007,7 +1113,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1007 uint64_t addr, pe_start; 1113 uint64_t addr, pe_start;
1008 struct amdgpu_bo *pt; 1114 struct amdgpu_bo *pt;
1009 unsigned nptes; 1115 unsigned nptes;
1010 bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
1011 1116
1012 /* walk over the address space and update the page tables */ 1117 /* walk over the address space and update the page tables */
1013 for (addr = start; addr < end; addr += nptes, 1118 for (addr = start; addr < end; addr += nptes,
@@ -1030,20 +1135,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1030 continue; 1135 continue;
1031 1136
1032 pt = entry->base.bo; 1137 pt = entry->base.bo;
1033 if (use_cpu_update) { 1138 pe_start = (addr & mask) * 8;
1034 pe_start = (unsigned long)amdgpu_bo_kptr(pt); 1139 if (pt->shadow)
1035 } else { 1140 params->func(params, pt->shadow, pe_start, dst, nptes,
1036 if (pt->shadow) { 1141 AMDGPU_GPU_PAGE_SIZE, flags);
1037 pe_start = amdgpu_bo_gpu_offset(pt->shadow); 1142 params->func(params, pt, pe_start, dst, nptes,
1038 pe_start += (addr & mask) * 8;
1039 params->func(params, pe_start, dst, nptes,
1040 AMDGPU_GPU_PAGE_SIZE, flags);
1041 }
1042 pe_start = amdgpu_bo_gpu_offset(pt);
1043 }
1044
1045 pe_start += (addr & mask) * 8;
1046 params->func(params, pe_start, dst, nptes,
1047 AMDGPU_GPU_PAGE_SIZE, flags); 1143 AMDGPU_GPU_PAGE_SIZE, flags);
1048 } 1144 }
1049 1145
@@ -1204,11 +1300,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1204 1300
1205 } else { 1301 } else {
1206 /* set page commands needed */ 1302 /* set page commands needed */
1207 ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; 1303 ndw += ncmds * 10;
1208 1304
1209 /* extra commands for begin/end fragments */ 1305 /* extra commands for begin/end fragments */
1210 ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw 1306 ndw += 2 * 10 * adev->vm_manager.fragment_size;
1211 * adev->vm_manager.fragment_size;
1212 1307
1213 params.func = amdgpu_vm_do_set_ptes; 1308 params.func = amdgpu_vm_do_set_ptes;
1214 } 1309 }
@@ -1457,7 +1552,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1457 if (vm->use_cpu_for_update) { 1552 if (vm->use_cpu_for_update) {
1458 /* Flush HDP */ 1553 /* Flush HDP */
1459 mb(); 1554 mb();
1460 amdgpu_gart_flush_gpu_tlb(adev, 0); 1555 amdgpu_asic_flush_hdp(adev, NULL);
1461 } 1556 }
1462 1557
1463 spin_lock(&vm->status_lock); 1558 spin_lock(&vm->status_lock);
@@ -1485,7 +1580,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1485 1580
1486 spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); 1581 spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1487 enable = !!atomic_read(&adev->vm_manager.num_prt_users); 1582 enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1488 adev->gart.gart_funcs->set_prt(adev, enable); 1583 adev->gmc.gmc_funcs->set_prt(adev, enable);
1489 spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); 1584 spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1490} 1585}
1491 1586
@@ -1494,7 +1589,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1494 */ 1589 */
1495static void amdgpu_vm_prt_get(struct amdgpu_device *adev) 1590static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1496{ 1591{
1497 if (!adev->gart.gart_funcs->set_prt) 1592 if (!adev->gmc.gmc_funcs->set_prt)
1498 return; 1593 return;
1499 1594
1500 if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) 1595 if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
@@ -1529,7 +1624,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1529{ 1624{
1530 struct amdgpu_prt_cb *cb; 1625 struct amdgpu_prt_cb *cb;
1531 1626
1532 if (!adev->gart.gart_funcs->set_prt) 1627 if (!adev->gmc.gmc_funcs->set_prt)
1533 return; 1628 return;
1534 1629
1535 cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL); 1630 cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
@@ -1623,16 +1718,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1623 struct dma_fence **fence) 1718 struct dma_fence **fence)
1624{ 1719{
1625 struct amdgpu_bo_va_mapping *mapping; 1720 struct amdgpu_bo_va_mapping *mapping;
1721 uint64_t init_pte_value = 0;
1626 struct dma_fence *f = NULL; 1722 struct dma_fence *f = NULL;
1627 int r; 1723 int r;
1628 uint64_t init_pte_value = 0;
1629 1724
1630 while (!list_empty(&vm->freed)) { 1725 while (!list_empty(&vm->freed)) {
1631 mapping = list_first_entry(&vm->freed, 1726 mapping = list_first_entry(&vm->freed,
1632 struct amdgpu_bo_va_mapping, list); 1727 struct amdgpu_bo_va_mapping, list);
1633 list_del(&mapping->list); 1728 list_del(&mapping->list);
1634 1729
1635 if (vm->pte_support_ats) 1730 if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
1636 init_pte_value = AMDGPU_PTE_DEFAULT_ATC; 1731 init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
1637 1732
1638 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, 1733 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -2262,11 +2357,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2262{ 2357{
2263 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, 2358 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2264 AMDGPU_VM_PTE_COUNT(adev) * 8); 2359 AMDGPU_VM_PTE_COUNT(adev) * 8);
2265 uint64_t init_pde_value = 0, flags;
2266 unsigned ring_instance; 2360 unsigned ring_instance;
2267 struct amdgpu_ring *ring; 2361 struct amdgpu_ring *ring;
2268 struct drm_sched_rq *rq; 2362 struct drm_sched_rq *rq;
2269 unsigned long size; 2363 unsigned long size;
2364 uint64_t flags;
2270 int r, i; 2365 int r, i;
2271 2366
2272 vm->va = RB_ROOT_CACHED; 2367 vm->va = RB_ROOT_CACHED;
@@ -2295,33 +2390,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2295 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2390 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2296 AMDGPU_VM_USE_CPU_FOR_COMPUTE); 2391 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2297 2392
2298 if (adev->asic_type == CHIP_RAVEN) { 2393 if (adev->asic_type == CHIP_RAVEN)
2299 vm->pte_support_ats = true; 2394 vm->pte_support_ats = true;
2300 init_pde_value = AMDGPU_PTE_DEFAULT_ATC 2395 } else {
2301 | AMDGPU_PDE_PTE;
2302
2303 }
2304 } else
2305 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2396 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2306 AMDGPU_VM_USE_CPU_FOR_GFX); 2397 AMDGPU_VM_USE_CPU_FOR_GFX);
2398 }
2307 DRM_DEBUG_DRIVER("VM update mode is %s\n", 2399 DRM_DEBUG_DRIVER("VM update mode is %s\n",
2308 vm->use_cpu_for_update ? "CPU" : "SDMA"); 2400 vm->use_cpu_for_update ? "CPU" : "SDMA");
2309 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), 2401 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2310 "CPU update of VM recommended only for large BAR system\n"); 2402 "CPU update of VM recommended only for large BAR system\n");
2311 vm->last_update = NULL; 2403 vm->last_update = NULL;
2312 2404
2313 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 2405 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
2314 AMDGPU_GEM_CREATE_VRAM_CLEARED;
2315 if (vm->use_cpu_for_update) 2406 if (vm->use_cpu_for_update)
2316 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 2407 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2317 else 2408 else
2318 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 2409 flags |= AMDGPU_GEM_CREATE_SHADOW;
2319 AMDGPU_GEM_CREATE_SHADOW);
2320 2410
2321 size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); 2411 size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2322 r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM, 2412 r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
2323 flags, NULL, NULL, init_pde_value, 2413 ttm_bo_type_kernel, NULL, &vm->root.base.bo);
2324 &vm->root.base.bo);
2325 if (r) 2414 if (r)
2326 goto error_free_sched_entity; 2415 goto error_free_sched_entity;
2327 2416
@@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2329 if (r) 2418 if (r)
2330 goto error_free_root; 2419 goto error_free_root;
2331 2420
2421 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2422 adev->vm_manager.root_level,
2423 vm->pte_support_ats);
2424 if (r)
2425 goto error_unreserve;
2426
2332 vm->root.base.vm = vm; 2427 vm->root.base.vm = vm;
2333 list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); 2428 list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
2334 list_add_tail(&vm->root.base.vm_status, &vm->evicted); 2429 list_add_tail(&vm->root.base.vm_status, &vm->evicted);
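Note on the two hunks above: amdgpu_bo_create() no longer takes an initial fill value, so the root page directory is now cleared explicitly with amdgpu_vm_clear_bo() right after creation, passing the VM's ATS setting. A minimal sketch of that sequence for a kernel BO, assuming the same helpers used above; error paths are trimmed for brevity:

    /* Illustrative only: allocate the root PD and initialize it afterwards,
     * since the creation call no longer fills the BO.
     */
    r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM,
                         flags, ttm_bo_type_kernel, NULL, &vm->root.base.bo);
    if (!r)
            r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
                                   adev->vm_manager.root_level,
                                   vm->pte_support_ats);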
@@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2352 2447
2353 return 0; 2448 return 0;
2354 2449
2450error_unreserve:
2451 amdgpu_bo_unreserve(vm->root.base.bo);
2452
2355error_free_root: 2453error_free_root:
2356 amdgpu_bo_unref(&vm->root.base.bo->shadow); 2454 amdgpu_bo_unref(&vm->root.base.bo->shadow);
2357 amdgpu_bo_unref(&vm->root.base.bo); 2455 amdgpu_bo_unref(&vm->root.base.bo);
@@ -2364,6 +2462,73 @@ error_free_sched_entity:
2364} 2462}
2365 2463
2366/** 2464/**
2465 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2466 *
2467 * This only works on GFX VMs that don't have any BOs added and no
2468 * page tables allocated yet.
2469 *
2470 * Changes the following VM parameters:
2471 * - use_cpu_for_update
2472 * - pte_support_ats
2473 * - pasid (old PASID is released, because compute manages its own PASIDs)
2474 *
2475 * Reinitializes the page directory to reflect the changed ATS
2476 * setting. May leave behind an unused shadow BO for the page
2477 * directory when switching from SDMA updates to CPU updates.
2478 *
2479 * Returns 0 for success, -errno for errors.
2480 */
2481int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2482{
2483 bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
2484 int r;
2485
2486 r = amdgpu_bo_reserve(vm->root.base.bo, true);
2487 if (r)
2488 return r;
2489
2490 /* Sanity checks */
2491 if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
2492 r = -EINVAL;
2493 goto error;
2494 }
2495
2496 /* Check if PD needs to be reinitialized and do it before
2497 * changing any other state, in case it fails.
2498 */
2499 if (pte_support_ats != vm->pte_support_ats) {
2500 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2501 adev->vm_manager.root_level,
2502 pte_support_ats);
2503 if (r)
2504 goto error;
2505 }
2506
2507 /* Update VM state */
2508 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2509 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2510 vm->pte_support_ats = pte_support_ats;
2511 DRM_DEBUG_DRIVER("VM update mode is %s\n",
2512 vm->use_cpu_for_update ? "CPU" : "SDMA");
2513 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2514 "CPU update of VM recommended only for large BAR system\n");
2515
2516 if (vm->pasid) {
2517 unsigned long flags;
2518
2519 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2520 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2521 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2522
2523 vm->pasid = 0;
2524 }
2525
2526error:
2527 amdgpu_bo_unreserve(vm->root.base.bo);
2528 return r;
2529}
2530
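As context for the helper above, a minimal sketch of how a caller might use it, assuming the VM was set up earlier through amdgpu_vm_init() and has no mappings yet; the wrapper name and the surrounding KFD plumbing are illustrative assumptions, not part of this patch:

    /* Illustrative only: promote a freshly created GFX VM to a compute VM.
     * Must run before any BOs are mapped or page tables are allocated,
     * which is exactly what the sanity checks in amdgpu_vm_make_compute()
     * enforce.
     */
    static int example_acquire_compute_vm(struct amdgpu_device *adev,
                                          struct amdgpu_vm *vm)
    {
            int r;

            r = amdgpu_vm_make_compute(adev, vm);
            if (r)
                    return r;

            /* The old PASID has been released; the caller (e.g. KFD) can
             * now assign one that it manages itself.
             */
            return 0;
    }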
2531/**
2367 * amdgpu_vm_free_levels - free PD/PT levels 2532 * amdgpu_vm_free_levels - free PD/PT levels
2368 * 2533 *
2369 * @adev: amdgpu device structure 2534 * @adev: amdgpu device structure
@@ -2405,11 +2570,13 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
2405void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 2570void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2406{ 2571{
2407 struct amdgpu_bo_va_mapping *mapping, *tmp; 2572 struct amdgpu_bo_va_mapping *mapping, *tmp;
2408 bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; 2573 bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2409 struct amdgpu_bo *root; 2574 struct amdgpu_bo *root;
2410 u64 fault; 2575 u64 fault;
2411 int i, r; 2576 int i, r;
2412 2577
2578 amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2579
2413 /* Clear pending page faults from IH when the VM is destroyed */ 2580 /* Clear pending page faults from IH when the VM is destroyed */
2414 while (kfifo_get(&vm->faults, &fault)) 2581 while (kfifo_get(&vm->faults, &fault))
2415 amdgpu_ih_clear_fault(adev, fault); 2582 amdgpu_ih_clear_fault(adev, fault);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 21a80f1bb2b9..30f080364c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -28,6 +28,7 @@
28#include <linux/kfifo.h> 28#include <linux/kfifo.h>
29#include <linux/rbtree.h> 29#include <linux/rbtree.h>
30#include <drm/gpu_scheduler.h> 30#include <drm/gpu_scheduler.h>
31#include <drm/drm_file.h>
31 32
32#include "amdgpu_sync.h" 33#include "amdgpu_sync.h"
33#include "amdgpu_ring.h" 34#include "amdgpu_ring.h"
@@ -99,7 +100,7 @@ struct amdgpu_bo_list_entry;
99#define AMDGPU_MMHUB 1 100#define AMDGPU_MMHUB 1
100 101
101/* hardcode that limit for now */ 102/* hardcode that limit for now */
102#define AMDGPU_VA_RESERVED_SIZE (8ULL << 20) 103#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20)
103 104
104/* VA hole for 48bit addresses on Vega10 */ 105/* VA hole for 48bit addresses on Vega10 */
105#define AMDGPU_VA_HOLE_START 0x0000800000000000ULL 106#define AMDGPU_VA_HOLE_START 0x0000800000000000ULL
@@ -206,6 +207,15 @@ struct amdgpu_vm {
206 207
207 /* Limit non-retry fault storms */ 208 /* Limit non-retry fault storms */
208 unsigned int fault_credit; 209 unsigned int fault_credit;
210
211 /* Points to the KFD process VM info */
212 struct amdkfd_process_info *process_info;
213
214 /* List node in amdkfd_process_info.vm_list_head */
215 struct list_head vm_list_node;
216
217 /* Valid while the PD is reserved or fenced */
218 uint64_t pd_phys_addr;
209}; 219};
210 220
211struct amdgpu_vm_manager { 221struct amdgpu_vm_manager {
@@ -250,6 +260,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
250void amdgpu_vm_manager_fini(struct amdgpu_device *adev); 260void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
251int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 261int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
252 int vm_context, unsigned int pasid); 262 int vm_context, unsigned int pasid);
263int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
253void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); 264void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
254bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, 265bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
255 unsigned int pasid); 266 unsigned int pasid);
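The three new amdgpu_vm fields tie a VM to KFD's per-process bookkeeping. A rough sketch of how they would typically be populated while the root page directory is reserved; the helper name is hypothetical, amdgpu_bo_gpu_offset() is the existing BO accessor:

    /* Illustrative only: link a VM into the KFD process info and cache the
     * page-directory address, which is valid only while the PD BO is
     * reserved or fenced.
     */
    static void example_attach_vm_to_process(struct amdgpu_vm *vm,
                                             struct amdkfd_process_info *info)
    {
            vm->process_info = info;
            list_add_tail(&vm->vm_list_node, &info->vm_list_head);
            vm->pd_phys_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
    }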
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 4acca92f6a52..9aca653bec07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -89,11 +89,11 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
89 uint64_t start = node->start << PAGE_SHIFT; 89 uint64_t start = node->start << PAGE_SHIFT;
90 uint64_t end = (node->size + node->start) << PAGE_SHIFT; 90 uint64_t end = (node->size + node->start) << PAGE_SHIFT;
91 91
92 if (start >= adev->mc.visible_vram_size) 92 if (start >= adev->gmc.visible_vram_size)
93 return 0; 93 return 0;
94 94
95 return (end > adev->mc.visible_vram_size ? 95 return (end > adev->gmc.visible_vram_size ?
96 adev->mc.visible_vram_size : end) - start; 96 adev->gmc.visible_vram_size : end) - start;
97} 97}
98 98
99/** 99/**
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 2af26d2da127..d702fb8e3427 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -34,7 +34,7 @@
34#include <linux/backlight.h> 34#include <linux/backlight.h>
35#include "bif/bif_4_1_d.h" 35#include "bif/bif_4_1_d.h"
36 36
37static u8 37u8
38amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev) 38amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
39{ 39{
40 u8 backlight_level; 40 u8 backlight_level;
@@ -48,7 +48,7 @@ amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
48 return backlight_level; 48 return backlight_level;
49} 49}
50 50
51static void 51void
52amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev, 52amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
53 u8 backlight_level) 53 u8 backlight_level)
54{ 54{
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
index 2bdec40515ce..f77cbdef679e 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
@@ -25,6 +25,11 @@
25#define __ATOMBIOS_ENCODER_H__ 25#define __ATOMBIOS_ENCODER_H__
26 26
27u8 27u8
28amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev);
29void
30amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
31 u8 backlight_level);
32u8
28amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder); 33amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder);
29void 34void
30amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder, 35amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder,
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index a0943aa8d1d3..47ef3e6e7178 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -65,6 +65,8 @@ MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
65#define VOLTAGE_VID_OFFSET_SCALE1 625 65#define VOLTAGE_VID_OFFSET_SCALE1 625
66#define VOLTAGE_VID_OFFSET_SCALE2 100 66#define VOLTAGE_VID_OFFSET_SCALE2 100
67 67
68static const struct amd_pm_funcs ci_dpm_funcs;
69
68static const struct ci_pt_defaults defaults_hawaii_xt = 70static const struct ci_pt_defaults defaults_hawaii_xt =
69{ 71{
70 1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000, 72 1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000,
@@ -905,7 +907,7 @@ static bool ci_dpm_vblank_too_short(void *handle)
905{ 907{
906 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 908 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
907 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); 909 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
908 u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; 910 u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
909 911
910 /* disable mclk switching if the refresh is >120Hz, even if the 912 /* disable mclk switching if the refresh is >120Hz, even if the
911 * blanking period would allow it 913 * blanking period would allow it
@@ -2954,7 +2956,7 @@ static int ci_calculate_mclk_params(struct amdgpu_device *adev,
2954 mpll_ad_func_cntl &= ~MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK; 2956 mpll_ad_func_cntl &= ~MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK;
2955 mpll_ad_func_cntl |= (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT); 2957 mpll_ad_func_cntl |= (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT);
2956 2958
2957 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { 2959 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
2958 mpll_dq_func_cntl &= ~(MPLL_DQ_FUNC_CNTL__YCLK_SEL_MASK | 2960 mpll_dq_func_cntl &= ~(MPLL_DQ_FUNC_CNTL__YCLK_SEL_MASK |
2959 MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK); 2961 MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK);
2960 mpll_dq_func_cntl |= (mpll_param.yclk_sel << MPLL_DQ_FUNC_CNTL__YCLK_SEL__SHIFT) | 2962 mpll_dq_func_cntl |= (mpll_param.yclk_sel << MPLL_DQ_FUNC_CNTL__YCLK_SEL__SHIFT) |
@@ -3077,7 +3079,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
3077 (memory_clock <= pi->mclk_strobe_mode_threshold)) 3079 (memory_clock <= pi->mclk_strobe_mode_threshold))
3078 memory_level->StrobeEnable = 1; 3080 memory_level->StrobeEnable = 1;
3079 3081
3080 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { 3082 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
3081 memory_level->StrobeRatio = 3083 memory_level->StrobeRatio =
3082 ci_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable); 3084 ci_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable);
3083 if (pi->mclk_edc_enable_threshold && 3085 if (pi->mclk_edc_enable_threshold &&
@@ -3695,40 +3697,6 @@ static int ci_find_boot_level(struct ci_single_dpm_table *table,
3695 return ret; 3697 return ret;
3696} 3698}
3697 3699
3698static void ci_save_default_power_profile(struct amdgpu_device *adev)
3699{
3700 struct ci_power_info *pi = ci_get_pi(adev);
3701 struct SMU7_Discrete_GraphicsLevel *levels =
3702 pi->smc_state_table.GraphicsLevel;
3703 uint32_t min_level = 0;
3704
3705 pi->default_gfx_power_profile.activity_threshold =
3706 be16_to_cpu(levels[0].ActivityLevel);
3707 pi->default_gfx_power_profile.up_hyst = levels[0].UpH;
3708 pi->default_gfx_power_profile.down_hyst = levels[0].DownH;
3709 pi->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE;
3710
3711 pi->default_compute_power_profile = pi->default_gfx_power_profile;
3712 pi->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE;
3713
3714 /* Optimize compute power profile: Use only highest
3715 * 2 power levels (if more than 2 are available), Hysteresis:
3716 * 0ms up, 5ms down
3717 */
3718 if (pi->smc_state_table.GraphicsDpmLevelCount > 2)
3719 min_level = pi->smc_state_table.GraphicsDpmLevelCount - 2;
3720 else if (pi->smc_state_table.GraphicsDpmLevelCount == 2)
3721 min_level = 1;
3722 pi->default_compute_power_profile.min_sclk =
3723 be32_to_cpu(levels[min_level].SclkFrequency);
3724
3725 pi->default_compute_power_profile.up_hyst = 0;
3726 pi->default_compute_power_profile.down_hyst = 5;
3727
3728 pi->gfx_power_profile = pi->default_gfx_power_profile;
3729 pi->compute_power_profile = pi->default_compute_power_profile;
3730}
3731
3732static int ci_init_smc_table(struct amdgpu_device *adev) 3700static int ci_init_smc_table(struct amdgpu_device *adev)
3733{ 3701{
3734 struct ci_power_info *pi = ci_get_pi(adev); 3702 struct ci_power_info *pi = ci_get_pi(adev);
@@ -3752,7 +3720,7 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
3752 if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC) 3720 if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC)
3753 table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; 3721 table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
3754 3722
3755 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) 3723 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
3756 table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; 3724 table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
3757 3725
3758 if (ulv->supported) { 3726 if (ulv->supported) {
@@ -3874,8 +3842,6 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
3874 if (ret) 3842 if (ret)
3875 return ret; 3843 return ret;
3876 3844
3877 ci_save_default_power_profile(adev);
3878
3879 return 0; 3845 return 0;
3880} 3846}
3881 3847
@@ -4549,12 +4515,12 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev,
4549 for (k = 0; k < table->num_entries; k++) { 4515 for (k = 0; k < table->num_entries; k++) {
4550 table->mc_reg_table_entry[k].mc_data[j] = 4516 table->mc_reg_table_entry[k].mc_data[j] =
4551 (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); 4517 (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
4552 if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) 4518 if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
4553 table->mc_reg_table_entry[k].mc_data[j] |= 0x100; 4519 table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
4554 } 4520 }
4555 j++; 4521 j++;
4556 4522
4557 if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { 4523 if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
4558 if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) 4524 if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
4559 return -EINVAL; 4525 return -EINVAL;
4560 table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; 4526 table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD;
@@ -6277,6 +6243,8 @@ static int ci_dpm_early_init(void *handle)
6277{ 6243{
6278 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6244 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6279 6245
6246 adev->powerplay.pp_funcs = &ci_dpm_funcs;
6247 adev->powerplay.pp_handle = adev;
6280 ci_dpm_set_irq_funcs(adev); 6248 ci_dpm_set_irq_funcs(adev);
6281 6249
6282 return 0; 6250 return 0;
@@ -6639,9 +6607,10 @@ static int ci_dpm_force_clock_level(void *handle,
6639 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6607 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6640 struct ci_power_info *pi = ci_get_pi(adev); 6608 struct ci_power_info *pi = ci_get_pi(adev);
6641 6609
6642 if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO | 6610 if (adev->pm.dpm.forced_level != AMD_DPM_FORCED_LEVEL_MANUAL)
6643 AMD_DPM_FORCED_LEVEL_LOW | 6611 return -EINVAL;
6644 AMD_DPM_FORCED_LEVEL_HIGH)) 6612
6613 if (mask == 0)
6645 return -EINVAL; 6614 return -EINVAL;
6646 6615
6647 switch (type) { 6616 switch (type) {
@@ -6662,15 +6631,15 @@ static int ci_dpm_force_clock_level(void *handle,
6662 case PP_PCIE: 6631 case PP_PCIE:
6663 { 6632 {
6664 uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask; 6633 uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
6665 uint32_t level = 0;
6666 6634
6667 while (tmp >>= 1) 6635 if (!pi->pcie_dpm_key_disabled) {
6668 level++; 6636 if (fls(tmp) != ffs(tmp))
6669 6637 amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_UnForceLevel);
6670 if (!pi->pcie_dpm_key_disabled) 6638 else
6671 amdgpu_ci_send_msg_to_smc_with_parameter(adev, 6639 amdgpu_ci_send_msg_to_smc_with_parameter(adev,
6672 PPSMC_MSG_PCIeDPM_ForceLevel, 6640 PPSMC_MSG_PCIeDPM_ForceLevel,
6673 level); 6641 fls(tmp) - 1);
6642 }
6674 break; 6643 break;
6675 } 6644 }
6676 default: 6645 default:
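The reworked PP_PCIE branch above derives the forced level straight from the requested mask: a single set bit (fls(tmp) == ffs(tmp)) forces level fls(tmp) - 1, anything else unforces PCIe DPM. A small worked example, for illustration only:

    /* Illustrative only:
     *   mask = 0x4 (level 2 requested) -> fls(0x4) = 3, ffs(0x4) = 3,
     *     so PPSMC_MSG_PCIeDPM_ForceLevel is sent with fls(0x4) - 1 = 2.
     *   mask = 0x6 (levels 1 and 2)    -> fls(0x6) = 3, ffs(0x6) = 2,
     *     so PPSMC_MSG_PCIeDPM_UnForceLevel is sent instead.
     */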
@@ -6752,222 +6721,6 @@ static int ci_dpm_set_mclk_od(void *handle, uint32_t value)
6752 return 0; 6721 return 0;
6753} 6722}
6754 6723
6755static int ci_dpm_get_power_profile_state(void *handle,
6756 struct amd_pp_profile *query)
6757{
6758 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6759 struct ci_power_info *pi = ci_get_pi(adev);
6760
6761 if (!pi || !query)
6762 return -EINVAL;
6763
6764 if (query->type == AMD_PP_GFX_PROFILE)
6765 memcpy(query, &pi->gfx_power_profile,
6766 sizeof(struct amd_pp_profile));
6767 else if (query->type == AMD_PP_COMPUTE_PROFILE)
6768 memcpy(query, &pi->compute_power_profile,
6769 sizeof(struct amd_pp_profile));
6770 else
6771 return -EINVAL;
6772
6773 return 0;
6774}
6775
6776static int ci_populate_requested_graphic_levels(struct amdgpu_device *adev,
6777 struct amd_pp_profile *request)
6778{
6779 struct ci_power_info *pi = ci_get_pi(adev);
6780 struct ci_dpm_table *dpm_table = &(pi->dpm_table);
6781 struct SMU7_Discrete_GraphicsLevel *levels =
6782 pi->smc_state_table.GraphicsLevel;
6783 uint32_t array = pi->dpm_table_start +
6784 offsetof(SMU7_Discrete_DpmTable, GraphicsLevel);
6785 uint32_t array_size = sizeof(struct SMU7_Discrete_GraphicsLevel) *
6786 SMU7_MAX_LEVELS_GRAPHICS;
6787 uint32_t i;
6788
6789 for (i = 0; i < dpm_table->sclk_table.count; i++) {
6790 levels[i].ActivityLevel =
6791 cpu_to_be16(request->activity_threshold);
6792 levels[i].EnabledForActivity = 1;
6793 levels[i].UpH = request->up_hyst;
6794 levels[i].DownH = request->down_hyst;
6795 }
6796
6797 return amdgpu_ci_copy_bytes_to_smc(adev, array, (uint8_t *)levels,
6798 array_size, pi->sram_end);
6799}
6800
6801static void ci_find_min_clock_masks(struct amdgpu_device *adev,
6802 uint32_t *sclk_mask, uint32_t *mclk_mask,
6803 uint32_t min_sclk, uint32_t min_mclk)
6804{
6805 struct ci_power_info *pi = ci_get_pi(adev);
6806 struct ci_dpm_table *dpm_table = &(pi->dpm_table);
6807 uint32_t i;
6808
6809 for (i = 0; i < dpm_table->sclk_table.count; i++) {
6810 if (dpm_table->sclk_table.dpm_levels[i].enabled &&
6811 dpm_table->sclk_table.dpm_levels[i].value >= min_sclk)
6812 *sclk_mask |= 1 << i;
6813 }
6814
6815 for (i = 0; i < dpm_table->mclk_table.count; i++) {
6816 if (dpm_table->mclk_table.dpm_levels[i].enabled &&
6817 dpm_table->mclk_table.dpm_levels[i].value >= min_mclk)
6818 *mclk_mask |= 1 << i;
6819 }
6820}
6821
6822static int ci_set_power_profile_state(struct amdgpu_device *adev,
6823 struct amd_pp_profile *request)
6824{
6825 struct ci_power_info *pi = ci_get_pi(adev);
6826 int tmp_result, result = 0;
6827 uint32_t sclk_mask = 0, mclk_mask = 0;
6828
6829 tmp_result = ci_freeze_sclk_mclk_dpm(adev);
6830 if (tmp_result) {
6831 DRM_ERROR("Failed to freeze SCLK MCLK DPM!");
6832 result = tmp_result;
6833 }
6834
6835 tmp_result = ci_populate_requested_graphic_levels(adev,
6836 request);
6837 if (tmp_result) {
6838 DRM_ERROR("Failed to populate requested graphic levels!");
6839 result = tmp_result;
6840 }
6841
6842 tmp_result = ci_unfreeze_sclk_mclk_dpm(adev);
6843 if (tmp_result) {
6844 DRM_ERROR("Failed to unfreeze SCLK MCLK DPM!");
6845 result = tmp_result;
6846 }
6847
6848 ci_find_min_clock_masks(adev, &sclk_mask, &mclk_mask,
6849 request->min_sclk, request->min_mclk);
6850
6851 if (sclk_mask) {
6852 if (!pi->sclk_dpm_key_disabled)
6853 amdgpu_ci_send_msg_to_smc_with_parameter(
6854 adev,
6855 PPSMC_MSG_SCLKDPM_SetEnabledMask,
6856 pi->dpm_level_enable_mask.
6857 sclk_dpm_enable_mask &
6858 sclk_mask);
6859 }
6860
6861 if (mclk_mask) {
6862 if (!pi->mclk_dpm_key_disabled)
6863 amdgpu_ci_send_msg_to_smc_with_parameter(
6864 adev,
6865 PPSMC_MSG_MCLKDPM_SetEnabledMask,
6866 pi->dpm_level_enable_mask.
6867 mclk_dpm_enable_mask &
6868 mclk_mask);
6869 }
6870
6871
6872 return result;
6873}
6874
6875static int ci_dpm_set_power_profile_state(void *handle,
6876 struct amd_pp_profile *request)
6877{
6878 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6879 struct ci_power_info *pi = ci_get_pi(adev);
6880 int ret = -1;
6881
6882 if (!pi || !request)
6883 return -EINVAL;
6884
6885 if (adev->pm.dpm.forced_level !=
6886 AMD_DPM_FORCED_LEVEL_AUTO)
6887 return -EINVAL;
6888
6889 if (request->min_sclk ||
6890 request->min_mclk ||
6891 request->activity_threshold ||
6892 request->up_hyst ||
6893 request->down_hyst) {
6894 if (request->type == AMD_PP_GFX_PROFILE)
6895 memcpy(&pi->gfx_power_profile, request,
6896 sizeof(struct amd_pp_profile));
6897 else if (request->type == AMD_PP_COMPUTE_PROFILE)
6898 memcpy(&pi->compute_power_profile, request,
6899 sizeof(struct amd_pp_profile));
6900 else
6901 return -EINVAL;
6902
6903 if (request->type == pi->current_power_profile)
6904 ret = ci_set_power_profile_state(
6905 adev,
6906 request);
6907 } else {
6908 /* set power profile if it exists */
6909 switch (request->type) {
6910 case AMD_PP_GFX_PROFILE:
6911 ret = ci_set_power_profile_state(
6912 adev,
6913 &pi->gfx_power_profile);
6914 break;
6915 case AMD_PP_COMPUTE_PROFILE:
6916 ret = ci_set_power_profile_state(
6917 adev,
6918 &pi->compute_power_profile);
6919 break;
6920 default:
6921 return -EINVAL;
6922 }
6923 }
6924
6925 if (!ret)
6926 pi->current_power_profile = request->type;
6927
6928 return 0;
6929}
6930
6931static int ci_dpm_reset_power_profile_state(void *handle,
6932 struct amd_pp_profile *request)
6933{
6934 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6935 struct ci_power_info *pi = ci_get_pi(adev);
6936
6937 if (!pi || !request)
6938 return -EINVAL;
6939
6940 if (request->type == AMD_PP_GFX_PROFILE) {
6941 pi->gfx_power_profile = pi->default_gfx_power_profile;
6942 return ci_dpm_set_power_profile_state(adev,
6943 &pi->gfx_power_profile);
6944 } else if (request->type == AMD_PP_COMPUTE_PROFILE) {
6945 pi->compute_power_profile =
6946 pi->default_compute_power_profile;
6947 return ci_dpm_set_power_profile_state(adev,
6948 &pi->compute_power_profile);
6949 } else
6950 return -EINVAL;
6951}
6952
6953static int ci_dpm_switch_power_profile(void *handle,
6954 enum amd_pp_profile_type type)
6955{
6956 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6957 struct ci_power_info *pi = ci_get_pi(adev);
6958 struct amd_pp_profile request = {0};
6959
6960 if (!pi)
6961 return -EINVAL;
6962
6963 if (pi->current_power_profile != type) {
6964 request.type = type;
6965 return ci_dpm_set_power_profile_state(adev, &request);
6966 }
6967
6968 return 0;
6969}
6970
6971static int ci_dpm_read_sensor(void *handle, int idx, 6724static int ci_dpm_read_sensor(void *handle, int idx,
6972 void *value, int *size) 6725 void *value, int *size)
6973{ 6726{
@@ -7011,7 +6764,7 @@ static int ci_dpm_read_sensor(void *handle, int idx,
7011 } 6764 }
7012} 6765}
7013 6766
7014const struct amd_ip_funcs ci_dpm_ip_funcs = { 6767static const struct amd_ip_funcs ci_dpm_ip_funcs = {
7015 .name = "ci_dpm", 6768 .name = "ci_dpm",
7016 .early_init = ci_dpm_early_init, 6769 .early_init = ci_dpm_early_init,
7017 .late_init = ci_dpm_late_init, 6770 .late_init = ci_dpm_late_init,
@@ -7028,8 +6781,16 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = {
7028 .set_powergating_state = ci_dpm_set_powergating_state, 6781 .set_powergating_state = ci_dpm_set_powergating_state,
7029}; 6782};
7030 6783
7031const struct amd_pm_funcs ci_dpm_funcs = { 6784const struct amdgpu_ip_block_version ci_smu_ip_block =
7032 .get_temperature = &ci_dpm_get_temp, 6785{
6786 .type = AMD_IP_BLOCK_TYPE_SMC,
6787 .major = 7,
6788 .minor = 0,
6789 .rev = 0,
6790 .funcs = &ci_dpm_ip_funcs,
6791};
6792
6793static const struct amd_pm_funcs ci_dpm_funcs = {
7033 .pre_set_power_state = &ci_dpm_pre_set_power_state, 6794 .pre_set_power_state = &ci_dpm_pre_set_power_state,
7034 .set_power_state = &ci_dpm_set_power_state, 6795 .set_power_state = &ci_dpm_set_power_state,
7035 .post_set_power_state = &ci_dpm_post_set_power_state, 6796 .post_set_power_state = &ci_dpm_post_set_power_state,
@@ -7053,10 +6814,6 @@ const struct amd_pm_funcs ci_dpm_funcs = {
7053 .set_mclk_od = ci_dpm_set_mclk_od, 6814 .set_mclk_od = ci_dpm_set_mclk_od,
7054 .check_state_equal = ci_check_state_equal, 6815 .check_state_equal = ci_check_state_equal,
7055 .get_vce_clock_state = amdgpu_get_vce_clock_state, 6816 .get_vce_clock_state = amdgpu_get_vce_clock_state,
7056 .get_power_profile_state = ci_dpm_get_power_profile_state,
7057 .set_power_profile_state = ci_dpm_set_power_profile_state,
7058 .reset_power_profile_state = ci_dpm_reset_power_profile_state,
7059 .switch_power_profile = ci_dpm_switch_power_profile,
7060 .read_sensor = ci_dpm_read_sensor, 6817 .read_sensor = ci_dpm_read_sensor,
7061}; 6818};
7062 6819
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
index 84cbc9c45f4d..91be2996ae7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
@@ -295,13 +295,6 @@ struct ci_power_info {
295 bool fan_is_controlled_by_smc; 295 bool fan_is_controlled_by_smc;
296 u32 t_min; 296 u32 t_min;
297 u32 fan_ctrl_default_mode; 297 u32 fan_ctrl_default_mode;
298
299 /* power profile */
300 struct amd_pp_profile gfx_power_profile;
301 struct amd_pp_profile compute_power_profile;
302 struct amd_pp_profile default_gfx_power_profile;
303 struct amd_pp_profile default_compute_power_profile;
304 enum amd_pp_profile_type current_power_profile;
305}; 298};
306 299
307#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0 300#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 8e59e65efd44..0df22030e713 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -67,7 +67,6 @@
67 67
68#include "amdgpu_dm.h" 68#include "amdgpu_dm.h"
69#include "amdgpu_amdkfd.h" 69#include "amdgpu_amdkfd.h"
70#include "amdgpu_powerplay.h"
71#include "dce_virtual.h" 70#include "dce_virtual.h"
72 71
73/* 72/*
@@ -1715,6 +1714,27 @@ static void cik_detect_hw_virtualization(struct amdgpu_device *adev)
1715 adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; 1714 adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
1716} 1715}
1717 1716
1717static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
1718{
1719 if (!ring || !ring->funcs->emit_wreg) {
1720 WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
1721 RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
1722 } else {
1723 amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
1724 }
1725}
1726
1727static void cik_invalidate_hdp(struct amdgpu_device *adev,
1728 struct amdgpu_ring *ring)
1729{
1730 if (!ring || !ring->funcs->emit_wreg) {
1731 WREG32(mmHDP_DEBUG0, 1);
1732 RREG32(mmHDP_DEBUG0);
1733 } else {
1734 amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
1735 }
1736}
1737
1718static const struct amdgpu_asic_funcs cik_asic_funcs = 1738static const struct amdgpu_asic_funcs cik_asic_funcs =
1719{ 1739{
1720 .read_disabled_bios = &cik_read_disabled_bios, 1740 .read_disabled_bios = &cik_read_disabled_bios,
@@ -1726,6 +1746,8 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
1726 .set_uvd_clocks = &cik_set_uvd_clocks, 1746 .set_uvd_clocks = &cik_set_uvd_clocks,
1727 .set_vce_clocks = &cik_set_vce_clocks, 1747 .set_vce_clocks = &cik_set_vce_clocks,
1728 .get_config_memsize = &cik_get_config_memsize, 1748 .get_config_memsize = &cik_get_config_memsize,
1749 .flush_hdp = &cik_flush_hdp,
1750 .invalidate_hdp = &cik_invalidate_hdp,
1729}; 1751};
1730 1752
1731static int cik_common_early_init(void *handle) 1753static int cik_common_early_init(void *handle)
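The new flush_hdp/invalidate_hdp callbacks replace the per-ring open-coded HDP handling: when a ring that implements emit_wreg is passed in, the register write is emitted on that ring, otherwise the code falls back to direct MMIO. Common code is then expected to reach these hooks through thin asic-level wrappers; a hedged sketch of that dispatch, shown for illustration only:

    /* Illustrative only: how the asic_funcs hooks added above are
     * typically reached from common code.
     */
    #define amdgpu_asic_flush_hdp(adev, ring) \
            ((adev)->asic_funcs->flush_hdp((adev), (ring)))
    #define amdgpu_asic_invalidate_hdp(adev, ring) \
            ((adev)->asic_funcs->invalidate_hdp((adev), (ring)))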
@@ -1864,10 +1886,6 @@ static int cik_common_early_init(void *handle)
1864 return -EINVAL; 1886 return -EINVAL;
1865 } 1887 }
1866 1888
1867 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1868
1869 amdgpu_device_get_pcie_info(adev);
1870
1871 return 0; 1889 return 0;
1872} 1890}
1873 1891
@@ -1977,7 +1995,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
1977 amdgpu_device_ip_block_add(adev, &cik_common_ip_block); 1995 amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
1978 amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); 1996 amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
1979 amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); 1997 amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
1980 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 1998 if (amdgpu_dpm == -1)
1999 amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
2000 else
2001 amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
1981 if (adev->enable_virtual_display) 2002 if (adev->enable_virtual_display)
1982 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 2003 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
1983#if defined(CONFIG_DRM_AMD_DC) 2004#if defined(CONFIG_DRM_AMD_DC)
@@ -1995,7 +2016,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
1995 amdgpu_device_ip_block_add(adev, &cik_common_ip_block); 2016 amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
1996 amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); 2017 amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
1997 amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); 2018 amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
1998 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 2019 if (amdgpu_dpm == -1)
2020 amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
2021 else
2022 amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
1999 if (adev->enable_virtual_display) 2023 if (adev->enable_virtual_display)
2000 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 2024 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
2001#if defined(CONFIG_DRM_AMD_DC) 2025#if defined(CONFIG_DRM_AMD_DC)
@@ -2013,7 +2037,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
2013 amdgpu_device_ip_block_add(adev, &cik_common_ip_block); 2037 amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
2014 amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); 2038 amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
2015 amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); 2039 amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
2016 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 2040 amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
2017 if (adev->enable_virtual_display) 2041 if (adev->enable_virtual_display)
2018 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 2042 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
2019#if defined(CONFIG_DRM_AMD_DC) 2043#if defined(CONFIG_DRM_AMD_DC)
@@ -2032,7 +2056,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
2032 amdgpu_device_ip_block_add(adev, &cik_common_ip_block); 2056 amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
2033 amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); 2057 amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
2034 amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); 2058 amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
2035 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 2059 amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
2036 if (adev->enable_virtual_display) 2060 if (adev->enable_virtual_display)
2037 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 2061 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
2038#if defined(CONFIG_DRM_AMD_DC) 2062#if defined(CONFIG_DRM_AMD_DC)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h
index c4989f51ecef..e49c6f15a0a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -24,6 +24,8 @@
24#ifndef __CIK_H__ 24#ifndef __CIK_H__
25#define __CIK_H__ 25#define __CIK_H__
26 26
27#define CIK_FLUSH_GPU_TLB_NUM_WREG 3
28
27void cik_srbm_select(struct amdgpu_device *adev, 29void cik_srbm_select(struct amdgpu_device *adev,
28 u32 me, u32 pipe, u32 queue, u32 vmid); 30 u32 me, u32 pipe, u32 queue, u32 vmid);
29int cik_set_ip_blocks(struct amdgpu_device *adev); 31int cik_set_ip_blocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
index c7b4349f6319..2a086610f74d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
@@ -24,8 +24,7 @@
24#ifndef __CIK_DPM_H__ 24#ifndef __CIK_DPM_H__
25#define __CIK_DPM_H__ 25#define __CIK_DPM_H__
26 26
27extern const struct amd_ip_funcs ci_dpm_ip_funcs; 27extern const struct amdgpu_ip_block_version ci_smu_ip_block;
28extern const struct amd_ip_funcs kv_dpm_ip_funcs; 28extern const struct amdgpu_ip_block_version kv_smu_ip_block;
29extern const struct amd_pm_funcs ci_dpm_funcs; 29
30extern const struct amd_pm_funcs kv_dpm_funcs;
31#endif 30#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index d5a05c19708f..44d10c2172f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -111,7 +111,7 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
111 cik_ih_disable_interrupts(adev); 111 cik_ih_disable_interrupts(adev);
112 112
113 /* setup interrupt control */ 113 /* setup interrupt control */
114 WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); 114 WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
115 interrupt_cntl = RREG32(mmINTERRUPT_CNTL); 115 interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
116 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi 116 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
117 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN 117 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -281,7 +281,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
281 entry->src_data[0] = dw[1] & 0xfffffff; 281 entry->src_data[0] = dw[1] & 0xfffffff;
282 entry->ring_id = dw[2] & 0xff; 282 entry->ring_id = dw[2] & 0xff;
283 entry->vmid = (dw[2] >> 8) & 0xff; 283 entry->vmid = (dw[2] >> 8) & 0xff;
284 entry->pas_id = (dw[2] >> 16) & 0xffff; 284 entry->pasid = (dw[2] >> 16) & 0xffff;
285 285
286 /* wptr/rptr are in bytes! */ 286 /* wptr/rptr are in bytes! */
287 adev->irq.ih.rptr += 16; 287 adev->irq.ih.rptr += 16;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 6e8278e689b1..a7576255cc30 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -261,13 +261,6 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
261 amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ 261 amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
262} 262}
263 263
264static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
265{
266 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
267 amdgpu_ring_write(ring, mmHDP_DEBUG0);
268 amdgpu_ring_write(ring, 1);
269}
270
271/** 264/**
272 * cik_sdma_ring_emit_fence - emit a fence on the DMA ring 265 * cik_sdma_ring_emit_fence - emit a fence on the DMA ring
273 * 266 *
@@ -317,7 +310,7 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
317 310
318 if ((adev->mman.buffer_funcs_ring == sdma0) || 311 if ((adev->mman.buffer_funcs_ring == sdma0) ||
319 (adev->mman.buffer_funcs_ring == sdma1)) 312 (adev->mman.buffer_funcs_ring == sdma1))
320 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 313 amdgpu_ttm_set_buffer_funcs_status(adev, false);
321 314
322 for (i = 0; i < adev->sdma.num_instances; i++) { 315 for (i = 0; i < adev->sdma.num_instances; i++) {
323 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 316 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
@@ -517,7 +510,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
517 } 510 }
518 511
519 if (adev->mman.buffer_funcs_ring == ring) 512 if (adev->mman.buffer_funcs_ring == ring)
520 amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); 513 amdgpu_ttm_set_buffer_funcs_status(adev, true);
521 } 514 }
522 515
523 return 0; 516 return 0;
@@ -866,7 +859,7 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
866 amdgpu_ring_write(ring, addr & 0xfffffffc); 859 amdgpu_ring_write(ring, addr & 0xfffffffc);
867 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 860 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
868 amdgpu_ring_write(ring, seq); /* reference */ 861 amdgpu_ring_write(ring, seq); /* reference */
869 amdgpu_ring_write(ring, 0xfffffff); /* mask */ 862 amdgpu_ring_write(ring, 0xffffffff); /* mask */
870 amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */ 863 amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
871} 864}
872 865
@@ -885,18 +878,7 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
885 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | 878 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
886 SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ 879 SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
887 880
888 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); 881 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
889 if (vmid < 8) {
890 amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
891 } else {
892 amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
893 }
894 amdgpu_ring_write(ring, pd_addr >> 12);
895
896 /* flush TLB */
897 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
898 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
899 amdgpu_ring_write(ring, 1 << vmid);
900 882
901 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); 883 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
902 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); 884 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
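The open-coded SRBM writes for the page-table base and TLB invalidate are replaced by the generic amdgpu_gmc_emit_flush_gpu_tlb() helper, which routes the same register writes through the ring's new emit_wreg callback and is budgeted as CIK_FLUSH_GPU_TLB_NUM_WREG writes in the frame-size accounting further down. A sketch of what that path amounts to on CIK, mirroring the writes removed above; the function name here is an assumption, not the actual GMC callback:

    /* Illustrative only: roughly the per-VMID flush the generic helper
     * performs on CIK-class GMC, matching the removed inline sequence.
     */
    static void example_cik_flush_gpu_tlb_ring(struct amdgpu_ring *ring,
                                               unsigned int vmid,
                                               uint64_t pd_addr)
    {
            uint32_t reg = (vmid < 8) ?
                    (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid) :
                    (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8);

            /* point the VMID's context at the new page directory */
            amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);

            /* flush the TLB for this VMID */
            amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
    }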
@@ -906,6 +888,14 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
906 amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ 888 amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
907} 889}
908 890
891static void cik_sdma_ring_emit_wreg(struct amdgpu_ring *ring,
892 uint32_t reg, uint32_t val)
893{
894 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
895 amdgpu_ring_write(ring, reg);
896 amdgpu_ring_write(ring, val);
897}
898
909static void cik_enable_sdma_mgcg(struct amdgpu_device *adev, 899static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
910 bool enable) 900 bool enable)
911{ 901{
@@ -1279,9 +1269,9 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
1279 .set_wptr = cik_sdma_ring_set_wptr, 1269 .set_wptr = cik_sdma_ring_set_wptr,
1280 .emit_frame_size = 1270 .emit_frame_size =
1281 6 + /* cik_sdma_ring_emit_hdp_flush */ 1271 6 + /* cik_sdma_ring_emit_hdp_flush */
1282 3 + /* cik_sdma_ring_emit_hdp_invalidate */ 1272 3 + /* hdp invalidate */
1283 6 + /* cik_sdma_ring_emit_pipeline_sync */ 1273 6 + /* cik_sdma_ring_emit_pipeline_sync */
1284 12 + /* cik_sdma_ring_emit_vm_flush */ 1274 CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* cik_sdma_ring_emit_vm_flush */
1285 9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */ 1275 9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
1286 .emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */ 1276 .emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */
1287 .emit_ib = cik_sdma_ring_emit_ib, 1277 .emit_ib = cik_sdma_ring_emit_ib,
@@ -1289,11 +1279,11 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
1289 .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync, 1279 .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
1290 .emit_vm_flush = cik_sdma_ring_emit_vm_flush, 1280 .emit_vm_flush = cik_sdma_ring_emit_vm_flush,
1291 .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush, 1281 .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
1292 .emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
1293 .test_ring = cik_sdma_ring_test_ring, 1282 .test_ring = cik_sdma_ring_test_ring,
1294 .test_ib = cik_sdma_ring_test_ib, 1283 .test_ib = cik_sdma_ring_test_ib,
1295 .insert_nop = cik_sdma_ring_insert_nop, 1284 .insert_nop = cik_sdma_ring_insert_nop,
1296 .pad_ib = cik_sdma_ring_pad_ib, 1285 .pad_ib = cik_sdma_ring_pad_ib,
1286 .emit_wreg = cik_sdma_ring_emit_wreg,
1297}; 1287};
1298 1288
1299static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) 1289static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1391,9 +1381,6 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
1391 .copy_pte = cik_sdma_vm_copy_pte, 1381 .copy_pte = cik_sdma_vm_copy_pte,
1392 1382
1393 .write_pte = cik_sdma_vm_write_pte, 1383 .write_pte = cik_sdma_vm_write_pte,
1394
1395 .set_max_nums_pte_pde = 0x1fffff >> 3,
1396 .set_pte_pde_num_dw = 10,
1397 .set_pte_pde = cik_sdma_vm_set_pte_pde, 1384 .set_pte_pde = cik_sdma_vm_set_pte_pde,
1398}; 1385};
1399 1386
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index f576e9cbbc61..960c29e17da6 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -111,7 +111,7 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
111 cz_ih_disable_interrupts(adev); 111 cz_ih_disable_interrupts(adev);
112 112
113 /* setup interrupt control */ 113 /* setup interrupt control */
114 WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); 114 WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
115 interrupt_cntl = RREG32(mmINTERRUPT_CNTL); 115 interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
116 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi 116 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
117 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN 117 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -260,7 +260,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
260 entry->src_data[0] = dw[1] & 0xfffffff; 260 entry->src_data[0] = dw[1] & 0xfffffff;
261 entry->ring_id = dw[2] & 0xff; 261 entry->ring_id = dw[2] & 0xff;
262 entry->vmid = (dw[2] >> 8) & 0xff; 262 entry->vmid = (dw[2] >> 8) & 0xff;
263 entry->pas_id = (dw[2] >> 16) & 0xffff; 263 entry->pasid = (dw[2] >> 16) & 0xffff;
264 264
265 /* wptr/rptr are in bytes! */ 265 /* wptr/rptr are in bytes! */
266 adev->irq.ih.rptr += 16; 266 adev->irq.ih.rptr += 16;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index f34bc68aadfb..452f88ea46a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -190,66 +190,6 @@ static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev,
190 spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); 190 spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
191} 191}
192 192
193static bool dce_v10_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
194{
195 if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
196 CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
197 return true;
198 else
199 return false;
200}
201
202static bool dce_v10_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
203{
204 u32 pos1, pos2;
205
206 pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
207 pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
208
209 if (pos1 != pos2)
210 return true;
211 else
212 return false;
213}
214
215/**
216 * dce_v10_0_vblank_wait - vblank wait asic callback.
217 *
218 * @adev: amdgpu_device pointer
219 * @crtc: crtc to wait for vblank on
220 *
221 * Wait for vblank on the requested crtc (evergreen+).
222 */
223static void dce_v10_0_vblank_wait(struct amdgpu_device *adev, int crtc)
224{
225 unsigned i = 100;
226
227 if (crtc >= adev->mode_info.num_crtc)
228 return;
229
230 if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
231 return;
232
233 /* depending on when we hit vblank, we may be close to active; if so,
234 * wait for another frame.
235 */
236 while (dce_v10_0_is_in_vblank(adev, crtc)) {
237 if (i++ == 100) {
238 i = 0;
239 if (!dce_v10_0_is_counter_moving(adev, crtc))
240 break;
241 }
242 }
243
244 while (!dce_v10_0_is_in_vblank(adev, crtc)) {
245 if (i++ == 100) {
246 i = 0;
247 if (!dce_v10_0_is_counter_moving(adev, crtc))
248 break;
249 }
250 }
251}
252
253static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) 193static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
254{ 194{
255 if (crtc >= adev->mode_info.num_crtc) 195 if (crtc >= adev->mode_info.num_crtc)
@@ -1205,7 +1145,7 @@ static void dce_v10_0_bandwidth_update(struct amdgpu_device *adev)
1205 u32 num_heads = 0, lb_size; 1145 u32 num_heads = 0, lb_size;
1206 int i; 1146 int i;
1207 1147
1208 amdgpu_update_display_priority(adev); 1148 amdgpu_display_update_priority(adev);
1209 1149
1210 for (i = 0; i < adev->mode_info.num_crtc; i++) { 1150 for (i = 0; i < adev->mode_info.num_crtc; i++) {
1211 if (adev->mode_info.crtcs[i]->base.enabled) 1151 if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2517,9 +2457,9 @@ static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = {
2517 .cursor_set2 = dce_v10_0_crtc_cursor_set2, 2457 .cursor_set2 = dce_v10_0_crtc_cursor_set2,
2518 .cursor_move = dce_v10_0_crtc_cursor_move, 2458 .cursor_move = dce_v10_0_crtc_cursor_move,
2519 .gamma_set = dce_v10_0_crtc_gamma_set, 2459 .gamma_set = dce_v10_0_crtc_gamma_set,
2520 .set_config = amdgpu_crtc_set_config, 2460 .set_config = amdgpu_display_crtc_set_config,
2521 .destroy = dce_v10_0_crtc_destroy, 2461 .destroy = dce_v10_0_crtc_destroy,
2522 .page_flip_target = amdgpu_crtc_page_flip_target, 2462 .page_flip_target = amdgpu_display_crtc_page_flip_target,
2523}; 2463};
2524 2464
2525static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode) 2465static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2537,7 +2477,8 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
2537 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); 2477 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
2538 dce_v10_0_vga_enable(crtc, false); 2478 dce_v10_0_vga_enable(crtc, false);
2539 /* Make sure VBLANK and PFLIP interrupts are still enabled */ 2479 /* Make sure VBLANK and PFLIP interrupts are still enabled */
2540 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); 2480 type = amdgpu_display_crtc_idx_to_irq_type(adev,
2481 amdgpu_crtc->crtc_id);
2541 amdgpu_irq_update(adev, &adev->crtc_irq, type); 2482 amdgpu_irq_update(adev, &adev->crtc_irq, type);
2542 amdgpu_irq_update(adev, &adev->pageflip_irq, type); 2483 amdgpu_irq_update(adev, &adev->pageflip_irq, type);
2543 drm_crtc_vblank_on(crtc); 2484 drm_crtc_vblank_on(crtc);
@@ -2676,7 +2617,7 @@ static bool dce_v10_0_crtc_mode_fixup(struct drm_crtc *crtc,
2676 amdgpu_crtc->connector = NULL; 2617 amdgpu_crtc->connector = NULL;
2677 return false; 2618 return false;
2678 } 2619 }
2679 if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) 2620 if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
2680 return false; 2621 return false;
2681 if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) 2622 if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
2682 return false; 2623 return false;
@@ -2824,9 +2765,9 @@ static int dce_v10_0_sw_init(void *handle)
2824 adev->ddev->mode_config.preferred_depth = 24; 2765 adev->ddev->mode_config.preferred_depth = 24;
2825 adev->ddev->mode_config.prefer_shadow = 1; 2766 adev->ddev->mode_config.prefer_shadow = 1;
2826 2767
2827 adev->ddev->mode_config.fb_base = adev->mc.aper_base; 2768 adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
2828 2769
2829 r = amdgpu_modeset_create_props(adev); 2770 r = amdgpu_display_modeset_create_props(adev);
2830 if (r) 2771 if (r)
2831 return r; 2772 return r;
2832 2773
@@ -2841,7 +2782,7 @@ static int dce_v10_0_sw_init(void *handle)
2841 } 2782 }
2842 2783
2843 if (amdgpu_atombios_get_connector_info_from_object_table(adev)) 2784 if (amdgpu_atombios_get_connector_info_from_object_table(adev))
2844 amdgpu_print_display_setup(adev->ddev); 2785 amdgpu_display_print_display_setup(adev->ddev);
2845 else 2786 else
2846 return -EINVAL; 2787 return -EINVAL;
2847 2788
@@ -2921,6 +2862,11 @@ static int dce_v10_0_hw_fini(void *handle)
2921 2862
2922static int dce_v10_0_suspend(void *handle) 2863static int dce_v10_0_suspend(void *handle)
2923{ 2864{
2865 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2866
2867 adev->mode_info.bl_level =
2868 amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
2869
2924 return dce_v10_0_hw_fini(handle); 2870 return dce_v10_0_hw_fini(handle);
2925} 2871}
2926 2872
@@ -2929,6 +2875,9 @@ static int dce_v10_0_resume(void *handle)
2929 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2875 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2930 int ret; 2876 int ret;
2931 2877
2878 amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
2879 adev->mode_info.bl_level);
2880
2932 ret = dce_v10_0_hw_init(handle); 2881 ret = dce_v10_0_hw_init(handle);
2933 2882
2934 /* turn on the BL */ 2883 /* turn on the BL */
@@ -3249,7 +3198,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev,
3249{ 3198{
3250 unsigned crtc = entry->src_id - 1; 3199 unsigned crtc = entry->src_id - 1;
3251 uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); 3200 uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
3252 unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); 3201 unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, crtc);
3253 3202
3254 switch (entry->src_data[0]) { 3203 switch (entry->src_data[0]) {
3255 case 0: /* vblank */ 3204 case 0: /* vblank */
@@ -3601,7 +3550,6 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev,
3601static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { 3550static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
3602 .bandwidth_update = &dce_v10_0_bandwidth_update, 3551 .bandwidth_update = &dce_v10_0_bandwidth_update,
3603 .vblank_get_counter = &dce_v10_0_vblank_get_counter, 3552 .vblank_get_counter = &dce_v10_0_vblank_get_counter,
3604 .vblank_wait = &dce_v10_0_vblank_wait,
3605 .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, 3553 .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
3606 .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, 3554 .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
3607 .hpd_sense = &dce_v10_0_hpd_sense, 3555 .hpd_sense = &dce_v10_0_hpd_sense,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 26378bd6aba4..a7c1c584a191 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -207,66 +207,6 @@ static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev,
207 spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); 207 spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
208} 208}
209 209
210static bool dce_v11_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
211{
212 if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
213 CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
214 return true;
215 else
216 return false;
217}
218
219static bool dce_v11_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
220{
221 u32 pos1, pos2;
222
223 pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
224 pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
225
226 if (pos1 != pos2)
227 return true;
228 else
229 return false;
230}
231
232/**
233 * dce_v11_0_vblank_wait - vblank wait asic callback.
234 *
235 * @adev: amdgpu_device pointer
236 * @crtc: crtc to wait for vblank on
237 *
238 * Wait for vblank on the requested crtc (evergreen+).
239 */
240static void dce_v11_0_vblank_wait(struct amdgpu_device *adev, int crtc)
241{
242 unsigned i = 100;
243
244 if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
245 return;
246
247 if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
248 return;
249
250 /* depending on when we hit vblank, we may be close to active; if so,
251 * wait for another frame.
252 */
253 while (dce_v11_0_is_in_vblank(adev, crtc)) {
254 if (i++ == 100) {
255 i = 0;
256 if (!dce_v11_0_is_counter_moving(adev, crtc))
257 break;
258 }
259 }
260
261 while (!dce_v11_0_is_in_vblank(adev, crtc)) {
262 if (i++ == 100) {
263 i = 0;
264 if (!dce_v11_0_is_counter_moving(adev, crtc))
265 break;
266 }
267 }
268}
269
270static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) 210static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
271{ 211{
272 if (crtc < 0 || crtc >= adev->mode_info.num_crtc) 212 if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
@@ -1229,7 +1169,7 @@ static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev)
1229 u32 num_heads = 0, lb_size; 1169 u32 num_heads = 0, lb_size;
1230 int i; 1170 int i;
1231 1171
1232 amdgpu_update_display_priority(adev); 1172 amdgpu_display_update_priority(adev);
1233 1173
1234 for (i = 0; i < adev->mode_info.num_crtc; i++) { 1174 for (i = 0; i < adev->mode_info.num_crtc; i++) {
1235 if (adev->mode_info.crtcs[i]->base.enabled) 1175 if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2592,9 +2532,9 @@ static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
2592 .cursor_set2 = dce_v11_0_crtc_cursor_set2, 2532 .cursor_set2 = dce_v11_0_crtc_cursor_set2,
2593 .cursor_move = dce_v11_0_crtc_cursor_move, 2533 .cursor_move = dce_v11_0_crtc_cursor_move,
2594 .gamma_set = dce_v11_0_crtc_gamma_set, 2534 .gamma_set = dce_v11_0_crtc_gamma_set,
2595 .set_config = amdgpu_crtc_set_config, 2535 .set_config = amdgpu_display_crtc_set_config,
2596 .destroy = dce_v11_0_crtc_destroy, 2536 .destroy = dce_v11_0_crtc_destroy,
2597 .page_flip_target = amdgpu_crtc_page_flip_target, 2537 .page_flip_target = amdgpu_display_crtc_page_flip_target,
2598}; 2538};
2599 2539
2600static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) 2540static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2612,7 +2552,8 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
2612 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); 2552 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
2613 dce_v11_0_vga_enable(crtc, false); 2553 dce_v11_0_vga_enable(crtc, false);
2614 /* Make sure VBLANK and PFLIP interrupts are still enabled */ 2554 /* Make sure VBLANK and PFLIP interrupts are still enabled */
2615 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); 2555 type = amdgpu_display_crtc_idx_to_irq_type(adev,
2556 amdgpu_crtc->crtc_id);
2616 amdgpu_irq_update(adev, &adev->crtc_irq, type); 2557 amdgpu_irq_update(adev, &adev->crtc_irq, type);
2617 amdgpu_irq_update(adev, &adev->pageflip_irq, type); 2558 amdgpu_irq_update(adev, &adev->pageflip_irq, type);
2618 drm_crtc_vblank_on(crtc); 2559 drm_crtc_vblank_on(crtc);
@@ -2779,7 +2720,7 @@ static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc,
2779 amdgpu_crtc->connector = NULL; 2720 amdgpu_crtc->connector = NULL;
2780 return false; 2721 return false;
2781 } 2722 }
2782 if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) 2723 if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
2783 return false; 2724 return false;
2784 if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) 2725 if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
2785 return false; 2726 return false;
@@ -2939,9 +2880,9 @@ static int dce_v11_0_sw_init(void *handle)
2939 adev->ddev->mode_config.preferred_depth = 24; 2880 adev->ddev->mode_config.preferred_depth = 24;
2940 adev->ddev->mode_config.prefer_shadow = 1; 2881 adev->ddev->mode_config.prefer_shadow = 1;
2941 2882
2942 adev->ddev->mode_config.fb_base = adev->mc.aper_base; 2883 adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
2943 2884
2944 r = amdgpu_modeset_create_props(adev); 2885 r = amdgpu_display_modeset_create_props(adev);
2945 if (r) 2886 if (r)
2946 return r; 2887 return r;
2947 2888
@@ -2957,7 +2898,7 @@ static int dce_v11_0_sw_init(void *handle)
2957 } 2898 }
2958 2899
2959 if (amdgpu_atombios_get_connector_info_from_object_table(adev)) 2900 if (amdgpu_atombios_get_connector_info_from_object_table(adev))
2960 amdgpu_print_display_setup(adev->ddev); 2901 amdgpu_display_print_display_setup(adev->ddev);
2961 else 2902 else
2962 return -EINVAL; 2903 return -EINVAL;
2963 2904
@@ -3047,6 +2988,11 @@ static int dce_v11_0_hw_fini(void *handle)
3047 2988
3048static int dce_v11_0_suspend(void *handle) 2989static int dce_v11_0_suspend(void *handle)
3049{ 2990{
2991 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2992
2993 adev->mode_info.bl_level =
2994 amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
2995
3050 return dce_v11_0_hw_fini(handle); 2996 return dce_v11_0_hw_fini(handle);
3051} 2997}
3052 2998
@@ -3055,6 +3001,9 @@ static int dce_v11_0_resume(void *handle)
3055 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3001 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3056 int ret; 3002 int ret;
3057 3003
3004 amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
3005 adev->mode_info.bl_level);
3006
3058 ret = dce_v11_0_hw_init(handle); 3007 ret = dce_v11_0_hw_init(handle);
3059 3008
3060 /* turn on the BL */ 3009 /* turn on the BL */
@@ -3368,7 +3317,8 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
3368{ 3317{
3369 unsigned crtc = entry->src_id - 1; 3318 unsigned crtc = entry->src_id - 1;
3370 uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); 3319 uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
3371 unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); 3320 unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
3321 crtc);
3372 3322
3373 switch (entry->src_data[0]) { 3323 switch (entry->src_data[0]) {
3374 case 0: /* vblank */ 3324 case 0: /* vblank */
@@ -3725,7 +3675,6 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
3725static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { 3675static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
3726 .bandwidth_update = &dce_v11_0_bandwidth_update, 3676 .bandwidth_update = &dce_v11_0_bandwidth_update,
3727 .vblank_get_counter = &dce_v11_0_vblank_get_counter, 3677 .vblank_get_counter = &dce_v11_0_vblank_get_counter,
3728 .vblank_wait = &dce_v11_0_vblank_wait,
3729 .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, 3678 .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
3730 .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, 3679 .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
3731 .hpd_sense = &dce_v11_0_hpd_sense, 3680 .hpd_sense = &dce_v11_0_hpd_sense,
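Note: the suspend/resume hunks above add a save/restore of the panel backlight level around hw_fini/hw_init; the same change is repeated for dce_v6_0 and dce_v8_0 below. A minimal sketch of the pattern, using only the helpers and the bl_level field shown in this diff (illustrative fragment, not a verified build):

	/* suspend: remember the level currently programmed in the register */
	adev->mode_info.bl_level =
		amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);

	/* resume: re-program that level before re-initializing the hardware */
	amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
							   adev->mode_info.bl_level);
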
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index bd2c4f727df6..9f67b7fd3487 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -142,64 +142,6 @@ static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
142 spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); 142 spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
143} 143}
144 144
145static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
146{
147 if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & CRTC_STATUS__CRTC_V_BLANK_MASK)
148 return true;
149 else
150 return false;
151}
152
153static bool dce_v6_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
154{
155 u32 pos1, pos2;
156
157 pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
158 pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
159
160 if (pos1 != pos2)
161 return true;
162 else
163 return false;
164}
165
166/**
167 * dce_v6_0_vblank_wait - vblank wait asic callback.
168 *
169 * @crtc: crtc to wait for vblank on
170 *
171 * Wait for vblank on the requested crtc (evergreen+).
172 */
173static void dce_v6_0_vblank_wait(struct amdgpu_device *adev, int crtc)
174{
175 unsigned i = 100;
176
177 if (crtc >= adev->mode_info.num_crtc)
178 return;
179
180 if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
181 return;
182
183 /* depending on when we hit vblank, we may be close to active; if so,
184 * wait for another frame.
185 */
186 while (dce_v6_0_is_in_vblank(adev, crtc)) {
187 if (i++ == 100) {
188 i = 0;
189 if (!dce_v6_0_is_counter_moving(adev, crtc))
190 break;
191 }
192 }
193
194 while (!dce_v6_0_is_in_vblank(adev, crtc)) {
195 if (i++ == 100) {
196 i = 0;
197 if (!dce_v6_0_is_counter_moving(adev, crtc))
198 break;
199 }
200 }
201}
202
203static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) 145static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
204{ 146{
205 if (crtc >= adev->mode_info.num_crtc) 147 if (crtc >= adev->mode_info.num_crtc)
@@ -1108,7 +1050,7 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev)
1108 if (!adev->mode_info.mode_config_initialized) 1050 if (!adev->mode_info.mode_config_initialized)
1109 return; 1051 return;
1110 1052
1111 amdgpu_update_display_priority(adev); 1053 amdgpu_display_update_priority(adev);
1112 1054
1113 for (i = 0; i < adev->mode_info.num_crtc; i++) { 1055 for (i = 0; i < adev->mode_info.num_crtc; i++) {
1114 if (adev->mode_info.crtcs[i]->base.enabled) 1056 if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2407,9 +2349,9 @@ static const struct drm_crtc_funcs dce_v6_0_crtc_funcs = {
2407 .cursor_set2 = dce_v6_0_crtc_cursor_set2, 2349 .cursor_set2 = dce_v6_0_crtc_cursor_set2,
2408 .cursor_move = dce_v6_0_crtc_cursor_move, 2350 .cursor_move = dce_v6_0_crtc_cursor_move,
2409 .gamma_set = dce_v6_0_crtc_gamma_set, 2351 .gamma_set = dce_v6_0_crtc_gamma_set,
2410 .set_config = amdgpu_crtc_set_config, 2352 .set_config = amdgpu_display_crtc_set_config,
2411 .destroy = dce_v6_0_crtc_destroy, 2353 .destroy = dce_v6_0_crtc_destroy,
2412 .page_flip_target = amdgpu_crtc_page_flip_target, 2354 .page_flip_target = amdgpu_display_crtc_page_flip_target,
2413}; 2355};
2414 2356
2415static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode) 2357static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2425,7 +2367,8 @@ static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
2425 amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE); 2367 amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
2426 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); 2368 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
2427 /* Make sure VBLANK and PFLIP interrupts are still enabled */ 2369 /* Make sure VBLANK and PFLIP interrupts are still enabled */
2428 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); 2370 type = amdgpu_display_crtc_idx_to_irq_type(adev,
2371 amdgpu_crtc->crtc_id);
2429 amdgpu_irq_update(adev, &adev->crtc_irq, type); 2372 amdgpu_irq_update(adev, &adev->crtc_irq, type);
2430 amdgpu_irq_update(adev, &adev->pageflip_irq, type); 2373 amdgpu_irq_update(adev, &adev->pageflip_irq, type);
2431 drm_crtc_vblank_on(crtc); 2374 drm_crtc_vblank_on(crtc);
@@ -2562,7 +2505,7 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
2562 amdgpu_crtc->connector = NULL; 2505 amdgpu_crtc->connector = NULL;
2563 return false; 2506 return false;
2564 } 2507 }
2565 if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) 2508 if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
2566 return false; 2509 return false;
2567 if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) 2510 if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
2568 return false; 2511 return false;
@@ -2693,9 +2636,9 @@ static int dce_v6_0_sw_init(void *handle)
2693 adev->ddev->mode_config.max_height = 16384; 2636 adev->ddev->mode_config.max_height = 16384;
2694 adev->ddev->mode_config.preferred_depth = 24; 2637 adev->ddev->mode_config.preferred_depth = 24;
2695 adev->ddev->mode_config.prefer_shadow = 1; 2638 adev->ddev->mode_config.prefer_shadow = 1;
2696 adev->ddev->mode_config.fb_base = adev->mc.aper_base; 2639 adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
2697 2640
2698 r = amdgpu_modeset_create_props(adev); 2641 r = amdgpu_display_modeset_create_props(adev);
2699 if (r) 2642 if (r)
2700 return r; 2643 return r;
2701 2644
@@ -2711,7 +2654,7 @@ static int dce_v6_0_sw_init(void *handle)
2711 2654
2712 ret = amdgpu_atombios_get_connector_info_from_object_table(adev); 2655 ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
2713 if (ret) 2656 if (ret)
2714 amdgpu_print_display_setup(adev->ddev); 2657 amdgpu_display_print_display_setup(adev->ddev);
2715 else 2658 else
2716 return -EINVAL; 2659 return -EINVAL;
2717 2660
@@ -2787,6 +2730,11 @@ static int dce_v6_0_hw_fini(void *handle)
2787 2730
2788static int dce_v6_0_suspend(void *handle) 2731static int dce_v6_0_suspend(void *handle)
2789{ 2732{
2733 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2734
2735 adev->mode_info.bl_level =
2736 amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
2737
2790 return dce_v6_0_hw_fini(handle); 2738 return dce_v6_0_hw_fini(handle);
2791} 2739}
2792 2740
@@ -2795,6 +2743,9 @@ static int dce_v6_0_resume(void *handle)
2795 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2743 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2796 int ret; 2744 int ret;
2797 2745
2746 amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
2747 adev->mode_info.bl_level);
2748
2798 ret = dce_v6_0_hw_init(handle); 2749 ret = dce_v6_0_hw_init(handle);
2799 2750
2800 /* turn on the BL */ 2751 /* turn on the BL */
@@ -2966,7 +2917,8 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
2966{ 2917{
2967 unsigned crtc = entry->src_id - 1; 2918 unsigned crtc = entry->src_id - 1;
2968 uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); 2919 uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
2969 unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); 2920 unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
2921 crtc);
2970 2922
2971 switch (entry->src_data[0]) { 2923 switch (entry->src_data[0]) {
2972 case 0: /* vblank */ 2924 case 0: /* vblank */
@@ -3093,7 +3045,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
3093 tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; 3045 tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
3094 WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); 3046 WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
3095 schedule_work(&adev->hotplug_work); 3047 schedule_work(&adev->hotplug_work);
3096 DRM_INFO("IH: HPD%d\n", hpd + 1); 3048 DRM_DEBUG("IH: HPD%d\n", hpd + 1);
3097 } 3049 }
3098 3050
3099 return 0; 3051 return 0;
@@ -3407,7 +3359,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
3407static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { 3359static const struct amdgpu_display_funcs dce_v6_0_display_funcs = {
3408 .bandwidth_update = &dce_v6_0_bandwidth_update, 3360 .bandwidth_update = &dce_v6_0_bandwidth_update,
3409 .vblank_get_counter = &dce_v6_0_vblank_get_counter, 3361 .vblank_get_counter = &dce_v6_0_vblank_get_counter,
3410 .vblank_wait = &dce_v6_0_vblank_wait,
3411 .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, 3362 .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
3412 .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, 3363 .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
3413 .hpd_sense = &dce_v6_0_hpd_sense, 3364 .hpd_sense = &dce_v6_0_hpd_sense,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index c008dc030687..f55422cbd77a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -140,66 +140,6 @@ static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev,
140 spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); 140 spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
141} 141}
142 142
143static bool dce_v8_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
144{
145 if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
146 CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
147 return true;
148 else
149 return false;
150}
151
152static bool dce_v8_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
153{
154 u32 pos1, pos2;
155
156 pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
157 pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
158
159 if (pos1 != pos2)
160 return true;
161 else
162 return false;
163}
164
165/**
166 * dce_v8_0_vblank_wait - vblank wait asic callback.
167 *
168 * @adev: amdgpu_device pointer
169 * @crtc: crtc to wait for vblank on
170 *
171 * Wait for vblank on the requested crtc (evergreen+).
172 */
173static void dce_v8_0_vblank_wait(struct amdgpu_device *adev, int crtc)
174{
175 unsigned i = 100;
176
177 if (crtc >= adev->mode_info.num_crtc)
178 return;
179
180 if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
181 return;
182
183 /* depending on when we hit vblank, we may be close to active; if so,
184 * wait for another frame.
185 */
186 while (dce_v8_0_is_in_vblank(adev, crtc)) {
187 if (i++ == 100) {
188 i = 0;
189 if (!dce_v8_0_is_counter_moving(adev, crtc))
190 break;
191 }
192 }
193
194 while (!dce_v8_0_is_in_vblank(adev, crtc)) {
195 if (i++ == 100) {
196 i = 0;
197 if (!dce_v8_0_is_counter_moving(adev, crtc))
198 break;
199 }
200 }
201}
202
203static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) 143static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
204{ 144{
205 if (crtc >= adev->mode_info.num_crtc) 145 if (crtc >= adev->mode_info.num_crtc)
@@ -1144,7 +1084,7 @@ static void dce_v8_0_bandwidth_update(struct amdgpu_device *adev)
1144 u32 num_heads = 0, lb_size; 1084 u32 num_heads = 0, lb_size;
1145 int i; 1085 int i;
1146 1086
1147 amdgpu_update_display_priority(adev); 1087 amdgpu_display_update_priority(adev);
1148 1088
1149 for (i = 0; i < adev->mode_info.num_crtc; i++) { 1089 for (i = 0; i < adev->mode_info.num_crtc; i++) {
1150 if (adev->mode_info.crtcs[i]->base.enabled) 1090 if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2421,9 +2361,9 @@ static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = {
2421 .cursor_set2 = dce_v8_0_crtc_cursor_set2, 2361 .cursor_set2 = dce_v8_0_crtc_cursor_set2,
2422 .cursor_move = dce_v8_0_crtc_cursor_move, 2362 .cursor_move = dce_v8_0_crtc_cursor_move,
2423 .gamma_set = dce_v8_0_crtc_gamma_set, 2363 .gamma_set = dce_v8_0_crtc_gamma_set,
2424 .set_config = amdgpu_crtc_set_config, 2364 .set_config = amdgpu_display_crtc_set_config,
2425 .destroy = dce_v8_0_crtc_destroy, 2365 .destroy = dce_v8_0_crtc_destroy,
2426 .page_flip_target = amdgpu_crtc_page_flip_target, 2366 .page_flip_target = amdgpu_display_crtc_page_flip_target,
2427}; 2367};
2428 2368
2429static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode) 2369static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2441,7 +2381,8 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
2441 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); 2381 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
2442 dce_v8_0_vga_enable(crtc, false); 2382 dce_v8_0_vga_enable(crtc, false);
2443 /* Make sure VBLANK and PFLIP interrupts are still enabled */ 2383 /* Make sure VBLANK and PFLIP interrupts are still enabled */
2444 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); 2384 type = amdgpu_display_crtc_idx_to_irq_type(adev,
2385 amdgpu_crtc->crtc_id);
2445 amdgpu_irq_update(adev, &adev->crtc_irq, type); 2386 amdgpu_irq_update(adev, &adev->crtc_irq, type);
2446 amdgpu_irq_update(adev, &adev->pageflip_irq, type); 2387 amdgpu_irq_update(adev, &adev->pageflip_irq, type);
2447 drm_crtc_vblank_on(crtc); 2388 drm_crtc_vblank_on(crtc);
@@ -2587,7 +2528,7 @@ static bool dce_v8_0_crtc_mode_fixup(struct drm_crtc *crtc,
2587 amdgpu_crtc->connector = NULL; 2528 amdgpu_crtc->connector = NULL;
2588 return false; 2529 return false;
2589 } 2530 }
2590 if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) 2531 if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
2591 return false; 2532 return false;
2592 if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) 2533 if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
2593 return false; 2534 return false;
@@ -2724,9 +2665,9 @@ static int dce_v8_0_sw_init(void *handle)
2724 adev->ddev->mode_config.preferred_depth = 24; 2665 adev->ddev->mode_config.preferred_depth = 24;
2725 adev->ddev->mode_config.prefer_shadow = 1; 2666 adev->ddev->mode_config.prefer_shadow = 1;
2726 2667
2727 adev->ddev->mode_config.fb_base = adev->mc.aper_base; 2668 adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
2728 2669
2729 r = amdgpu_modeset_create_props(adev); 2670 r = amdgpu_display_modeset_create_props(adev);
2730 if (r) 2671 if (r)
2731 return r; 2672 return r;
2732 2673
@@ -2741,7 +2682,7 @@ static int dce_v8_0_sw_init(void *handle)
2741 } 2682 }
2742 2683
2743 if (amdgpu_atombios_get_connector_info_from_object_table(adev)) 2684 if (amdgpu_atombios_get_connector_info_from_object_table(adev))
2744 amdgpu_print_display_setup(adev->ddev); 2685 amdgpu_display_print_display_setup(adev->ddev);
2745 else 2686 else
2746 return -EINVAL; 2687 return -EINVAL;
2747 2688
@@ -2819,6 +2760,11 @@ static int dce_v8_0_hw_fini(void *handle)
2819 2760
2820static int dce_v8_0_suspend(void *handle) 2761static int dce_v8_0_suspend(void *handle)
2821{ 2762{
2763 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2764
2765 adev->mode_info.bl_level =
2766 amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
2767
2822 return dce_v8_0_hw_fini(handle); 2768 return dce_v8_0_hw_fini(handle);
2823} 2769}
2824 2770
@@ -2827,6 +2773,9 @@ static int dce_v8_0_resume(void *handle)
2827 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2773 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2828 int ret; 2774 int ret;
2829 2775
2776 amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
2777 adev->mode_info.bl_level);
2778
2830 ret = dce_v8_0_hw_init(handle); 2779 ret = dce_v8_0_hw_init(handle);
2831 2780
2832 /* turn on the BL */ 2781 /* turn on the BL */
@@ -3063,7 +3012,8 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
3063{ 3012{
3064 unsigned crtc = entry->src_id - 1; 3013 unsigned crtc = entry->src_id - 1;
3065 uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); 3014 uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
3066 unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); 3015 unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
3016 crtc);
3067 3017
3068 switch (entry->src_data[0]) { 3018 switch (entry->src_data[0]) {
3069 case 0: /* vblank */ 3019 case 0: /* vblank */
@@ -3491,7 +3441,6 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev,
3491static const struct amdgpu_display_funcs dce_v8_0_display_funcs = { 3441static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
3492 .bandwidth_update = &dce_v8_0_bandwidth_update, 3442 .bandwidth_update = &dce_v8_0_bandwidth_update,
3493 .vblank_get_counter = &dce_v8_0_vblank_get_counter, 3443 .vblank_get_counter = &dce_v8_0_vblank_get_counter,
3494 .vblank_wait = &dce_v8_0_vblank_wait,
3495 .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, 3444 .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
3496 .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, 3445 .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
3497 .hpd_sense = &dce_v8_0_hpd_sense, 3446 .hpd_sense = &dce_v8_0_hpd_sense,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 120dd3b26fc2..b51f05dc9582 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -48,19 +48,6 @@ static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *ad
48 int crtc, 48 int crtc,
49 enum amdgpu_interrupt_state state); 49 enum amdgpu_interrupt_state state);
50 50
51/**
52 * dce_virtual_vblank_wait - vblank wait asic callback.
53 *
54 * @adev: amdgpu_device pointer
55 * @crtc: crtc to wait for vblank on
56 *
57 * Wait for vblank on the requested crtc (evergreen+).
58 */
59static void dce_virtual_vblank_wait(struct amdgpu_device *adev, int crtc)
60{
61 return;
62}
63
64static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc) 51static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc)
65{ 52{
66 return 0; 53 return 0;
@@ -130,9 +117,9 @@ static const struct drm_crtc_funcs dce_virtual_crtc_funcs = {
130 .cursor_set2 = NULL, 117 .cursor_set2 = NULL,
131 .cursor_move = NULL, 118 .cursor_move = NULL,
132 .gamma_set = dce_virtual_crtc_gamma_set, 119 .gamma_set = dce_virtual_crtc_gamma_set,
133 .set_config = amdgpu_crtc_set_config, 120 .set_config = amdgpu_display_crtc_set_config,
134 .destroy = dce_virtual_crtc_destroy, 121 .destroy = dce_virtual_crtc_destroy,
135 .page_flip_target = amdgpu_crtc_page_flip_target, 122 .page_flip_target = amdgpu_display_crtc_page_flip_target,
136}; 123};
137 124
138static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) 125static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -149,7 +136,8 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
149 case DRM_MODE_DPMS_ON: 136 case DRM_MODE_DPMS_ON:
150 amdgpu_crtc->enabled = true; 137 amdgpu_crtc->enabled = true;
151 /* Make sure VBLANK interrupts are still enabled */ 138 /* Make sure VBLANK interrupts are still enabled */
152 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); 139 type = amdgpu_display_crtc_idx_to_irq_type(adev,
140 amdgpu_crtc->crtc_id);
153 amdgpu_irq_update(adev, &adev->crtc_irq, type); 141 amdgpu_irq_update(adev, &adev->crtc_irq, type);
154 drm_crtc_vblank_on(crtc); 142 drm_crtc_vblank_on(crtc);
155 break; 143 break;
@@ -406,9 +394,9 @@ static int dce_virtual_sw_init(void *handle)
406 adev->ddev->mode_config.preferred_depth = 24; 394 adev->ddev->mode_config.preferred_depth = 24;
407 adev->ddev->mode_config.prefer_shadow = 1; 395 adev->ddev->mode_config.prefer_shadow = 1;
408 396
409 adev->ddev->mode_config.fb_base = adev->mc.aper_base; 397 adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
410 398
411 r = amdgpu_modeset_create_props(adev); 399 r = amdgpu_display_modeset_create_props(adev);
412 if (r) 400 if (r)
413 return r; 401 return r;
414 402
@@ -485,6 +473,7 @@ static int dce_virtual_hw_init(void *handle)
485 /* no DCE */ 473 /* no DCE */
486 break; 474 break;
487 case CHIP_VEGA10: 475 case CHIP_VEGA10:
476 case CHIP_VEGA12:
488 break; 477 break;
489 default: 478 default:
490 DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); 479 DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
@@ -653,7 +642,6 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
653static const struct amdgpu_display_funcs dce_virtual_display_funcs = { 642static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
654 .bandwidth_update = &dce_virtual_bandwidth_update, 643 .bandwidth_update = &dce_virtual_bandwidth_update,
655 .vblank_get_counter = &dce_virtual_vblank_get_counter, 644 .vblank_get_counter = &dce_virtual_vblank_get_counter,
656 .vblank_wait = &dce_virtual_vblank_wait,
657 .backlight_set_level = NULL, 645 .backlight_set_level = NULL,
658 .backlight_get_level = NULL, 646 .backlight_get_level = NULL,
659 .hpd_sense = &dce_virtual_hpd_sense, 647 .hpd_sense = &dce_virtual_hpd_sense,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h b/drivers/gpu/drm/amd/amdgpu/emu_soc.c
index c0c4bfdcdb14..d72c25c1b987 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h
+++ b/drivers/gpu/drm/amd/amdgpu/emu_soc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright 2015 Advanced Micro Devices, Inc. 2 * Copyright 2018 Advanced Micro Devices, Inc.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -19,15 +19,15 @@
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE. 20 * OTHER DEALINGS IN THE SOFTWARE.
21 * 21 *
22 * Authors: AMD
23 *
24 */ 22 */
23#include "amdgpu.h"
24#include "soc15.h"
25 25
26#ifndef __AMDGPU_POWERPLAY_H__ 26#include "soc15_common.h"
27#define __AMDGPU_POWERPLAY_H__ 27#include "soc15_hw_ip.h"
28
29#include "amd_shared.h"
30 28
31extern const struct amdgpu_ip_block_version amdgpu_pp_ip_block; 29int emu_soc_asic_init(struct amdgpu_device *adev)
30{
31 return 0;
32}
32 33
33#endif /* __AMDGPU_POWERPLAY_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 9870d83b68c1..cd6bf291a853 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -38,6 +38,7 @@
38#include "dce/dce_6_0_sh_mask.h" 38#include "dce/dce_6_0_sh_mask.h"
39#include "gca/gfx_7_2_enum.h" 39#include "gca/gfx_7_2_enum.h"
40#include "si_enums.h" 40#include "si_enums.h"
41#include "si.h"
41 42
42static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); 43static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
43static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); 44static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1808,17 +1809,6 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
1808 return r; 1809 return r;
1809} 1810}
1810 1811
1811static void gfx_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
1812{
1813 /* flush hdp cache */
1814 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1815 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
1816 WRITE_DATA_DST_SEL(0)));
1817 amdgpu_ring_write(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL);
1818 amdgpu_ring_write(ring, 0);
1819 amdgpu_ring_write(ring, 0x1);
1820}
1821
1822static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 1812static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
1823{ 1813{
1824 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 1814 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
@@ -1826,24 +1816,6 @@ static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
1826 EVENT_INDEX(0)); 1816 EVENT_INDEX(0));
1827} 1817}
1828 1818
1829/**
1830 * gfx_v6_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
1831 *
1832 * @adev: amdgpu_device pointer
1833 * @ridx: amdgpu ring index
1834 *
1835 * Emits an hdp invalidate on the cp.
1836 */
1837static void gfx_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
1838{
1839 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1840 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
1841 WRITE_DATA_DST_SEL(0)));
1842 amdgpu_ring_write(ring, mmHDP_DEBUG0);
1843 amdgpu_ring_write(ring, 0);
1844 amdgpu_ring_write(ring, 0x1);
1845}
1846
1847static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 1819static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
1848 u64 seq, unsigned flags) 1820 u64 seq, unsigned flags)
1849{ 1821{
@@ -2358,25 +2330,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
2358{ 2330{
2359 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 2331 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
2360 2332
2361 /* write new base address */ 2333 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
2362 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2363 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
2364 WRITE_DATA_DST_SEL(0)));
2365 if (vmid < 8) {
2366 amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid ));
2367 } else {
2368 amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8)));
2369 }
2370 amdgpu_ring_write(ring, 0);
2371 amdgpu_ring_write(ring, pd_addr >> 12);
2372
2373 /* bits 0-15 are the VM contexts0-15 */
2374 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2375 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
2376 WRITE_DATA_DST_SEL(0)));
2377 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
2378 amdgpu_ring_write(ring, 0);
2379 amdgpu_ring_write(ring, 1 << vmid);
2380 2334
2381 /* wait for the invalidate to complete */ 2335 /* wait for the invalidate to complete */
2382 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 2336 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -2401,6 +2355,18 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
2401 } 2355 }
2402} 2356}
2403 2357
2358static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
2359 uint32_t reg, uint32_t val)
2360{
2361 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
2362
2363 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2364 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
2365 WRITE_DATA_DST_SEL(0)));
2366 amdgpu_ring_write(ring, reg);
2367 amdgpu_ring_write(ring, 0);
2368 amdgpu_ring_write(ring, val);
2369}
2404 2370
2405static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) 2371static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
2406{ 2372{
@@ -3095,11 +3061,18 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
3095 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 3061 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
3096} 3062}
3097 3063
3064static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev,
3065 u32 me, u32 pipe, u32 q)
3066{
3067 DRM_INFO("Not implemented\n");
3068}
3069
3098static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = { 3070static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
3099 .get_gpu_clock_counter = &gfx_v6_0_get_gpu_clock_counter, 3071 .get_gpu_clock_counter = &gfx_v6_0_get_gpu_clock_counter,
3100 .select_se_sh = &gfx_v6_0_select_se_sh, 3072 .select_se_sh = &gfx_v6_0_select_se_sh,
3101 .read_wave_data = &gfx_v6_0_read_wave_data, 3073 .read_wave_data = &gfx_v6_0_read_wave_data,
3102 .read_wave_sgprs = &gfx_v6_0_read_wave_sgprs, 3074 .read_wave_sgprs = &gfx_v6_0_read_wave_sgprs,
3075 .select_me_pipe_q = &gfx_v6_0_select_me_pipe_q
3103}; 3076};
3104 3077
3105static int gfx_v6_0_early_init(void *handle) 3078static int gfx_v6_0_early_init(void *handle)
@@ -3511,23 +3484,21 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
3511 .get_wptr = gfx_v6_0_ring_get_wptr, 3484 .get_wptr = gfx_v6_0_ring_get_wptr,
3512 .set_wptr = gfx_v6_0_ring_set_wptr_gfx, 3485 .set_wptr = gfx_v6_0_ring_set_wptr_gfx,
3513 .emit_frame_size = 3486 .emit_frame_size =
3514 5 + /* gfx_v6_0_ring_emit_hdp_flush */ 3487 5 + 5 + /* hdp flush / invalidate */
3515 5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
3516 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ 3488 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
3517 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */ 3489 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
3518 17 + 6 + /* gfx_v6_0_ring_emit_vm_flush */ 3490 SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
3519 3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */ 3491 3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
3520 .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ 3492 .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
3521 .emit_ib = gfx_v6_0_ring_emit_ib, 3493 .emit_ib = gfx_v6_0_ring_emit_ib,
3522 .emit_fence = gfx_v6_0_ring_emit_fence, 3494 .emit_fence = gfx_v6_0_ring_emit_fence,
3523 .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync, 3495 .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
3524 .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush, 3496 .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
3525 .emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
3526 .emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
3527 .test_ring = gfx_v6_0_ring_test_ring, 3497 .test_ring = gfx_v6_0_ring_test_ring,
3528 .test_ib = gfx_v6_0_ring_test_ib, 3498 .test_ib = gfx_v6_0_ring_test_ib,
3529 .insert_nop = amdgpu_ring_insert_nop, 3499 .insert_nop = amdgpu_ring_insert_nop,
3530 .emit_cntxcntl = gfx_v6_ring_emit_cntxcntl, 3500 .emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
3501 .emit_wreg = gfx_v6_0_ring_emit_wreg,
3531}; 3502};
3532 3503
3533static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { 3504static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
@@ -3538,21 +3509,19 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
3538 .get_wptr = gfx_v6_0_ring_get_wptr, 3509 .get_wptr = gfx_v6_0_ring_get_wptr,
3539 .set_wptr = gfx_v6_0_ring_set_wptr_compute, 3510 .set_wptr = gfx_v6_0_ring_set_wptr_compute,
3540 .emit_frame_size = 3511 .emit_frame_size =
3541 5 + /* gfx_v6_0_ring_emit_hdp_flush */ 3512 5 + 5 + /* hdp flush / invalidate */
3542 5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
3543 7 + /* gfx_v6_0_ring_emit_pipeline_sync */ 3513 7 + /* gfx_v6_0_ring_emit_pipeline_sync */
3544 17 + /* gfx_v6_0_ring_emit_vm_flush */ 3514 SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
3545 14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ 3515 14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
3546 .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ 3516 .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
3547 .emit_ib = gfx_v6_0_ring_emit_ib, 3517 .emit_ib = gfx_v6_0_ring_emit_ib,
3548 .emit_fence = gfx_v6_0_ring_emit_fence, 3518 .emit_fence = gfx_v6_0_ring_emit_fence,
3549 .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync, 3519 .emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
3550 .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush, 3520 .emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
3551 .emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
3552 .emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
3553 .test_ring = gfx_v6_0_ring_test_ring, 3521 .test_ring = gfx_v6_0_ring_test_ring,
3554 .test_ib = gfx_v6_0_ring_test_ib, 3522 .test_ib = gfx_v6_0_ring_test_ib,
3555 .insert_nop = amdgpu_ring_insert_nop, 3523 .insert_nop = amdgpu_ring_insert_nop,
3524 .emit_wreg = gfx_v6_0_ring_emit_wreg,
3556}; 3525};
3557 3526
3558static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev) 3527static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a066c5eda135..42b6144c1fd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1946,7 +1946,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
1946 if (i == 0) 1946 if (i == 0)
1947 sh_mem_base = 0; 1947 sh_mem_base = 0;
1948 else 1948 else
1949 sh_mem_base = adev->mc.shared_aperture_start >> 48; 1949 sh_mem_base = adev->gmc.shared_aperture_start >> 48;
1950 cik_srbm_select(adev, 0, 0, 0, i); 1950 cik_srbm_select(adev, 0, 0, 0, i);
1951 /* CP and shaders */ 1951 /* CP and shaders */
1952 WREG32(mmSH_MEM_CONFIG, sh_mem_cfg); 1952 WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
@@ -2147,26 +2147,6 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
2147 EVENT_INDEX(0)); 2147 EVENT_INDEX(0));
2148} 2148}
2149 2149
2150
2151/**
2152 * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
2153 *
2154 * @adev: amdgpu_device pointer
2155 * @ridx: amdgpu ring index
2156 *
2157 * Emits an hdp invalidate on the cp.
2158 */
2159static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
2160{
2161 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2162 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2163 WRITE_DATA_DST_SEL(0) |
2164 WR_CONFIRM));
2165 amdgpu_ring_write(ring, mmHDP_DEBUG0);
2166 amdgpu_ring_write(ring, 0);
2167 amdgpu_ring_write(ring, 1);
2168}
2169
2170/** 2150/**
2171 * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring 2151 * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
2172 * 2152 *
@@ -3243,26 +3223,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3243{ 3223{
3244 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 3224 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3245 3225
3246 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3226 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3247 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3248 WRITE_DATA_DST_SEL(0)));
3249 if (vmid < 8) {
3250 amdgpu_ring_write(ring,
3251 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
3252 } else {
3253 amdgpu_ring_write(ring,
3254 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
3255 }
3256 amdgpu_ring_write(ring, 0);
3257 amdgpu_ring_write(ring, pd_addr >> 12);
3258
3259 /* bits 0-15 are the VM contexts0-15 */
3260 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3261 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3262 WRITE_DATA_DST_SEL(0)));
3263 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3264 amdgpu_ring_write(ring, 0);
3265 amdgpu_ring_write(ring, 1 << vmid);
3266 3227
3267 /* wait for the invalidate to complete */ 3228 /* wait for the invalidate to complete */
3268 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 3229 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -3289,6 +3250,19 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3289 } 3250 }
3290} 3251}
3291 3252
3253static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
3254 uint32_t reg, uint32_t val)
3255{
3256 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3257
3258 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3259 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3260 WRITE_DATA_DST_SEL(0)));
3261 amdgpu_ring_write(ring, reg);
3262 amdgpu_ring_write(ring, 0);
3263 amdgpu_ring_write(ring, val);
3264}
3265
3292/* 3266/*
3293 * RLC 3267 * RLC
3294 * The RLC is a multi-purpose microengine that handles a 3268 * The RLC is a multi-purpose microengine that handles a
@@ -4296,11 +4270,18 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
4296 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 4270 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
4297} 4271}
4298 4272
4273static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
4274 u32 me, u32 pipe, u32 q)
4275{
4276 cik_srbm_select(adev, me, pipe, q, 0);
4277}
4278
4299static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { 4279static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
4300 .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter, 4280 .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
4301 .select_se_sh = &gfx_v7_0_select_se_sh, 4281 .select_se_sh = &gfx_v7_0_select_se_sh,
4302 .read_wave_data = &gfx_v7_0_read_wave_data, 4282 .read_wave_data = &gfx_v7_0_read_wave_data,
4303 .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs, 4283 .read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
4284 .select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
4304}; 4285};
4305 4286
4306static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { 4287static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
@@ -4384,34 +4365,8 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
4384 case CHIP_KAVERI: 4365 case CHIP_KAVERI:
4385 adev->gfx.config.max_shader_engines = 1; 4366 adev->gfx.config.max_shader_engines = 1;
4386 adev->gfx.config.max_tile_pipes = 4; 4367 adev->gfx.config.max_tile_pipes = 4;
4387 if ((adev->pdev->device == 0x1304) || 4368 adev->gfx.config.max_cu_per_sh = 8;
4388 (adev->pdev->device == 0x1305) || 4369 adev->gfx.config.max_backends_per_se = 2;
4389 (adev->pdev->device == 0x130C) ||
4390 (adev->pdev->device == 0x130F) ||
4391 (adev->pdev->device == 0x1310) ||
4392 (adev->pdev->device == 0x1311) ||
4393 (adev->pdev->device == 0x131C)) {
4394 adev->gfx.config.max_cu_per_sh = 8;
4395 adev->gfx.config.max_backends_per_se = 2;
4396 } else if ((adev->pdev->device == 0x1309) ||
4397 (adev->pdev->device == 0x130A) ||
4398 (adev->pdev->device == 0x130D) ||
4399 (adev->pdev->device == 0x1313) ||
4400 (adev->pdev->device == 0x131D)) {
4401 adev->gfx.config.max_cu_per_sh = 6;
4402 adev->gfx.config.max_backends_per_se = 2;
4403 } else if ((adev->pdev->device == 0x1306) ||
4404 (adev->pdev->device == 0x1307) ||
4405 (adev->pdev->device == 0x130B) ||
4406 (adev->pdev->device == 0x130E) ||
4407 (adev->pdev->device == 0x1315) ||
4408 (adev->pdev->device == 0x131B)) {
4409 adev->gfx.config.max_cu_per_sh = 4;
4410 adev->gfx.config.max_backends_per_se = 1;
4411 } else {
4412 adev->gfx.config.max_cu_per_sh = 3;
4413 adev->gfx.config.max_backends_per_se = 1;
4414 }
4415 adev->gfx.config.max_sh_per_se = 1; 4370 adev->gfx.config.max_sh_per_se = 1;
4416 adev->gfx.config.max_texture_channel_caches = 4; 4371 adev->gfx.config.max_texture_channel_caches = 4;
4417 adev->gfx.config.max_gprs = 256; 4372 adev->gfx.config.max_gprs = 256;
@@ -5115,10 +5070,10 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5115 .emit_frame_size = 5070 .emit_frame_size =
5116 20 + /* gfx_v7_0_ring_emit_gds_switch */ 5071 20 + /* gfx_v7_0_ring_emit_gds_switch */
5117 7 + /* gfx_v7_0_ring_emit_hdp_flush */ 5072 7 + /* gfx_v7_0_ring_emit_hdp_flush */
5118 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */ 5073 5 + /* hdp invalidate */
5119 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ 5074 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
5120 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ 5075 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
5121 17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ 5076 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
5122 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ 5077 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
5123 .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ 5078 .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
5124 .emit_ib = gfx_v7_0_ring_emit_ib_gfx, 5079 .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
@@ -5127,12 +5082,12 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5127 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, 5082 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5128 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, 5083 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5129 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, 5084 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5130 .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
5131 .test_ring = gfx_v7_0_ring_test_ring, 5085 .test_ring = gfx_v7_0_ring_test_ring,
5132 .test_ib = gfx_v7_0_ring_test_ib, 5086 .test_ib = gfx_v7_0_ring_test_ib,
5133 .insert_nop = amdgpu_ring_insert_nop, 5087 .insert_nop = amdgpu_ring_insert_nop,
5134 .pad_ib = amdgpu_ring_generic_pad_ib, 5088 .pad_ib = amdgpu_ring_generic_pad_ib,
5135 .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, 5089 .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
5090 .emit_wreg = gfx_v7_0_ring_emit_wreg,
5136}; 5091};
5137 5092
5138static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { 5093static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5146,9 +5101,9 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5146 .emit_frame_size = 5101 .emit_frame_size =
5147 20 + /* gfx_v7_0_ring_emit_gds_switch */ 5102 20 + /* gfx_v7_0_ring_emit_gds_switch */
5148 7 + /* gfx_v7_0_ring_emit_hdp_flush */ 5103 7 + /* gfx_v7_0_ring_emit_hdp_flush */
5149 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */ 5104 5 + /* hdp invalidate */
5150 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ 5105 7 + /* gfx_v7_0_ring_emit_pipeline_sync */
5151 17 + /* gfx_v7_0_ring_emit_vm_flush */ 5106 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
5152 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ 5107 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
5153 .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ 5108 .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
5154 .emit_ib = gfx_v7_0_ring_emit_ib_compute, 5109 .emit_ib = gfx_v7_0_ring_emit_ib_compute,
@@ -5157,11 +5112,11 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5157 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, 5112 .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
5158 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, 5113 .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
5159 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, 5114 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5160 .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
5161 .test_ring = gfx_v7_0_ring_test_ring, 5115 .test_ring = gfx_v7_0_ring_test_ring,
5162 .test_ib = gfx_v7_0_ring_test_ib, 5116 .test_ib = gfx_v7_0_ring_test_ib,
5163 .insert_nop = amdgpu_ring_insert_nop, 5117 .insert_nop = amdgpu_ring_insert_nop,
5164 .pad_ib = amdgpu_ring_generic_pad_ib, 5118 .pad_ib = amdgpu_ring_generic_pad_ib,
5119 .emit_wreg = gfx_v7_0_ring_emit_wreg,
5165}; 5120};
5166 5121
5167static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) 5122static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
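Note: in gfx_v6_0 and gfx_v7_0 above (and gfx_v8_0 below), the open-coded WRITE_DATA sequences that programmed the per-VMID page-table base and poked VM_INVALIDATE_REQUEST are replaced by a call to amdgpu_gmc_emit_flush_gpu_tlb() plus a new per-ring .emit_wreg callback, and the emit_frame_size estimates are rewritten in terms of *_FLUSH_GPU_TLB_NUM_WREG * 5. A rough sketch of how such a generic flush could be composed from emit_wreg, inferred from the removed code rather than taken from the actual helper (example_emit_flush_gpu_tlb is a hypothetical name):

	static void example_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					       unsigned vmid, uint64_t pd_addr)
	{
		uint32_t reg = (vmid < 8) ?
			(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid) :
			(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8);

		/* write the new page directory base for this VMID ... */
		ring->funcs->emit_wreg(ring, reg, pd_addr >> 12);
		/* ... then request a TLB invalidate for that VMID */
		ring->funcs->emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
	}

Each emit_wreg shown in this diff costs 5 dwords (a PACKET3_WRITE_DATA header plus four payload dwords), which is where the "NUM_WREG * 5" terms in the frame-size estimates come from.
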
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 4e694ae9f308..b0e591eaa71a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3475,6 +3475,12 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3475 WREG32(mmGRBM_GFX_INDEX, data); 3475 WREG32(mmGRBM_GFX_INDEX, data);
3476} 3476}
3477 3477
3478static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3479 u32 me, u32 pipe, u32 q)
3480{
3481 vi_srbm_select(adev, me, pipe, q, 0);
3482}
3483
3478static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3484static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3479{ 3485{
3480 u32 data, mask; 3486 u32 data, mask;
@@ -3796,7 +3802,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3796 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3802 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3797 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3803 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3798 WREG32(mmSH_MEM_CONFIG, tmp); 3804 WREG32(mmSH_MEM_CONFIG, tmp);
3799 tmp = adev->mc.shared_aperture_start >> 48; 3805 tmp = adev->gmc.shared_aperture_start >> 48;
3800 WREG32(mmSH_MEM_BASES, tmp); 3806 WREG32(mmSH_MEM_BASES, tmp);
3801 } 3807 }
3802 3808
@@ -4847,6 +4853,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4847 /* reset MQD to a clean status */ 4853 /* reset MQD to a clean status */
4848 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4854 if (adev->gfx.mec.mqd_backup[mqd_idx])
4849 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4855 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4856 /* reset ring buffer */
4857 ring->wptr = 0;
4858 amdgpu_ring_clear_ring(ring);
4850 } else { 4859 } else {
4851 amdgpu_ring_clear_ring(ring); 4860 amdgpu_ring_clear_ring(ring);
4852 } 4861 }
@@ -4921,13 +4930,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4921 /* Test KCQs */ 4930 /* Test KCQs */
4922 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4931 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4923 ring = &adev->gfx.compute_ring[i]; 4932 ring = &adev->gfx.compute_ring[i];
4924 if (adev->in_gpu_reset) {
4925 /* move reset ring buffer to here to workaround
4926 * compute ring test failed
4927 */
4928 ring->wptr = 0;
4929 amdgpu_ring_clear_ring(ring);
4930 }
4931 ring->ready = true; 4933 ring->ready = true;
4932 r = amdgpu_ring_test_ring(ring); 4934 r = amdgpu_ring_test_ring(ring);
4933 if (r) 4935 if (r)
@@ -5446,6 +5448,7 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5446 .select_se_sh = &gfx_v8_0_select_se_sh, 5448 .select_se_sh = &gfx_v8_0_select_se_sh,
5447 .read_wave_data = &gfx_v8_0_read_wave_data, 5449 .read_wave_data = &gfx_v8_0_read_wave_data,
5448 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5450 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5451 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5449}; 5452};
5450 5453
5451static int gfx_v8_0_early_init(void *handle) 5454static int gfx_v8_0_early_init(void *handle)
@@ -6230,19 +6233,6 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6230 EVENT_INDEX(0)); 6233 EVENT_INDEX(0));
6231} 6234}
6232 6235
6233
6234static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6235{
6236 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6237 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6238 WRITE_DATA_DST_SEL(0) |
6239 WR_CONFIRM));
6240 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6241 amdgpu_ring_write(ring, 0);
6242 amdgpu_ring_write(ring, 1);
6243
6244}
6245
6246static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6236static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6247 struct amdgpu_ib *ib, 6237 struct amdgpu_ib *ib,
6248 unsigned vmid, bool ctx_switch) 6238 unsigned vmid, bool ctx_switch)
@@ -6332,28 +6322,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6332{ 6322{
6333 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6323 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6334 6324
6335 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6325 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6336 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6337 WRITE_DATA_DST_SEL(0)) |
6338 WR_CONFIRM);
6339 if (vmid < 8) {
6340 amdgpu_ring_write(ring,
6341 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
6342 } else {
6343 amdgpu_ring_write(ring,
6344 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
6345 }
6346 amdgpu_ring_write(ring, 0);
6347 amdgpu_ring_write(ring, pd_addr >> 12);
6348
6349 /* bits 0-15 are the VM contexts0-15 */
6350 /* invalidate the cache */
6351 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6352 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6353 WRITE_DATA_DST_SEL(0)));
6354 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6355 amdgpu_ring_write(ring, 0);
6356 amdgpu_ring_write(ring, 1 << vmid);
6357 6326
6358 /* wait for the invalidate to complete */ 6327 /* wait for the invalidate to complete */
6359 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6328 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -6617,8 +6586,22 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6617static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6586static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6618 uint32_t val) 6587 uint32_t val)
6619{ 6588{
6589 uint32_t cmd;
6590
6591 switch (ring->funcs->type) {
6592 case AMDGPU_RING_TYPE_GFX:
6593 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6594 break;
6595 case AMDGPU_RING_TYPE_KIQ:
6596 cmd = 1 << 16; /* no inc addr */
6597 break;
6598 default:
6599 cmd = WR_CONFIRM;
6600 break;
6601 }
6602
6620 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6603 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6621 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ 6604 amdgpu_ring_write(ring, cmd);
6622 amdgpu_ring_write(ring, reg); 6605 amdgpu_ring_write(ring, reg);
6623 amdgpu_ring_write(ring, 0); 6606 amdgpu_ring_write(ring, 0);
6624 amdgpu_ring_write(ring, val); 6607 amdgpu_ring_write(ring, val);
@@ -6871,7 +6854,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6871 .emit_frame_size = /* maximum 215dw if count 16 IBs in */ 6854 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6872 5 + /* COND_EXEC */ 6855 5 + /* COND_EXEC */
6873 7 + /* PIPELINE_SYNC */ 6856 7 + /* PIPELINE_SYNC */
6874 19 + /* VM_FLUSH */ 6857 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6875 8 + /* FENCE for VM_FLUSH */ 6858 8 + /* FENCE for VM_FLUSH */
6876 20 + /* GDS switch */ 6859 20 + /* GDS switch */
6877 4 + /* double SWITCH_BUFFER, 6860 4 + /* double SWITCH_BUFFER,
@@ -6893,7 +6876,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6893 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6876 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6894 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6877 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6895 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6878 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6896 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6897 .test_ring = gfx_v8_0_ring_test_ring, 6879 .test_ring = gfx_v8_0_ring_test_ring,
6898 .test_ib = gfx_v8_0_ring_test_ib, 6880 .test_ib = gfx_v8_0_ring_test_ib,
6899 .insert_nop = amdgpu_ring_insert_nop, 6881 .insert_nop = amdgpu_ring_insert_nop,
@@ -6902,6 +6884,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6902 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, 6884 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6903 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, 6885 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6904 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, 6886 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6887 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6905}; 6888};
6906 6889
6907static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 6890static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -6915,9 +6898,9 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6915 .emit_frame_size = 6898 .emit_frame_size =
6916 20 + /* gfx_v8_0_ring_emit_gds_switch */ 6899 20 + /* gfx_v8_0_ring_emit_gds_switch */
6917 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 6900 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6918 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ 6901 5 + /* hdp_invalidate */
6919 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 6902 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6920 17 + /* gfx_v8_0_ring_emit_vm_flush */ 6903 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6921 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 6904 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6922 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 6905 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6923 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 6906 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
@@ -6926,12 +6909,12 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6926 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6909 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6927 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6910 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6928 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6911 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6929 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6930 .test_ring = gfx_v8_0_ring_test_ring, 6912 .test_ring = gfx_v8_0_ring_test_ring,
6931 .test_ib = gfx_v8_0_ring_test_ib, 6913 .test_ib = gfx_v8_0_ring_test_ib,
6932 .insert_nop = amdgpu_ring_insert_nop, 6914 .insert_nop = amdgpu_ring_insert_nop,
6933 .pad_ib = amdgpu_ring_generic_pad_ib, 6915 .pad_ib = amdgpu_ring_generic_pad_ib,
6934 .set_priority = gfx_v8_0_ring_set_priority_compute, 6916 .set_priority = gfx_v8_0_ring_set_priority_compute,
6917 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6935}; 6918};
6936 6919
6937static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 6920static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
@@ -6945,7 +6928,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6945 .emit_frame_size = 6928 .emit_frame_size =
6946 20 + /* gfx_v8_0_ring_emit_gds_switch */ 6929 20 + /* gfx_v8_0_ring_emit_gds_switch */
6947 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 6930 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6948 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ 6931 5 + /* hdp_invalidate */
6949 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 6932 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6950 17 + /* gfx_v8_0_ring_emit_vm_flush */ 6933 17 + /* gfx_v8_0_ring_emit_vm_flush */
6951 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6934 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
@@ -7151,12 +7134,12 @@ static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7151 } ce_payload = {}; 7134 } ce_payload = {};
7152 7135
7153 if (ring->adev->virt.chained_ib_support) { 7136 if (ring->adev->virt.chained_ib_support) {
7154 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + 7137 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7155 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); 7138 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7156 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; 7139 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7157 } else { 7140 } else {
7158 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + 7141 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7159 offsetof(struct vi_gfx_meta_data, ce_payload); 7142 offsetof(struct vi_gfx_meta_data, ce_payload);
7160 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; 7143 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7161 } 7144 }
7162 7145
@@ -7179,7 +7162,7 @@ static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7179 struct vi_de_ib_state_chained_ib chained; 7162 struct vi_de_ib_state_chained_ib chained;
7180 } de_payload = {}; 7163 } de_payload = {};
7181 7164
7182 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; 7165 csa_addr = amdgpu_csa_vaddr(ring->adev);
7183 gds_addr = csa_addr + 4096; 7166 gds_addr = csa_addr + 4096;
7184 if (ring->adev->virt.chained_ib_support) { 7167 if (ring->adev->virt.chained_ib_support) {
7185 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); 7168 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
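Both the CE and DE metadata writers above now take the context-save-area address from amdgpu_csa_vaddr() instead of hard-coding AMDGPU_VA_RESERVED_SIZE - 2 * 4096. A minimal sketch of what such a helper can compute is shown below; it assumes the CSA is carved out of the top of the VM-manageable address range, which is an inference from this series rather than the verbatim kernel implementation.

static uint64_t amdgpu_csa_vaddr_sketch(struct amdgpu_device *adev)
{
	/* top of the address range the VM manager can map */
	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;

	/* step back by the driver-reserved size, analogous to the fixed
	 * offset the old hard-coded value subtracted */
	return addr - AMDGPU_VA_RESERVED_SIZE;
}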
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c06479615e8a..9d39fd5b1822 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -57,6 +57,13 @@ MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
57MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); 57MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
58MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); 58MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
59 59
60MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
61MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
62MODULE_FIRMWARE("amdgpu/vega12_me.bin");
63MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
64MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
65MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
66
60MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 67MODULE_FIRMWARE("amdgpu/raven_ce.bin");
61MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 68MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
62MODULE_FIRMWARE("amdgpu/raven_me.bin"); 69MODULE_FIRMWARE("amdgpu/raven_me.bin");
@@ -144,7 +151,42 @@ static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 151 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
145}; 152};
146 153
154static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
155{
156 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
157 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
158 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
159 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
160 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
161 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
162 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
163 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
164 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
165 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
166 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
167 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
168 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
169 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
170 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
171 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
172};
173
174static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
175{
176 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
177 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
178 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
179 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
180 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
181 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
182 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
183 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
184 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
185 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
186};
187
147#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 188#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
189#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
148#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 190#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
149 191
150static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 192static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -168,6 +210,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
168 golden_settings_gc_9_0_vg10, 210 golden_settings_gc_9_0_vg10,
169 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 211 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
170 break; 212 break;
213 case CHIP_VEGA12:
214 soc15_program_register_sequence(adev,
215 golden_settings_gc_9_2_1,
216 ARRAY_SIZE(golden_settings_gc_9_2_1));
217 soc15_program_register_sequence(adev,
218 golden_settings_gc_9_2_1_vg12,
219 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
220 break;
171 case CHIP_RAVEN: 221 case CHIP_RAVEN:
172 soc15_program_register_sequence(adev, 222 soc15_program_register_sequence(adev,
173 golden_settings_gc_9_1, 223 golden_settings_gc_9_1,
@@ -271,58 +321,65 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
271 321
272static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 322static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
273{ 323{
274 struct amdgpu_device *adev = ring->adev; 324 struct amdgpu_device *adev = ring->adev;
275 struct amdgpu_ib ib; 325 struct amdgpu_ib ib;
276 struct dma_fence *f = NULL; 326 struct dma_fence *f = NULL;
277 uint32_t scratch; 327
278 uint32_t tmp = 0; 328 unsigned index;
279 long r; 329 uint64_t gpu_addr;
280 330 uint32_t tmp;
281 r = amdgpu_gfx_scratch_get(adev, &scratch); 331 long r;
282 if (r) { 332
283 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); 333 r = amdgpu_device_wb_get(adev, &index);
284 return r; 334 if (r) {
285 } 335 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
286 WREG32(scratch, 0xCAFEDEAD); 336 return r;
287 memset(&ib, 0, sizeof(ib)); 337 }
288 r = amdgpu_ib_get(adev, NULL, 256, &ib); 338
289 if (r) { 339 gpu_addr = adev->wb.gpu_addr + (index * 4);
290 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 340 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
291 goto err1; 341 memset(&ib, 0, sizeof(ib));
292 } 342 r = amdgpu_ib_get(adev, NULL, 16, &ib);
293 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 343 if (r) {
294 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); 344 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
295 ib.ptr[2] = 0xDEADBEEF; 345 goto err1;
296 ib.length_dw = 3; 346 }
297 347 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
298 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 348 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
299 if (r) 349 ib.ptr[2] = lower_32_bits(gpu_addr);
300 goto err2; 350 ib.ptr[3] = upper_32_bits(gpu_addr);
301 351 ib.ptr[4] = 0xDEADBEEF;
302 r = dma_fence_wait_timeout(f, false, timeout); 352 ib.length_dw = 5;
303 if (r == 0) { 353
304 DRM_ERROR("amdgpu: IB test timed out.\n"); 354 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
305 r = -ETIMEDOUT; 355 if (r)
306 goto err2; 356 goto err2;
307 } else if (r < 0) { 357
308 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 358 r = dma_fence_wait_timeout(f, false, timeout);
309 goto err2; 359 if (r == 0) {
310 } 360 DRM_ERROR("amdgpu: IB test timed out.\n");
311 tmp = RREG32(scratch); 361 r = -ETIMEDOUT;
312 if (tmp == 0xDEADBEEF) { 362 goto err2;
313 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 363 } else if (r < 0) {
314 r = 0; 364 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
315 } else { 365 goto err2;
316 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 366 }
317 scratch, tmp); 367
318 r = -EINVAL; 368 tmp = adev->wb.wb[index];
319 } 369 if (tmp == 0xDEADBEEF) {
370 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
371 r = 0;
372 } else {
373 DRM_ERROR("ib test on ring %d failed\n", ring->idx);
374 r = -EINVAL;
375 }
376
320err2: 377err2:
321 amdgpu_ib_free(adev, &ib, NULL); 378 amdgpu_ib_free(adev, &ib, NULL);
322 dma_fence_put(f); 379 dma_fence_put(f);
323err1: 380err1:
324 amdgpu_gfx_scratch_free(adev, scratch); 381 amdgpu_device_wb_free(adev, index);
325 return r; 382 return r;
326} 383}
327 384
328 385
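The rewritten gfx_v9_0_ring_test_ib() no longer consumes a scratch register: it reserves a writeback (WB) slot, has the GPU write a marker to that slot's GPU address through a WRITE_DATA packet, and then compares the CPU-visible copy. The snippet below only restates the five dwords from the hunk with their meaning spelled out; dst_sel 5 selects memory as the destination.

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);      /* PM4 header; count covers the four body dwords */
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;  /* write to memory and wait for confirmation */
	ib.ptr[2] = lower_32_bits(gpu_addr);             /* WB slot GPU address, low 32 bits */
	ib.ptr[3] = upper_32_bits(gpu_addr);             /* WB slot GPU address, high 32 bits */
	ib.ptr[4] = 0xDEADBEEF;                          /* marker the CPU later reads back from adev->wb.wb[index] */
	ib.length_dw = 5;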
@@ -362,6 +419,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
362 case CHIP_VEGA10: 419 case CHIP_VEGA10:
363 chip_name = "vega10"; 420 chip_name = "vega10";
364 break; 421 break;
422 case CHIP_VEGA12:
423 chip_name = "vega12";
424 break;
365 case CHIP_RAVEN: 425 case CHIP_RAVEN:
366 chip_name = "raven"; 426 chip_name = "raven";
367 break; 427 break;
@@ -938,12 +998,19 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
938 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 998 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
939} 999}
940 1000
1001static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1002 u32 me, u32 pipe, u32 q)
1003{
1004 soc15_grbm_select(adev, me, pipe, q, 0);
1005}
1006
941static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1007static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
942 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1008 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
943 .select_se_sh = &gfx_v9_0_select_se_sh, 1009 .select_se_sh = &gfx_v9_0_select_se_sh,
944 .read_wave_data = &gfx_v9_0_read_wave_data, 1010 .read_wave_data = &gfx_v9_0_read_wave_data,
945 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1011 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
946 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1012 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1013 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
947}; 1014};
948 1015
949static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1016static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
@@ -961,6 +1028,15 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
961 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1028 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
962 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1029 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
963 break; 1030 break;
1031 case CHIP_VEGA12:
1032 adev->gfx.config.max_hw_contexts = 8;
1033 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1034 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1035 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1036 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1037 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1038 DRM_INFO("fix gfx.config for vega12\n");
1039 break;
964 case CHIP_RAVEN: 1040 case CHIP_RAVEN:
965 adev->gfx.config.max_hw_contexts = 8; 1041 adev->gfx.config.max_hw_contexts = 8;
966 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1042 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -1242,6 +1318,7 @@ static int gfx_v9_0_sw_init(void *handle)
1242 1318
1243 switch (adev->asic_type) { 1319 switch (adev->asic_type) {
1244 case CHIP_VEGA10: 1320 case CHIP_VEGA10:
1321 case CHIP_VEGA12:
1245 case CHIP_RAVEN: 1322 case CHIP_RAVEN:
1246 adev->gfx.mec.num_mec = 2; 1323 adev->gfx.mec.num_mec = 2;
1247 break; 1324 break;
@@ -1254,23 +1331,23 @@ static int gfx_v9_0_sw_init(void *handle)
1254 adev->gfx.mec.num_queue_per_pipe = 8; 1331 adev->gfx.mec.num_queue_per_pipe = 8;
1255 1332
1256 /* KIQ event */ 1333 /* KIQ event */
1257 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); 1334 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
1258 if (r) 1335 if (r)
1259 return r; 1336 return r;
1260 1337
1261 /* EOP Event */ 1338 /* EOP Event */
1262 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq); 1339 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
1263 if (r) 1340 if (r)
1264 return r; 1341 return r;
1265 1342
1266 /* Privileged reg */ 1343 /* Privileged reg */
1267 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184, 1344 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184,
1268 &adev->gfx.priv_reg_irq); 1345 &adev->gfx.priv_reg_irq);
1269 if (r) 1346 if (r)
1270 return r; 1347 return r;
1271 1348
1272 /* Privileged inst */ 1349 /* Privileged inst */
1273 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185, 1350 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185,
1274 &adev->gfx.priv_inst_irq); 1351 &adev->gfx.priv_inst_irq);
1275 if (r) 1352 if (r)
1276 return r; 1353 return r;
@@ -1539,7 +1616,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
1539 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1616 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1540 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1617 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1541 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); 1618 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1542 tmp = adev->mc.shared_aperture_start >> 48; 1619 tmp = adev->gmc.shared_aperture_start >> 48;
1543 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); 1620 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
1544 } 1621 }
1545 } 1622 }
@@ -2687,6 +2764,45 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2687 return 0; 2764 return 0;
2688} 2765}
2689 2766
2767static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
2768{
2769 struct amdgpu_device *adev = ring->adev;
2770 int j;
2771
2772 /* disable the queue if it's active */
2773 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2774
2775 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2776
2777 for (j = 0; j < adev->usec_timeout; j++) {
2778 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2779 break;
2780 udelay(1);
2781 }
2782
2783 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
2784 DRM_DEBUG("KIQ dequeue request failed.\n");
2785
2786 /* Manual disable if dequeue request times out */
2787 WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
2788 }
2789
2790 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2791 0);
2792 }
2793
2794 WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
2795 WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
2796 WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
2797 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
2798 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
2799 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
2800 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
2801 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
2802
2803 return 0;
2804}
2805
2690static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 2806static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
2691{ 2807{
2692 struct amdgpu_device *adev = ring->adev; 2808 struct amdgpu_device *adev = ring->adev;
@@ -2940,7 +3056,6 @@ static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring
2940 return r; 3056 return r;
2941} 3057}
2942 3058
2943
2944static int gfx_v9_0_hw_fini(void *handle) 3059static int gfx_v9_0_hw_fini(void *handle)
2945{ 3060{
2946 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3061 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2954,9 +3069,29 @@ static int gfx_v9_0_hw_fini(void *handle)
2954 gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); 3069 gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
2955 3070
2956 if (amdgpu_sriov_vf(adev)) { 3071 if (amdgpu_sriov_vf(adev)) {
2957 pr_debug("For SRIOV client, shouldn't do anything.\n"); 3072 gfx_v9_0_cp_gfx_enable(adev, false);
3073 /* must disable polling for SRIOV once hw fini is done, otherwise the
3074 * CPC engine may keep fetching the WB address, which is already
3075 * invalid after sw fini, and trigger a DMAR read error on the
3076 * hypervisor side.
3077 */
3078 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2958 return 0; 3079 return 0;
2959 } 3080 }
3081
3082 /* Use the deinitialize sequence from CAIL when unbinding the device from
3083 * the driver, otherwise KIQ hangs when binding back.
3084 */
3085 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
3086 mutex_lock(&adev->srbm_mutex);
3087 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3088 adev->gfx.kiq.ring.pipe,
3089 adev->gfx.kiq.ring.queue, 0);
3090 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3091 soc15_grbm_select(adev, 0, 0, 0, 0);
3092 mutex_unlock(&adev->srbm_mutex);
3093 }
3094
2960 gfx_v9_0_cp_enable(adev, false); 3095 gfx_v9_0_cp_enable(adev, false);
2961 gfx_v9_0_rlc_stop(adev); 3096 gfx_v9_0_rlc_stop(adev);
2962 3097
@@ -3469,6 +3604,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
3469 3604
3470 switch (adev->asic_type) { 3605 switch (adev->asic_type) {
3471 case CHIP_VEGA10: 3606 case CHIP_VEGA10:
3607 case CHIP_VEGA12:
3472 case CHIP_RAVEN: 3608 case CHIP_RAVEN:
3473 gfx_v9_0_update_gfx_clock_gating(adev, 3609 gfx_v9_0_update_gfx_clock_gating(adev,
3474 state == AMD_CG_STATE_GATE ? true : false); 3610 state == AMD_CG_STATE_GATE ? true : false);
@@ -3585,14 +3721,6 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
3585 ref_and_mask, ref_and_mask, 0x20); 3721 ref_and_mask, ref_and_mask, 0x20);
3586} 3722}
3587 3723
3588static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
3589{
3590 struct amdgpu_device *adev = ring->adev;
3591
3592 gfx_v9_0_write_data_to_reg(ring, 0, true,
3593 SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
3594}
3595
3596static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 3724static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
3597 struct amdgpu_ib *ib, 3725 struct amdgpu_ib *ib,
3598 unsigned vmid, bool ctx_switch) 3726 unsigned vmid, bool ctx_switch)
@@ -3686,32 +3814,10 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3686static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 3814static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3687 unsigned vmid, uint64_t pd_addr) 3815 unsigned vmid, uint64_t pd_addr)
3688{ 3816{
3689 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 3817 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3690 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3691 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
3692 uint64_t flags = AMDGPU_PTE_VALID;
3693 unsigned eng = ring->vm_inv_eng;
3694
3695 amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
3696 pd_addr |= flags;
3697
3698 gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3699 hub->ctx0_ptb_addr_lo32 + (2 * vmid),
3700 lower_32_bits(pd_addr));
3701
3702 gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3703 hub->ctx0_ptb_addr_hi32 + (2 * vmid),
3704 upper_32_bits(pd_addr));
3705
3706 gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3707 hub->vm_inv_eng0_req + eng, req);
3708
3709 /* wait for the invalidate to complete */
3710 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
3711 eng, 0, 1 << vmid, 1 << vmid, 0x20);
3712 3818
3713 /* compute doesn't have PFP */ 3819 /* compute doesn't have PFP */
3714 if (usepfp) { 3820 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
3715 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 3821 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3716 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 3822 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3717 amdgpu_ring_write(ring, 0x0); 3823 amdgpu_ring_write(ring, 0x0);
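After this hunk the gfx code no longer touches the VM hub registers directly: the flush is delegated to amdgpu_gmc_emit_flush_gpu_tlb(), and only the gfx-ring-specific PFP_SYNC_ME remains local. The definition below is an assumed sketch of that dispatch, following the callback names visible elsewhere in this diff (gmc.gmc_funcs, .emit_wreg, .emit_reg_wait); the authoritative macro lives in the amdgpu headers.

/* assumed shape of the dispatch macro */
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, pd_addr)			\
	((r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (pd_addr)))

/* The per-ASIC GMC implementation then composes the flush from the new
 * ring hooks: the page-table base and invalidate-request writes go through
 * ring->funcs->emit_wreg(), and the wait for the invalidate ack goes
 * through ring->funcs->emit_reg_wait(), replacing the open-coded
 * WRITE_DATA/WAIT_REG_MEM sequence removed above.
 */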
@@ -3735,6 +3841,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3735 return wptr; 3841 return wptr;
3736} 3842}
3737 3843
3844static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
3845 bool acquire)
3846{
3847 struct amdgpu_device *adev = ring->adev;
3848 int pipe_num, tmp, reg;
3849 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
3850
3851 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
3852
3853 /* first me only has 2 entries, GFX and HP3D */
3854 if (ring->me > 0)
3855 pipe_num -= 2;
3856
3857 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
3858 tmp = RREG32(reg);
3859 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
3860 WREG32(reg, tmp);
3861}
3862
3863static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
3864 struct amdgpu_ring *ring,
3865 bool acquire)
3866{
3867 int i, pipe;
3868 bool reserve;
3869 struct amdgpu_ring *iring;
3870
3871 mutex_lock(&adev->gfx.pipe_reserve_mutex);
3872 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
3873 if (acquire)
3874 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3875 else
3876 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3877
3878 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
3879 /* Clear all reservations - everyone reacquires all resources */
3880 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
3881 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
3882 true);
3883
3884 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
3885 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
3886 true);
3887 } else {
3888 /* Lower all pipes without a current reservation */
3889 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
3890 iring = &adev->gfx.gfx_ring[i];
3891 pipe = amdgpu_gfx_queue_to_bit(adev,
3892 iring->me,
3893 iring->pipe,
3894 0);
3895 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3896 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3897 }
3898
3899 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
3900 iring = &adev->gfx.compute_ring[i];
3901 pipe = amdgpu_gfx_queue_to_bit(adev,
3902 iring->me,
3903 iring->pipe,
3904 0);
3905 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3906 gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3907 }
3908 }
3909
3910 mutex_unlock(&adev->gfx.pipe_reserve_mutex);
3911}
3912
3913static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
3914 struct amdgpu_ring *ring,
3915 bool acquire)
3916{
3917 uint32_t pipe_priority = acquire ? 0x2 : 0x0;
3918 uint32_t queue_priority = acquire ? 0xf : 0x0;
3919
3920 mutex_lock(&adev->srbm_mutex);
3921 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3922
3923 WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
3924 WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
3925
3926 soc15_grbm_select(adev, 0, 0, 0, 0);
3927 mutex_unlock(&adev->srbm_mutex);
3928}
3929
3930static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
3931 enum drm_sched_priority priority)
3932{
3933 struct amdgpu_device *adev = ring->adev;
3934 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
3935
3936 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
3937 return;
3938
3939 gfx_v9_0_hqd_set_priority(adev, ring, acquire);
3940 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
3941}
3942
3738static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 3943static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3739{ 3944{
3740 struct amdgpu_device *adev = ring->adev; 3945 struct amdgpu_device *adev = ring->adev;
@@ -3788,7 +3993,7 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
3788 int cnt; 3993 int cnt;
3789 3994
3790 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 3995 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
3791 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; 3996 csa_addr = amdgpu_csa_vaddr(ring->adev);
3792 3997
3793 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 3998 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
3794 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 3999 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -3806,7 +4011,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
3806 uint64_t csa_addr, gds_addr; 4011 uint64_t csa_addr, gds_addr;
3807 int cnt; 4012 int cnt;
3808 4013
3809 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; 4014 csa_addr = amdgpu_csa_vaddr(ring->adev);
3810 gds_addr = csa_addr + 4096; 4015 gds_addr = csa_addr + 4096;
3811 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 4016 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
3812 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 4017 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -3904,15 +4109,34 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
3904} 4109}
3905 4110
3906static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4111static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
3907 uint32_t val) 4112 uint32_t val)
3908{ 4113{
4114 uint32_t cmd = 0;
4115
4116 switch (ring->funcs->type) {
4117 case AMDGPU_RING_TYPE_GFX:
4118 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4119 break;
4120 case AMDGPU_RING_TYPE_KIQ:
4121 cmd = (1 << 16); /* no inc addr */
4122 break;
4123 default:
4124 cmd = WR_CONFIRM;
4125 break;
4126 }
3909 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4127 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3910 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ 4128 amdgpu_ring_write(ring, cmd);
3911 amdgpu_ring_write(ring, reg); 4129 amdgpu_ring_write(ring, reg);
3912 amdgpu_ring_write(ring, 0); 4130 amdgpu_ring_write(ring, 0);
3913 amdgpu_ring_write(ring, val); 4131 amdgpu_ring_write(ring, val);
3914} 4132}
3915 4133
4134static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4135 uint32_t val, uint32_t mask)
4136{
4137 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4138}
4139
3916static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4140static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
3917 enum amdgpu_interrupt_state state) 4141 enum amdgpu_interrupt_state state)
3918{ 4142{
@@ -4199,7 +4423,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
4199 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 4423 .emit_frame_size = /* totally 242 maximum if 16 IBs */
4200 5 + /* COND_EXEC */ 4424 5 + /* COND_EXEC */
4201 7 + /* PIPELINE_SYNC */ 4425 7 + /* PIPELINE_SYNC */
4202 24 + /* VM_FLUSH */ 4426 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4427 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4428 2 + /* VM_FLUSH */
4203 8 + /* FENCE for VM_FLUSH */ 4429 8 + /* FENCE for VM_FLUSH */
4204 20 + /* GDS switch */ 4430 20 + /* GDS switch */
4205 4 + /* double SWITCH_BUFFER, 4431 4 + /* double SWITCH_BUFFER,
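The VM_FLUSH entry in emit_frame_size is no longer a hard-coded 24 dwords; it is computed from the operations the generic flush emits. Each register write produced by gfx_v9_0_ring_emit_wreg() above takes 5 dwords, each wait through gfx_v9_0_ring_emit_reg_wait() takes the 7 dwords of a WAIT_REG_MEM, and the trailing 2 leaves room for the ring-specific tail (the PFP_SYNC_ME pair on gfx rings). The worked form, with the SOC15_FLUSH_GPU_TLB_NUM_* constants defined elsewhere in this series:

/* vm_flush budget in dwords:
 *   SOC15_FLUSH_GPU_TLB_NUM_WREG     * 5   each emit_wreg: header + cmd + reg + 0 + val
 * + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7   each emit_reg_wait: one WAIT_REG_MEM
 * + 2                                      ring-specific tail (e.g. PFP_SYNC_ME)
 */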
@@ -4221,7 +4447,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
4221 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 4447 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
4222 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 4448 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
4223 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 4449 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
4224 .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
4225 .test_ring = gfx_v9_0_ring_test_ring, 4450 .test_ring = gfx_v9_0_ring_test_ring,
4226 .test_ib = gfx_v9_0_ring_test_ib, 4451 .test_ib = gfx_v9_0_ring_test_ib,
4227 .insert_nop = amdgpu_ring_insert_nop, 4452 .insert_nop = amdgpu_ring_insert_nop,
@@ -4231,6 +4456,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
4231 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 4456 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
4232 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 4457 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
4233 .emit_tmz = gfx_v9_0_ring_emit_tmz, 4458 .emit_tmz = gfx_v9_0_ring_emit_tmz,
4459 .emit_wreg = gfx_v9_0_ring_emit_wreg,
4460 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4234}; 4461};
4235 4462
4236static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 4463static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4245,9 +4472,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
4245 .emit_frame_size = 4472 .emit_frame_size =
4246 20 + /* gfx_v9_0_ring_emit_gds_switch */ 4473 20 + /* gfx_v9_0_ring_emit_gds_switch */
4247 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 4474 7 + /* gfx_v9_0_ring_emit_hdp_flush */
4248 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ 4475 5 + /* hdp invalidate */
4249 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 4476 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4250 24 + /* gfx_v9_0_ring_emit_vm_flush */ 4477 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4478 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4479 2 + /* gfx_v9_0_ring_emit_vm_flush */
4251 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 4480 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
4252 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ 4481 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
4253 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 4482 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -4256,11 +4485,13 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
4256 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 4485 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
4257 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 4486 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
4258 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 4487 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
4259 .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
4260 .test_ring = gfx_v9_0_ring_test_ring, 4488 .test_ring = gfx_v9_0_ring_test_ring,
4261 .test_ib = gfx_v9_0_ring_test_ib, 4489 .test_ib = gfx_v9_0_ring_test_ib,
4262 .insert_nop = amdgpu_ring_insert_nop, 4490 .insert_nop = amdgpu_ring_insert_nop,
4263 .pad_ib = amdgpu_ring_generic_pad_ib, 4491 .pad_ib = amdgpu_ring_generic_pad_ib,
4492 .set_priority = gfx_v9_0_ring_set_priority_compute,
4493 .emit_wreg = gfx_v9_0_ring_emit_wreg,
4494 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4264}; 4495};
4265 4496
4266static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 4497static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4275,9 +4506,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
4275 .emit_frame_size = 4506 .emit_frame_size =
4276 20 + /* gfx_v9_0_ring_emit_gds_switch */ 4507 20 + /* gfx_v9_0_ring_emit_gds_switch */
4277 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 4508 7 + /* gfx_v9_0_ring_emit_hdp_flush */
4278 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ 4509 5 + /* hdp invalidate */
4279 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 4510 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4280 24 + /* gfx_v9_0_ring_emit_vm_flush */ 4511 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4512 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4513 2 + /* gfx_v9_0_ring_emit_vm_flush */
4281 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 4514 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
4282 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ 4515 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
4283 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 4516 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -4288,6 +4521,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
4288 .pad_ib = amdgpu_ring_generic_pad_ib, 4521 .pad_ib = amdgpu_ring_generic_pad_ib,
4289 .emit_rreg = gfx_v9_0_ring_emit_rreg, 4522 .emit_rreg = gfx_v9_0_ring_emit_rreg,
4290 .emit_wreg = gfx_v9_0_ring_emit_wreg, 4523 .emit_wreg = gfx_v9_0_ring_emit_wreg,
4524 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4291}; 4525};
4292 4526
4293static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 4527static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -4342,6 +4576,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
4342{ 4576{
4343 switch (adev->asic_type) { 4577 switch (adev->asic_type) {
4344 case CHIP_VEGA10: 4578 case CHIP_VEGA10:
4579 case CHIP_VEGA12:
4345 case CHIP_RAVEN: 4580 case CHIP_RAVEN:
4346 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 4581 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
4347 break; 4582 break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 56f5fe4e2fee..acfbd2d749cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -40,7 +40,7 @@ static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
40 uint64_t value; 40 uint64_t value;
41 41
42 BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); 42 BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
43 value = adev->gart.table_addr - adev->mc.vram_start 43 value = adev->gart.table_addr - adev->gmc.vram_start
44 + adev->vm_manager.vram_base_offset; 44 + adev->vm_manager.vram_base_offset;
45 value &= 0x0000FFFFFFFFF000ULL; 45 value &= 0x0000FFFFFFFFF000ULL;
46 value |= 0x1; /*valid bit*/ 46 value |= 0x1; /*valid bit*/
@@ -57,14 +57,14 @@ static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
57 gfxhub_v1_0_init_gart_pt_regs(adev); 57 gfxhub_v1_0_init_gart_pt_regs(adev);
58 58
59 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, 59 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
60 (u32)(adev->mc.gart_start >> 12)); 60 (u32)(adev->gmc.gart_start >> 12));
61 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, 61 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
62 (u32)(adev->mc.gart_start >> 44)); 62 (u32)(adev->gmc.gart_start >> 44));
63 63
64 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, 64 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
65 (u32)(adev->mc.gart_end >> 12)); 65 (u32)(adev->gmc.gart_end >> 12));
66 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, 66 WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
67 (u32)(adev->mc.gart_end >> 44)); 67 (u32)(adev->gmc.gart_end >> 44));
68} 68}
69 69
70static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) 70static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
@@ -78,12 +78,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
78 78
79 /* Program the system aperture low logical page number. */ 79 /* Program the system aperture low logical page number. */
80 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 80 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
81 adev->mc.vram_start >> 18); 81 adev->gmc.vram_start >> 18);
82 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 82 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
83 adev->mc.vram_end >> 18); 83 adev->gmc.vram_end >> 18);
84 84
85 /* Set default page address. */ 85 /* Set default page address. */
86 value = adev->vram_scratch.gpu_addr - adev->mc.vram_start 86 value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
87 + adev->vm_manager.vram_base_offset; 87 + adev->vm_manager.vram_base_offset;
88 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 88 WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
89 (u32)(value >> 12)); 89 (u32)(value >> 12));
@@ -92,9 +92,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
92 92
93 /* Program "protection fault". */ 93 /* Program "protection fault". */
94 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, 94 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
95 (u32)(adev->dummy_page.addr >> 12)); 95 (u32)(adev->dummy_page_addr >> 12));
96 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, 96 WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
97 (u32)((u64)adev->dummy_page.addr >> 44)); 97 (u32)((u64)adev->dummy_page_addr >> 44));
98 98
99 WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2, 99 WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
100 ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); 100 ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
@@ -143,7 +143,7 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
143 WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); 143 WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
144 144
145 tmp = mmVM_L2_CNTL3_DEFAULT; 145 tmp = mmVM_L2_CNTL3_DEFAULT;
146 if (adev->mc.translate_further) { 146 if (adev->gmc.translate_further) {
147 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); 147 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
148 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, 148 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
149 L2_CACHE_BIGK_FRAGMENT_SIZE, 9); 149 L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
@@ -195,7 +195,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
195 195
196 num_level = adev->vm_manager.num_level; 196 num_level = adev->vm_manager.num_level;
197 block_size = adev->vm_manager.block_size; 197 block_size = adev->vm_manager.block_size;
198 if (adev->mc.translate_further) 198 if (adev->gmc.translate_further)
199 num_level -= 1; 199 num_level -= 1;
200 else 200 else
201 block_size -= 9; 201 block_size -= 9;
@@ -257,9 +257,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
257 * SRIOV driver need to program them 257 * SRIOV driver need to program them
258 */ 258 */
259 WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, 259 WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
260 adev->mc.vram_start >> 24); 260 adev->gmc.vram_start >> 24);
261 WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, 261 WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
262 adev->mc.vram_end >> 24); 262 adev->gmc.vram_end >> 24);
263 } 263 }
264 264
265 /* GART Enable. */ 265 /* GART Enable. */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 8e28270d1ea9..5617cf62c566 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -22,6 +22,7 @@
22 */ 22 */
23#include <linux/firmware.h> 23#include <linux/firmware.h>
24#include <drm/drmP.h> 24#include <drm/drmP.h>
25#include <drm/drm_cache.h>
25#include "amdgpu.h" 26#include "amdgpu.h"
26#include "gmc_v6_0.h" 27#include "gmc_v6_0.h"
27#include "amdgpu_ucode.h" 28#include "amdgpu_ucode.h"
@@ -36,7 +37,7 @@
36#include "dce/dce_6_0_sh_mask.h" 37#include "dce/dce_6_0_sh_mask.h"
37#include "si_enums.h" 38#include "si_enums.h"
38 39
39static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev); 40static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
40static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev); 41static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
41static int gmc_v6_0_wait_for_idle(void *handle); 42static int gmc_v6_0_wait_for_idle(void *handle);
42 43
@@ -136,19 +137,19 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
136 snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin"); 137 snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
137 else 138 else
138 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); 139 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
139 err = request_firmware(&adev->mc.fw, fw_name, adev->dev); 140 err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
140 if (err) 141 if (err)
141 goto out; 142 goto out;
142 143
143 err = amdgpu_ucode_validate(adev->mc.fw); 144 err = amdgpu_ucode_validate(adev->gmc.fw);
144 145
145out: 146out:
146 if (err) { 147 if (err) {
147 dev_err(adev->dev, 148 dev_err(adev->dev,
148 "si_mc: Failed to load firmware \"%s\"\n", 149 "si_mc: Failed to load firmware \"%s\"\n",
149 fw_name); 150 fw_name);
150 release_firmware(adev->mc.fw); 151 release_firmware(adev->gmc.fw);
151 adev->mc.fw = NULL; 152 adev->gmc.fw = NULL;
152 } 153 }
153 return err; 154 return err;
154} 155}
@@ -161,20 +162,20 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
161 int i, regs_size, ucode_size; 162 int i, regs_size, ucode_size;
162 const struct mc_firmware_header_v1_0 *hdr; 163 const struct mc_firmware_header_v1_0 *hdr;
163 164
164 if (!adev->mc.fw) 165 if (!adev->gmc.fw)
165 return -EINVAL; 166 return -EINVAL;
166 167
167 hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; 168 hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
168 169
169 amdgpu_ucode_print_mc_hdr(&hdr->header); 170 amdgpu_ucode_print_mc_hdr(&hdr->header);
170 171
171 adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); 172 adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
172 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); 173 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
173 new_io_mc_regs = (const __le32 *) 174 new_io_mc_regs = (const __le32 *)
174 (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); 175 (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
175 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 176 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
176 new_fw_data = (const __le32 *) 177 new_fw_data = (const __le32 *)
177 (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 178 (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
178 179
179 running = RREG32(mmMC_SEQ_SUP_CNTL) & MC_SEQ_SUP_CNTL__RUN_MASK; 180 running = RREG32(mmMC_SEQ_SUP_CNTL) & MC_SEQ_SUP_CNTL__RUN_MASK;
180 181
@@ -217,12 +218,12 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
217} 218}
218 219
219static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, 220static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
220 struct amdgpu_mc *mc) 221 struct amdgpu_gmc *mc)
221{ 222{
222 u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; 223 u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
223 base <<= 24; 224 base <<= 24;
224 225
225 amdgpu_device_vram_location(adev, &adev->mc, base); 226 amdgpu_device_vram_location(adev, &adev->gmc, base);
226 amdgpu_device_gart_location(adev, mc); 227 amdgpu_device_gart_location(adev, mc);
227} 228}
228 229
@@ -259,9 +260,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
259 } 260 }
260 /* Update configuration */ 261 /* Update configuration */
261 WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 262 WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
262 adev->mc.vram_start >> 12); 263 adev->gmc.vram_start >> 12);
263 WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 264 WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
264 adev->mc.vram_end >> 12); 265 adev->gmc.vram_end >> 12);
265 WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 266 WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
266 adev->vram_scratch.gpu_addr >> 12); 267 adev->vram_scratch.gpu_addr >> 12);
267 WREG32(mmMC_VM_AGP_BASE, 0); 268 WREG32(mmMC_VM_AGP_BASE, 0);
@@ -319,56 +320,69 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
319 numchan = 16; 320 numchan = 16;
320 break; 321 break;
321 } 322 }
322 adev->mc.vram_width = numchan * chansize; 323 adev->gmc.vram_width = numchan * chansize;
323 /* size in MB on si */ 324 /* size in MB on si */
324 adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; 325 adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
325 adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; 326 adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
326 327
327 if (!(adev->flags & AMD_IS_APU)) { 328 if (!(adev->flags & AMD_IS_APU)) {
328 r = amdgpu_device_resize_fb_bar(adev); 329 r = amdgpu_device_resize_fb_bar(adev);
329 if (r) 330 if (r)
330 return r; 331 return r;
331 } 332 }
332 adev->mc.aper_base = pci_resource_start(adev->pdev, 0); 333 adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
333 adev->mc.aper_size = pci_resource_len(adev->pdev, 0); 334 adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
334 adev->mc.visible_vram_size = adev->mc.aper_size; 335 adev->gmc.visible_vram_size = adev->gmc.aper_size;
335 336
336 /* set the gart size */ 337 /* set the gart size */
337 if (amdgpu_gart_size == -1) { 338 if (amdgpu_gart_size == -1) {
338 switch (adev->asic_type) { 339 switch (adev->asic_type) {
339 case CHIP_HAINAN: /* no MM engines */ 340 case CHIP_HAINAN: /* no MM engines */
340 default: 341 default:
341 adev->mc.gart_size = 256ULL << 20; 342 adev->gmc.gart_size = 256ULL << 20;
342 break; 343 break;
343 case CHIP_VERDE: /* UVD, VCE do not support GPUVM */ 344 case CHIP_VERDE: /* UVD, VCE do not support GPUVM */
344 case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */ 345 case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */
345 case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */ 346 case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
346 case CHIP_OLAND: /* UVD, VCE do not support GPUVM */ 347 case CHIP_OLAND: /* UVD, VCE do not support GPUVM */
347 adev->mc.gart_size = 1024ULL << 20; 348 adev->gmc.gart_size = 1024ULL << 20;
348 break; 349 break;
349 } 350 }
350 } else { 351 } else {
351 adev->mc.gart_size = (u64)amdgpu_gart_size << 20; 352 adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
352 } 353 }
353 354
354 gmc_v6_0_vram_gtt_location(adev, &adev->mc); 355 gmc_v6_0_vram_gtt_location(adev, &adev->gmc);
355 356
356 return 0; 357 return 0;
357} 358}
358 359
359static void gmc_v6_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, 360static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
360 uint32_t vmid)
361{ 361{
362 WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
363
364 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); 362 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
365} 363}
366 364
367static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev, 365static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
368 void *cpu_pt_addr, 366 unsigned vmid, uint64_t pd_addr)
369 uint32_t gpu_page_idx, 367{
370 uint64_t addr, 368 uint32_t reg;
371 uint64_t flags) 369
370 /* write new base address */
371 if (vmid < 8)
372 reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
373 else
374 reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8);
375 amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
376
377 /* bits 0-15 are the VM contexts 0-15 */
378 amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
379
380 return pd_addr;
381}
382
383static int gmc_v6_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
384 uint32_t gpu_page_idx, uint64_t addr,
385 uint64_t flags)
372{ 386{
373 void __iomem *ptr = (void *)cpu_pt_addr; 387 void __iomem *ptr = (void *)cpu_pt_addr;
374 uint64_t value; 388 uint64_t value;
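gmc_v6_0_emit_flush_gpu_tlb() reduces SI's in-band flush to two ring-agnostic register writes: the per-VMID page-table base (contexts 0-7 and 8-15 live in separate register banks, and pd_addr >> 12 converts the table address into a 4 KiB frame number) followed by VM_INVALIDATE_REQUEST. A hypothetical caller is sketched below; the wrapper name is illustrative, while amdgpu_gmc_emit_flush_gpu_tlb() is the dispatch helper already used in the gfx_v9_0 hunk earlier in this diff.

/* illustrative only: an SI ring backend's emit_vm_flush now collapses to */
static void si_emit_vm_flush_sketch(struct amdgpu_ring *ring,
				    unsigned vmid, uint64_t pd_addr)
{
	/* emits the PAGE_TABLE_BASE_ADDR write and the VM_INVALIDATE_REQUEST
	 * through the ring's .emit_wreg hook, as implemented above */
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}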
@@ -432,9 +446,9 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
432{ 446{
433 u32 tmp; 447 u32 tmp;
434 448
435 if (enable && !adev->mc.prt_warning) { 449 if (enable && !adev->gmc.prt_warning) {
436 dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); 450 dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
437 adev->mc.prt_warning = true; 451 adev->gmc.prt_warning = true;
438 } 452 }
439 453
440 tmp = RREG32(mmVM_PRT_CNTL); 454 tmp = RREG32(mmVM_PRT_CNTL);
@@ -454,7 +468,8 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
454 468
455 if (enable) { 469 if (enable) {
456 uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; 470 uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
457 uint32_t high = adev->vm_manager.max_pfn; 471 uint32_t high = adev->vm_manager.max_pfn -
472 (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
458 473
459 WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); 474 WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
460 WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); 475 WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -514,11 +529,11 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
514 (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) | 529 (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
515 (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); 530 (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
516 /* setup context0 */ 531 /* setup context0 */
517 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); 532 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
518 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); 533 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
519 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); 534 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
520 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 535 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
521 (u32)(adev->dummy_page.addr >> 12)); 536 (u32)(adev->dummy_page_addr >> 12));
522 WREG32(mmVM_CONTEXT0_CNTL2, 0); 537 WREG32(mmVM_CONTEXT0_CNTL2, 0);
523 WREG32(mmVM_CONTEXT0_CNTL, 538 WREG32(mmVM_CONTEXT0_CNTL,
524 VM_CONTEXT0_CNTL__ENABLE_CONTEXT_MASK | 539 VM_CONTEXT0_CNTL__ENABLE_CONTEXT_MASK |
@@ -548,7 +563,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
548 563
549 /* enable context1-15 */ 564 /* enable context1-15 */
550 WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 565 WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
551 (u32)(adev->dummy_page.addr >> 12)); 566 (u32)(adev->dummy_page_addr >> 12));
552 WREG32(mmVM_CONTEXT1_CNTL2, 4); 567 WREG32(mmVM_CONTEXT1_CNTL2, 4);
553 WREG32(mmVM_CONTEXT1_CNTL, 568 WREG32(mmVM_CONTEXT1_CNTL,
554 VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK | 569 VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK |
@@ -560,9 +575,9 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
560 else 575 else
561 gmc_v6_0_set_fault_enable_default(adev, true); 576 gmc_v6_0_set_fault_enable_default(adev, true);
562 577
563 gmc_v6_0_gart_flush_gpu_tlb(adev, 0); 578 gmc_v6_0_flush_gpu_tlb(adev, 0);
564 dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", 579 dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
565 (unsigned)(adev->mc.gart_size >> 20), 580 (unsigned)(adev->gmc.gart_size >> 20),
566 (unsigned long long)adev->gart.table_addr); 581 (unsigned long long)adev->gart.table_addr);
567 adev->gart.ready = true; 582 adev->gart.ready = true;
568 return 0; 583 return 0;
@@ -794,7 +809,7 @@ static int gmc_v6_0_early_init(void *handle)
794{ 809{
795 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 810 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
796 811
797 gmc_v6_0_set_gart_funcs(adev); 812 gmc_v6_0_set_gmc_funcs(adev);
798 gmc_v6_0_set_irq_funcs(adev); 813 gmc_v6_0_set_irq_funcs(adev);
799 814
800 return 0; 815 return 0;
@@ -805,7 +820,7 @@ static int gmc_v6_0_late_init(void *handle)
805 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 820 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
806 821
807 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) 822 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
808 return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); 823 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
809 else 824 else
810 return 0; 825 return 0;
811} 826}
@@ -817,26 +832,26 @@ static int gmc_v6_0_sw_init(void *handle)
817 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 832 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
818 833
819 if (adev->flags & AMD_IS_APU) { 834 if (adev->flags & AMD_IS_APU) {
820 adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; 835 adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
821 } else { 836 } else {
822 u32 tmp = RREG32(mmMC_SEQ_MISC0); 837 u32 tmp = RREG32(mmMC_SEQ_MISC0);
823 tmp &= MC_SEQ_MISC0__MT__MASK; 838 tmp &= MC_SEQ_MISC0__MT__MASK;
824 adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp); 839 adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp);
825 } 840 }
826 841
827 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); 842 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
828 if (r) 843 if (r)
829 return r; 844 return r;
830 845
831 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); 846 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
832 if (r) 847 if (r)
833 return r; 848 return r;
834 849
835 amdgpu_vm_adjust_size(adev, 64, 9, 1, 40); 850 amdgpu_vm_adjust_size(adev, 64, 9, 1, 40);
836 851
837 adev->mc.mc_mask = 0xffffffffffULL; 852 adev->gmc.mc_mask = 0xffffffffffULL;
838 853
839 adev->mc.stolen_size = 256 * 1024; 854 adev->gmc.stolen_size = 256 * 1024;
840 855
841 adev->need_dma32 = false; 856 adev->need_dma32 = false;
842 dma_bits = adev->need_dma32 ? 32 : 40; 857 dma_bits = adev->need_dma32 ? 32 : 40;
@@ -851,6 +866,7 @@ static int gmc_v6_0_sw_init(void *handle)
851 pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); 866 pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
852 dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n"); 867 dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n");
853 } 868 }
869 adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
854 870
855 r = gmc_v6_0_init_microcode(adev); 871 r = gmc_v6_0_init_microcode(adev);
856 if (r) { 872 if (r) {
@@ -900,8 +916,8 @@ static int gmc_v6_0_sw_fini(void *handle)
900 amdgpu_vm_manager_fini(adev); 916 amdgpu_vm_manager_fini(adev);
901 gmc_v6_0_gart_fini(adev); 917 gmc_v6_0_gart_fini(adev);
902 amdgpu_bo_fini(adev); 918 amdgpu_bo_fini(adev);
903 release_firmware(adev->mc.fw); 919 release_firmware(adev->gmc.fw);
904 adev->mc.fw = NULL; 920 adev->gmc.fw = NULL;
905 921
906 return 0; 922 return 0;
907} 923}
@@ -932,7 +948,7 @@ static int gmc_v6_0_hw_fini(void *handle)
932{ 948{
933 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 949 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
934 950
935 amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); 951 amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
936 gmc_v6_0_gart_disable(adev); 952 gmc_v6_0_gart_disable(adev);
937 953
938 return 0; 954 return 0;
@@ -1127,9 +1143,10 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
1127 .set_powergating_state = gmc_v6_0_set_powergating_state, 1143 .set_powergating_state = gmc_v6_0_set_powergating_state,
1128}; 1144};
1129 1145
1130static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = { 1146static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
1131 .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, 1147 .flush_gpu_tlb = gmc_v6_0_flush_gpu_tlb,
1132 .set_pte_pde = gmc_v6_0_gart_set_pte_pde, 1148 .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb,
1149 .set_pte_pde = gmc_v6_0_set_pte_pde,
1133 .set_prt = gmc_v6_0_set_prt, 1150 .set_prt = gmc_v6_0_set_prt,
1134 .get_vm_pde = gmc_v6_0_get_vm_pde, 1151 .get_vm_pde = gmc_v6_0_get_vm_pde,
1135 .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags 1152 .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
@@ -1140,16 +1157,16 @@ static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
1140 .process = gmc_v6_0_process_interrupt, 1157 .process = gmc_v6_0_process_interrupt,
1141}; 1158};
1142 1159
1143static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev) 1160static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev)
1144{ 1161{
1145 if (adev->gart.gart_funcs == NULL) 1162 if (adev->gmc.gmc_funcs == NULL)
1146 adev->gart.gart_funcs = &gmc_v6_0_gart_funcs; 1163 adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs;
1147} 1164}
1148 1165
1149static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev) 1166static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev)
1150{ 1167{
1151 adev->mc.vm_fault.num_types = 1; 1168 adev->gmc.vm_fault.num_types = 1;
1152 adev->mc.vm_fault.funcs = &gmc_v6_0_irq_funcs; 1169 adev->gmc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
1153} 1170}
1154 1171
1155const struct amdgpu_ip_block_version gmc_v6_0_ip_block = 1172const struct amdgpu_ip_block_version gmc_v6_0_ip_block =
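Note: the gmc_v6_0 hunks above follow the series-wide pattern of renaming adev->mc to adev->gmc and moving the per-ASIC callbacks from adev->gart.gart_funcs to adev->gmc.gmc_funcs (struct amdgpu_gmc_funcs). The sketch below shows how common code is expected to reach the renamed table after this change; the wrapper name and NULL checks are illustrative only and assume the usual amdgpu headers are included.

/* Illustrative only -- not part of the patch. Dispatch a TLB flush
 * through the renamed callback table; the signature matches
 * gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *, uint32_t) above.
 */
static inline void example_gmc_flush_gpu_tlb(struct amdgpu_device *adev,
                                             uint32_t vmid)
{
        if (adev->gmc.gmc_funcs && adev->gmc.gmc_funcs->flush_gpu_tlb)
                adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid);
}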
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 86e9d682c59e..80054f36e487 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -22,6 +22,7 @@
22 */ 22 */
23#include <linux/firmware.h> 23#include <linux/firmware.h>
24#include <drm/drmP.h> 24#include <drm/drmP.h>
25#include <drm/drm_cache.h>
25#include "amdgpu.h" 26#include "amdgpu.h"
26#include "cikd.h" 27#include "cikd.h"
27#include "cik.h" 28#include "cik.h"
@@ -42,7 +43,7 @@
42 43
43#include "amdgpu_atombios.h" 44#include "amdgpu_atombios.h"
44 45
45static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); 46static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
46static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); 47static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
47static int gmc_v7_0_wait_for_idle(void *handle); 48static int gmc_v7_0_wait_for_idle(void *handle);
48 49
@@ -151,16 +152,16 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
151 else 152 else
152 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); 153 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
153 154
154 err = request_firmware(&adev->mc.fw, fw_name, adev->dev); 155 err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
155 if (err) 156 if (err)
156 goto out; 157 goto out;
157 err = amdgpu_ucode_validate(adev->mc.fw); 158 err = amdgpu_ucode_validate(adev->gmc.fw);
158 159
159out: 160out:
160 if (err) { 161 if (err) {
161 pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name); 162 pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name);
162 release_firmware(adev->mc.fw); 163 release_firmware(adev->gmc.fw);
163 adev->mc.fw = NULL; 164 adev->gmc.fw = NULL;
164 } 165 }
165 return err; 166 return err;
166} 167}
@@ -181,19 +182,19 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev)
181 u32 running; 182 u32 running;
182 int i, ucode_size, regs_size; 183 int i, ucode_size, regs_size;
183 184
184 if (!adev->mc.fw) 185 if (!adev->gmc.fw)
185 return -EINVAL; 186 return -EINVAL;
186 187
187 hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; 188 hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
188 amdgpu_ucode_print_mc_hdr(&hdr->header); 189 amdgpu_ucode_print_mc_hdr(&hdr->header);
189 190
190 adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); 191 adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
191 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); 192 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
192 io_mc_regs = (const __le32 *) 193 io_mc_regs = (const __le32 *)
193 (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); 194 (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
194 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 195 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
195 fw_data = (const __le32 *) 196 fw_data = (const __le32 *)
196 (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 197 (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
197 198
198 running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN); 199 running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN);
199 200
@@ -235,12 +236,12 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev)
235} 236}
236 237
237static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, 238static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
238 struct amdgpu_mc *mc) 239 struct amdgpu_gmc *mc)
239{ 240{
240 u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; 241 u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
241 base <<= 24; 242 base <<= 24;
242 243
243 amdgpu_device_vram_location(adev, &adev->mc, base); 244 amdgpu_device_vram_location(adev, &adev->gmc, base);
244 amdgpu_device_gart_location(adev, mc); 245 amdgpu_device_gart_location(adev, mc);
245} 246}
246 247
@@ -283,9 +284,9 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
283 } 284 }
284 /* Update configuration */ 285 /* Update configuration */
285 WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 286 WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
286 adev->mc.vram_start >> 12); 287 adev->gmc.vram_start >> 12);
287 WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 288 WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
288 adev->mc.vram_end >> 12); 289 adev->gmc.vram_end >> 12);
289 WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 290 WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
290 adev->vram_scratch.gpu_addr >> 12); 291 adev->vram_scratch.gpu_addr >> 12);
291 WREG32(mmMC_VM_AGP_BASE, 0); 292 WREG32(mmMC_VM_AGP_BASE, 0);
@@ -318,8 +319,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
318{ 319{
319 int r; 320 int r;
320 321
321 adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev); 322 adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
322 if (!adev->mc.vram_width) { 323 if (!adev->gmc.vram_width) {
323 u32 tmp; 324 u32 tmp;
324 int chansize, numchan; 325 int chansize, numchan;
325 326
@@ -361,38 +362,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
361 numchan = 16; 362 numchan = 16;
362 break; 363 break;
363 } 364 }
364 adev->mc.vram_width = numchan * chansize; 365 adev->gmc.vram_width = numchan * chansize;
365 } 366 }
366 /* size in MB on si */ 367 /* size in MB on si */
367 adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; 368 adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
368 adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; 369 adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
369 370
370 if (!(adev->flags & AMD_IS_APU)) { 371 if (!(adev->flags & AMD_IS_APU)) {
371 r = amdgpu_device_resize_fb_bar(adev); 372 r = amdgpu_device_resize_fb_bar(adev);
372 if (r) 373 if (r)
373 return r; 374 return r;
374 } 375 }
375 adev->mc.aper_base = pci_resource_start(adev->pdev, 0); 376 adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
376 adev->mc.aper_size = pci_resource_len(adev->pdev, 0); 377 adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
377 378
378#ifdef CONFIG_X86_64 379#ifdef CONFIG_X86_64
379 if (adev->flags & AMD_IS_APU) { 380 if (adev->flags & AMD_IS_APU) {
380 adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; 381 adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
381 adev->mc.aper_size = adev->mc.real_vram_size; 382 adev->gmc.aper_size = adev->gmc.real_vram_size;
382 } 383 }
383#endif 384#endif
384 385
385 /* In case the PCI BAR is larger than the actual amount of vram */ 386 /* In case the PCI BAR is larger than the actual amount of vram */
386 adev->mc.visible_vram_size = adev->mc.aper_size; 387 adev->gmc.visible_vram_size = adev->gmc.aper_size;
387 if (adev->mc.visible_vram_size > adev->mc.real_vram_size) 388 if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
388 adev->mc.visible_vram_size = adev->mc.real_vram_size; 389 adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
389 390
390 /* set the gart size */ 391 /* set the gart size */
391 if (amdgpu_gart_size == -1) { 392 if (amdgpu_gart_size == -1) {
392 switch (adev->asic_type) { 393 switch (adev->asic_type) {
393 case CHIP_TOPAZ: /* no MM engines */ 394 case CHIP_TOPAZ: /* no MM engines */
394 default: 395 default:
395 adev->mc.gart_size = 256ULL << 20; 396 adev->gmc.gart_size = 256ULL << 20;
396 break; 397 break;
397#ifdef CONFIG_DRM_AMDGPU_CIK 398#ifdef CONFIG_DRM_AMDGPU_CIK
398 case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */ 399 case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
@@ -400,15 +401,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
400 case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */ 401 case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */
401 case CHIP_KABINI: /* UVD, VCE do not support GPUVM */ 402 case CHIP_KABINI: /* UVD, VCE do not support GPUVM */
402 case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */ 403 case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
403 adev->mc.gart_size = 1024ULL << 20; 404 adev->gmc.gart_size = 1024ULL << 20;
404 break; 405 break;
405#endif 406#endif
406 } 407 }
407 } else { 408 } else {
408 adev->mc.gart_size = (u64)amdgpu_gart_size << 20; 409 adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
409 } 410 }
410 411
411 gmc_v7_0_vram_gtt_location(adev, &adev->mc); 412 gmc_v7_0_vram_gtt_location(adev, &adev->gmc);
412 413
413 return 0; 414 return 0;
414} 415}
@@ -421,25 +422,44 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
421 */ 422 */
422 423
423/** 424/**
424 * gmc_v7_0_gart_flush_gpu_tlb - gart tlb flush callback 425 * gmc_v7_0_flush_gpu_tlb - gart tlb flush callback
425 * 426 *
426 * @adev: amdgpu_device pointer 427 * @adev: amdgpu_device pointer
427 * @vmid: vm instance to flush 428 * @vmid: vm instance to flush
428 * 429 *
429 * Flush the TLB for the requested page table (CIK). 430 * Flush the TLB for the requested page table (CIK).
430 */ 431 */
431static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, 432static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
432 uint32_t vmid)
433{ 433{
434 /* flush hdp cache */
435 WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
436
437 /* bits 0-15 are the VM contexts0-15 */ 434 /* bits 0-15 are the VM contexts0-15 */
438 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); 435 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
439} 436}
440 437
438static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
439 unsigned vmid, uint64_t pd_addr)
440{
441 uint32_t reg;
442
443 if (vmid < 8)
444 reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
445 else
446 reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
447 amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
448
449 /* bits 0-15 are the VM contexts0-15 */
450 amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
451
452 return pd_addr;
453}
454
455static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
456 unsigned pasid)
457{
458 amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
459}
460
441/** 461/**
442 * gmc_v7_0_gart_set_pte_pde - update the page tables using MMIO 462 * gmc_v7_0_set_pte_pde - update the page tables using MMIO
443 * 463 *
444 * @adev: amdgpu_device pointer 464 * @adev: amdgpu_device pointer
445 * @cpu_pt_addr: cpu address of the page table 465 * @cpu_pt_addr: cpu address of the page table
@@ -449,11 +469,9 @@ static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
449 * 469 *
450 * Update the page tables using the CPU. 470 * Update the page tables using the CPU.
451 */ 471 */
452static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev, 472static int gmc_v7_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
453 void *cpu_pt_addr, 473 uint32_t gpu_page_idx, uint64_t addr,
454 uint32_t gpu_page_idx, 474 uint64_t flags)
455 uint64_t addr,
456 uint64_t flags)
457{ 475{
458 void __iomem *ptr = (void *)cpu_pt_addr; 476 void __iomem *ptr = (void *)cpu_pt_addr;
459 uint64_t value; 477 uint64_t value;
@@ -523,9 +541,9 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
523{ 541{
524 uint32_t tmp; 542 uint32_t tmp;
525 543
526 if (enable && !adev->mc.prt_warning) { 544 if (enable && !adev->gmc.prt_warning) {
527 dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); 545 dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
528 adev->mc.prt_warning = true; 546 adev->gmc.prt_warning = true;
529 } 547 }
530 548
531 tmp = RREG32(mmVM_PRT_CNTL); 549 tmp = RREG32(mmVM_PRT_CNTL);
@@ -547,7 +565,8 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
547 565
548 if (enable) { 566 if (enable) {
549 uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; 567 uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
550 uint32_t high = adev->vm_manager.max_pfn; 568 uint32_t high = adev->vm_manager.max_pfn -
569 (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
551 570
552 WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); 571 WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
553 WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); 572 WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -621,11 +640,11 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
621 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); 640 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
622 WREG32(mmVM_L2_CNTL3, tmp); 641 WREG32(mmVM_L2_CNTL3, tmp);
623 /* setup context0 */ 642 /* setup context0 */
624 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); 643 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
625 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); 644 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
626 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); 645 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
627 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 646 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
628 (u32)(adev->dummy_page.addr >> 12)); 647 (u32)(adev->dummy_page_addr >> 12));
629 WREG32(mmVM_CONTEXT0_CNTL2, 0); 648 WREG32(mmVM_CONTEXT0_CNTL2, 0);
630 tmp = RREG32(mmVM_CONTEXT0_CNTL); 649 tmp = RREG32(mmVM_CONTEXT0_CNTL);
631 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); 650 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
@@ -655,7 +674,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
655 674
656 /* enable context1-15 */ 675 /* enable context1-15 */
657 WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 676 WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
658 (u32)(adev->dummy_page.addr >> 12)); 677 (u32)(adev->dummy_page_addr >> 12));
659 WREG32(mmVM_CONTEXT1_CNTL2, 4); 678 WREG32(mmVM_CONTEXT1_CNTL2, 4);
660 tmp = RREG32(mmVM_CONTEXT1_CNTL); 679 tmp = RREG32(mmVM_CONTEXT1_CNTL);
661 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); 680 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
@@ -674,9 +693,9 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
674 WREG32(mmCHUB_CONTROL, tmp); 693 WREG32(mmCHUB_CONTROL, tmp);
675 } 694 }
676 695
677 gmc_v7_0_gart_flush_gpu_tlb(adev, 0); 696 gmc_v7_0_flush_gpu_tlb(adev, 0);
678 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 697 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
679 (unsigned)(adev->mc.gart_size >> 20), 698 (unsigned)(adev->gmc.gart_size >> 20),
680 (unsigned long long)adev->gart.table_addr); 699 (unsigned long long)adev->gart.table_addr);
681 adev->gart.ready = true; 700 adev->gart.ready = true;
682 return 0; 701 return 0;
@@ -749,21 +768,21 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
749 * 768 *
750 * Print human readable fault information (CIK). 769 * Print human readable fault information (CIK).
751 */ 770 */
752static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, 771static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
753 u32 status, u32 addr, u32 mc_client) 772 u32 addr, u32 mc_client, unsigned pasid)
754{ 773{
755 u32 mc_id;
756 u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); 774 u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
757 u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, 775 u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
758 PROTECTIONS); 776 PROTECTIONS);
759 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, 777 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
760 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; 778 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
779 u32 mc_id;
761 780
762 mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, 781 mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
763 MEMORY_CLIENT_ID); 782 MEMORY_CLIENT_ID);
764 783
765 dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", 784 dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
766 protections, vmid, addr, 785 protections, vmid, pasid, addr,
767 REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, 786 REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
768 MEMORY_CLIENT_RW) ? 787 MEMORY_CLIENT_RW) ?
769 "write" : "read", block, mc_client, mc_id); 788 "write" : "read", block, mc_client, mc_id);
@@ -921,16 +940,16 @@ static int gmc_v7_0_early_init(void *handle)
921{ 940{
922 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 941 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
923 942
924 gmc_v7_0_set_gart_funcs(adev); 943 gmc_v7_0_set_gmc_funcs(adev);
925 gmc_v7_0_set_irq_funcs(adev); 944 gmc_v7_0_set_irq_funcs(adev);
926 945
927 adev->mc.shared_aperture_start = 0x2000000000000000ULL; 946 adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
928 adev->mc.shared_aperture_end = 947 adev->gmc.shared_aperture_end =
929 adev->mc.shared_aperture_start + (4ULL << 30) - 1; 948 adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
930 adev->mc.private_aperture_start = 949 adev->gmc.private_aperture_start =
931 adev->mc.shared_aperture_end + 1; 950 adev->gmc.shared_aperture_end + 1;
932 adev->mc.private_aperture_end = 951 adev->gmc.private_aperture_end =
933 adev->mc.private_aperture_start + (4ULL << 30) - 1; 952 adev->gmc.private_aperture_start + (4ULL << 30) - 1;
934 953
935 return 0; 954 return 0;
936} 955}
@@ -940,7 +959,7 @@ static int gmc_v7_0_late_init(void *handle)
940 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 959 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
941 960
942 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) 961 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
943 return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); 962 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
944 else 963 else
945 return 0; 964 return 0;
946} 965}
@@ -952,18 +971,18 @@ static int gmc_v7_0_sw_init(void *handle)
952 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 971 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
953 972
954 if (adev->flags & AMD_IS_APU) { 973 if (adev->flags & AMD_IS_APU) {
955 adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; 974 adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
956 } else { 975 } else {
957 u32 tmp = RREG32(mmMC_SEQ_MISC0); 976 u32 tmp = RREG32(mmMC_SEQ_MISC0);
958 tmp &= MC_SEQ_MISC0__MT__MASK; 977 tmp &= MC_SEQ_MISC0__MT__MASK;
959 adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp); 978 adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);
960 } 979 }
961 980
962 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); 981 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
963 if (r) 982 if (r)
964 return r; 983 return r;
965 984
966 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); 985 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
967 if (r) 986 if (r)
968 return r; 987 return r;
969 988
@@ -977,9 +996,9 @@ static int gmc_v7_0_sw_init(void *handle)
977 * This is the max address of the GPU's 996 * This is the max address of the GPU's
978 * internal address space. 997 * internal address space.
979 */ 998 */
980 adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ 999 adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
981 1000
982 adev->mc.stolen_size = 256 * 1024; 1001 adev->gmc.stolen_size = 256 * 1024;
983 1002
984 /* set DMA mask + need_dma32 flags. 1003 /* set DMA mask + need_dma32 flags.
985 * PCIE - can handle 40-bits. 1004 * PCIE - can handle 40-bits.
@@ -999,6 +1018,7 @@ static int gmc_v7_0_sw_init(void *handle)
999 pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); 1018 pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
1000 pr_warn("amdgpu: No coherent DMA available\n"); 1019 pr_warn("amdgpu: No coherent DMA available\n");
1001 } 1020 }
1021 adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
1002 1022
1003 r = gmc_v7_0_init_microcode(adev); 1023 r = gmc_v7_0_init_microcode(adev);
1004 if (r) { 1024 if (r) {
@@ -1049,8 +1069,8 @@ static int gmc_v7_0_sw_fini(void *handle)
1049 amdgpu_vm_manager_fini(adev); 1069 amdgpu_vm_manager_fini(adev);
1050 gmc_v7_0_gart_fini(adev); 1070 gmc_v7_0_gart_fini(adev);
1051 amdgpu_bo_fini(adev); 1071 amdgpu_bo_fini(adev);
1052 release_firmware(adev->mc.fw); 1072 release_firmware(adev->gmc.fw);
1053 adev->mc.fw = NULL; 1073 adev->gmc.fw = NULL;
1054 1074
1055 return 0; 1075 return 0;
1056} 1076}
@@ -1083,7 +1103,7 @@ static int gmc_v7_0_hw_fini(void *handle)
1083{ 1103{
1084 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1104 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1085 1105
1086 amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); 1106 amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
1087 gmc_v7_0_gart_disable(adev); 1107 gmc_v7_0_gart_disable(adev);
1088 1108
1089 return 0; 1109 return 0;
@@ -1257,7 +1277,8 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
1257 addr); 1277 addr);
1258 dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 1278 dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
1259 status); 1279 status);
1260 gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client); 1280 gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client,
1281 entry->pasid);
1261 } 1282 }
1262 1283
1263 return 0; 1284 return 0;
@@ -1306,9 +1327,11 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
1306 .set_powergating_state = gmc_v7_0_set_powergating_state, 1327 .set_powergating_state = gmc_v7_0_set_powergating_state,
1307}; 1328};
1308 1329
1309static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = { 1330static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
1310 .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, 1331 .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
1311 .set_pte_pde = gmc_v7_0_gart_set_pte_pde, 1332 .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
1333 .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
1334 .set_pte_pde = gmc_v7_0_set_pte_pde,
1312 .set_prt = gmc_v7_0_set_prt, 1335 .set_prt = gmc_v7_0_set_prt,
1313 .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, 1336 .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
1314 .get_vm_pde = gmc_v7_0_get_vm_pde 1337 .get_vm_pde = gmc_v7_0_get_vm_pde
@@ -1319,16 +1342,16 @@ static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
1319 .process = gmc_v7_0_process_interrupt, 1342 .process = gmc_v7_0_process_interrupt,
1320}; 1343};
1321 1344
1322static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev) 1345static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev)
1323{ 1346{
1324 if (adev->gart.gart_funcs == NULL) 1347 if (adev->gmc.gmc_funcs == NULL)
1325 adev->gart.gart_funcs = &gmc_v7_0_gart_funcs; 1348 adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs;
1326} 1349}
1327 1350
1328static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev) 1351static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev)
1329{ 1352{
1330 adev->mc.vm_fault.num_types = 1; 1353 adev->gmc.vm_fault.num_types = 1;
1331 adev->mc.vm_fault.funcs = &gmc_v7_0_irq_funcs; 1354 adev->gmc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
1332} 1355}
1333 1356
1334const struct amdgpu_ip_block_version gmc_v7_0_ip_block = 1357const struct amdgpu_ip_block_version gmc_v7_0_ip_block =
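Note: gmc_v7_0 gains two ring-level callbacks here, emit_flush_gpu_tlb and emit_pasid_mapping, both built on amdgpu_ring_emit_wreg(). A hedged sketch of how a ring backend could defer its VM flush to the new per-ASIC hook follows; the helper name is made up for illustration, and the real dispatch lives in the common GMC wrappers rather than in this file.

/* Sketch only (assumed helper name). Returns the possibly adjusted
 * page-directory address, as the callback added in this patch does.
 */
static uint64_t example_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                           unsigned vmid, uint64_t pd_addr)
{
        struct amdgpu_device *adev = ring->adev;

        return adev->gmc.gmc_funcs->emit_flush_gpu_tlb(ring, vmid, pd_addr);
}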
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9a813d834f1a..d71d4cb68f9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -22,6 +22,7 @@
22 */ 22 */
23#include <linux/firmware.h> 23#include <linux/firmware.h>
24#include <drm/drmP.h> 24#include <drm/drmP.h>
25#include <drm/drm_cache.h>
25#include "amdgpu.h" 26#include "amdgpu.h"
26#include "gmc_v8_0.h" 27#include "gmc_v8_0.h"
27#include "amdgpu_ucode.h" 28#include "amdgpu_ucode.h"
@@ -44,7 +45,7 @@
44#include "amdgpu_atombios.h" 45#include "amdgpu_atombios.h"
45 46
46 47
47static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev); 48static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
48static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); 49static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
49static int gmc_v8_0_wait_for_idle(void *handle); 50static int gmc_v8_0_wait_for_idle(void *handle);
50 51
@@ -235,16 +236,16 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
235 } 236 }
236 237
237 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); 238 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
238 err = request_firmware(&adev->mc.fw, fw_name, adev->dev); 239 err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
239 if (err) 240 if (err)
240 goto out; 241 goto out;
241 err = amdgpu_ucode_validate(adev->mc.fw); 242 err = amdgpu_ucode_validate(adev->gmc.fw);
242 243
243out: 244out:
244 if (err) { 245 if (err) {
245 pr_err("mc: Failed to load firmware \"%s\"\n", fw_name); 246 pr_err("mc: Failed to load firmware \"%s\"\n", fw_name);
246 release_firmware(adev->mc.fw); 247 release_firmware(adev->gmc.fw);
247 adev->mc.fw = NULL; 248 adev->gmc.fw = NULL;
248 } 249 }
249 return err; 250 return err;
250} 251}
@@ -273,19 +274,19 @@ static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
273 if (amdgpu_sriov_bios(adev)) 274 if (amdgpu_sriov_bios(adev))
274 return 0; 275 return 0;
275 276
276 if (!adev->mc.fw) 277 if (!adev->gmc.fw)
277 return -EINVAL; 278 return -EINVAL;
278 279
279 hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; 280 hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
280 amdgpu_ucode_print_mc_hdr(&hdr->header); 281 amdgpu_ucode_print_mc_hdr(&hdr->header);
281 282
282 adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); 283 adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
283 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); 284 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
284 io_mc_regs = (const __le32 *) 285 io_mc_regs = (const __le32 *)
285 (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); 286 (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
286 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 287 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
287 fw_data = (const __le32 *) 288 fw_data = (const __le32 *)
288 (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 289 (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
289 290
290 running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN); 291 running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN);
291 292
@@ -349,19 +350,19 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
349 if (vbios_version == 0) 350 if (vbios_version == 0)
350 return 0; 351 return 0;
351 352
352 if (!adev->mc.fw) 353 if (!adev->gmc.fw)
353 return -EINVAL; 354 return -EINVAL;
354 355
355 hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; 356 hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
356 amdgpu_ucode_print_mc_hdr(&hdr->header); 357 amdgpu_ucode_print_mc_hdr(&hdr->header);
357 358
358 adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); 359 adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
359 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); 360 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
360 io_mc_regs = (const __le32 *) 361 io_mc_regs = (const __le32 *)
361 (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); 362 (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
362 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 363 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
363 fw_data = (const __le32 *) 364 fw_data = (const __le32 *)
364 (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 365 (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
365 366
366 data = RREG32(mmMC_SEQ_MISC0); 367 data = RREG32(mmMC_SEQ_MISC0);
367 data &= ~(0x40); 368 data &= ~(0x40);
@@ -397,7 +398,7 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
397} 398}
398 399
399static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, 400static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
400 struct amdgpu_mc *mc) 401 struct amdgpu_gmc *mc)
401{ 402{
402 u64 base = 0; 403 u64 base = 0;
403 404
@@ -405,7 +406,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
405 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; 406 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
406 base <<= 24; 407 base <<= 24;
407 408
408 amdgpu_device_vram_location(adev, &adev->mc, base); 409 amdgpu_device_vram_location(adev, &adev->gmc, base);
409 amdgpu_device_gart_location(adev, mc); 410 amdgpu_device_gart_location(adev, mc);
410} 411}
411 412
@@ -448,18 +449,18 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
448 } 449 }
449 /* Update configuration */ 450 /* Update configuration */
450 WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 451 WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
451 adev->mc.vram_start >> 12); 452 adev->gmc.vram_start >> 12);
452 WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 453 WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
453 adev->mc.vram_end >> 12); 454 adev->gmc.vram_end >> 12);
454 WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 455 WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
455 adev->vram_scratch.gpu_addr >> 12); 456 adev->vram_scratch.gpu_addr >> 12);
456 457
457 if (amdgpu_sriov_vf(adev)) { 458 if (amdgpu_sriov_vf(adev)) {
458 tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; 459 tmp = ((adev->gmc.vram_end >> 24) & 0xFFFF) << 16;
459 tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); 460 tmp |= ((adev->gmc.vram_start >> 24) & 0xFFFF);
460 WREG32(mmMC_VM_FB_LOCATION, tmp); 461 WREG32(mmMC_VM_FB_LOCATION, tmp);
461 /* XXX double check these! */ 462 /* XXX double check these! */
462 WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); 463 WREG32(mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
463 WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); 464 WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
464 WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); 465 WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
465 } 466 }
@@ -494,8 +495,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
494{ 495{
495 int r; 496 int r;
496 497
497 adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev); 498 adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
498 if (!adev->mc.vram_width) { 499 if (!adev->gmc.vram_width) {
499 u32 tmp; 500 u32 tmp;
500 int chansize, numchan; 501 int chansize, numchan;
501 502
@@ -537,31 +538,31 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
537 numchan = 16; 538 numchan = 16;
538 break; 539 break;
539 } 540 }
540 adev->mc.vram_width = numchan * chansize; 541 adev->gmc.vram_width = numchan * chansize;
541 } 542 }
542 /* size in MB on si */ 543 /* size in MB on si */
543 adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; 544 adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
544 adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; 545 adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
545 546
546 if (!(adev->flags & AMD_IS_APU)) { 547 if (!(adev->flags & AMD_IS_APU)) {
547 r = amdgpu_device_resize_fb_bar(adev); 548 r = amdgpu_device_resize_fb_bar(adev);
548 if (r) 549 if (r)
549 return r; 550 return r;
550 } 551 }
551 adev->mc.aper_base = pci_resource_start(adev->pdev, 0); 552 adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
552 adev->mc.aper_size = pci_resource_len(adev->pdev, 0); 553 adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
553 554
554#ifdef CONFIG_X86_64 555#ifdef CONFIG_X86_64
555 if (adev->flags & AMD_IS_APU) { 556 if (adev->flags & AMD_IS_APU) {
556 adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; 557 adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
557 adev->mc.aper_size = adev->mc.real_vram_size; 558 adev->gmc.aper_size = adev->gmc.real_vram_size;
558 } 559 }
559#endif 560#endif
560 561
561 /* In case the PCI BAR is larger than the actual amount of vram */ 562 /* In case the PCI BAR is larger than the actual amount of vram */
562 adev->mc.visible_vram_size = adev->mc.aper_size; 563 adev->gmc.visible_vram_size = adev->gmc.aper_size;
563 if (adev->mc.visible_vram_size > adev->mc.real_vram_size) 564 if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
564 adev->mc.visible_vram_size = adev->mc.real_vram_size; 565 adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
565 566
566 /* set the gart size */ 567 /* set the gart size */
567 if (amdgpu_gart_size == -1) { 568 if (amdgpu_gart_size == -1) {
@@ -570,20 +571,20 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
570 case CHIP_POLARIS10: /* all engines support GPUVM */ 571 case CHIP_POLARIS10: /* all engines support GPUVM */
571 case CHIP_POLARIS12: /* all engines support GPUVM */ 572 case CHIP_POLARIS12: /* all engines support GPUVM */
572 default: 573 default:
573 adev->mc.gart_size = 256ULL << 20; 574 adev->gmc.gart_size = 256ULL << 20;
574 break; 575 break;
575 case CHIP_TONGA: /* UVD, VCE do not support GPUVM */ 576 case CHIP_TONGA: /* UVD, VCE do not support GPUVM */
576 case CHIP_FIJI: /* UVD, VCE do not support GPUVM */ 577 case CHIP_FIJI: /* UVD, VCE do not support GPUVM */
577 case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */ 578 case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
578 case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */ 579 case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */
579 adev->mc.gart_size = 1024ULL << 20; 580 adev->gmc.gart_size = 1024ULL << 20;
580 break; 581 break;
581 } 582 }
582 } else { 583 } else {
583 adev->mc.gart_size = (u64)amdgpu_gart_size << 20; 584 adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
584 } 585 }
585 586
586 gmc_v8_0_vram_gtt_location(adev, &adev->mc); 587 gmc_v8_0_vram_gtt_location(adev, &adev->gmc);
587 588
588 return 0; 589 return 0;
589} 590}
@@ -596,25 +597,45 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
596 */ 597 */
597 598
598/** 599/**
599 * gmc_v8_0_gart_flush_gpu_tlb - gart tlb flush callback 600 * gmc_v8_0_flush_gpu_tlb - gart tlb flush callback
600 * 601 *
601 * @adev: amdgpu_device pointer 602 * @adev: amdgpu_device pointer
602 * @vmid: vm instance to flush 603 * @vmid: vm instance to flush
603 * 604 *
604 * Flush the TLB for the requested page table (CIK). 605 * Flush the TLB for the requested page table (CIK).
605 */ 606 */
606static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, 607static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
607 uint32_t vmid) 608 uint32_t vmid)
608{ 609{
609 /* flush hdp cache */
610 WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
611
612 /* bits 0-15 are the VM contexts0-15 */ 610 /* bits 0-15 are the VM contexts0-15 */
613 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); 611 WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
614} 612}
615 613
614static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
615 unsigned vmid, uint64_t pd_addr)
616{
617 uint32_t reg;
618
619 if (vmid < 8)
620 reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
621 else
622 reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
623 amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
624
625 /* bits 0-15 are the VM contexts0-15 */
626 amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
627
628 return pd_addr;
629}
630
631static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
632 unsigned pasid)
633{
634 amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
635}
636
616/** 637/**
617 * gmc_v8_0_gart_set_pte_pde - update the page tables using MMIO 638 * gmc_v8_0_set_pte_pde - update the page tables using MMIO
618 * 639 *
619 * @adev: amdgpu_device pointer 640 * @adev: amdgpu_device pointer
620 * @cpu_pt_addr: cpu address of the page table 641 * @cpu_pt_addr: cpu address of the page table
@@ -624,11 +645,9 @@ static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
624 * 645 *
625 * Update the page tables using the CPU. 646 * Update the page tables using the CPU.
626 */ 647 */
627static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev, 648static int gmc_v8_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
628 void *cpu_pt_addr, 649 uint32_t gpu_page_idx, uint64_t addr,
629 uint32_t gpu_page_idx, 650 uint64_t flags)
630 uint64_t addr,
631 uint64_t flags)
632{ 651{
633 void __iomem *ptr = (void *)cpu_pt_addr; 652 void __iomem *ptr = (void *)cpu_pt_addr;
634 uint64_t value; 653 uint64_t value;
@@ -722,9 +741,9 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
722{ 741{
723 u32 tmp; 742 u32 tmp;
724 743
725 if (enable && !adev->mc.prt_warning) { 744 if (enable && !adev->gmc.prt_warning) {
726 dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); 745 dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
727 adev->mc.prt_warning = true; 746 adev->gmc.prt_warning = true;
728 } 747 }
729 748
730 tmp = RREG32(mmVM_PRT_CNTL); 749 tmp = RREG32(mmVM_PRT_CNTL);
@@ -746,7 +765,8 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
746 765
747 if (enable) { 766 if (enable) {
748 uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; 767 uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
749 uint32_t high = adev->vm_manager.max_pfn; 768 uint32_t high = adev->vm_manager.max_pfn -
769 (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
750 770
751 WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); 771 WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
752 WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); 772 WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -836,11 +856,11 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
836 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0); 856 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0);
837 WREG32(mmVM_L2_CNTL4, tmp); 857 WREG32(mmVM_L2_CNTL4, tmp);
838 /* setup context0 */ 858 /* setup context0 */
839 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); 859 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
840 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); 860 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
841 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); 861 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
842 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 862 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
843 (u32)(adev->dummy_page.addr >> 12)); 863 (u32)(adev->dummy_page_addr >> 12));
844 WREG32(mmVM_CONTEXT0_CNTL2, 0); 864 WREG32(mmVM_CONTEXT0_CNTL2, 0);
845 tmp = RREG32(mmVM_CONTEXT0_CNTL); 865 tmp = RREG32(mmVM_CONTEXT0_CNTL);
846 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); 866 tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
@@ -870,7 +890,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
870 890
871 /* enable context1-15 */ 891 /* enable context1-15 */
872 WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 892 WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
873 (u32)(adev->dummy_page.addr >> 12)); 893 (u32)(adev->dummy_page_addr >> 12));
874 WREG32(mmVM_CONTEXT1_CNTL2, 4); 894 WREG32(mmVM_CONTEXT1_CNTL2, 4);
875 tmp = RREG32(mmVM_CONTEXT1_CNTL); 895 tmp = RREG32(mmVM_CONTEXT1_CNTL);
876 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); 896 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
@@ -890,9 +910,9 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
890 else 910 else
891 gmc_v8_0_set_fault_enable_default(adev, true); 911 gmc_v8_0_set_fault_enable_default(adev, true);
892 912
893 gmc_v8_0_gart_flush_gpu_tlb(adev, 0); 913 gmc_v8_0_flush_gpu_tlb(adev, 0);
894 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 914 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
895 (unsigned)(adev->mc.gart_size >> 20), 915 (unsigned)(adev->gmc.gart_size >> 20),
896 (unsigned long long)adev->gart.table_addr); 916 (unsigned long long)adev->gart.table_addr);
897 adev->gart.ready = true; 917 adev->gart.ready = true;
898 return 0; 918 return 0;
@@ -965,21 +985,21 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
965 * 985 *
966 * Print human readable fault information (CIK). 986 * Print human readable fault information (CIK).
967 */ 987 */
968static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, 988static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
969 u32 status, u32 addr, u32 mc_client) 989 u32 addr, u32 mc_client, unsigned pasid)
970{ 990{
971 u32 mc_id;
972 u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); 991 u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
973 u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, 992 u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
974 PROTECTIONS); 993 PROTECTIONS);
975 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff, 994 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
976 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 }; 995 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
996 u32 mc_id;
977 997
978 mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, 998 mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
979 MEMORY_CLIENT_ID); 999 MEMORY_CLIENT_ID);
980 1000
981 dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n", 1001 dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
982 protections, vmid, addr, 1002 protections, vmid, pasid, addr,
983 REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, 1003 REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
984 MEMORY_CLIENT_RW) ? 1004 MEMORY_CLIENT_RW) ?
985 "write" : "read", block, mc_client, mc_id); 1005 "write" : "read", block, mc_client, mc_id);
@@ -1011,16 +1031,16 @@ static int gmc_v8_0_early_init(void *handle)
1011{ 1031{
1012 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1032 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1013 1033
1014 gmc_v8_0_set_gart_funcs(adev); 1034 gmc_v8_0_set_gmc_funcs(adev);
1015 gmc_v8_0_set_irq_funcs(adev); 1035 gmc_v8_0_set_irq_funcs(adev);
1016 1036
1017 adev->mc.shared_aperture_start = 0x2000000000000000ULL; 1037 adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
1018 adev->mc.shared_aperture_end = 1038 adev->gmc.shared_aperture_end =
1019 adev->mc.shared_aperture_start + (4ULL << 30) - 1; 1039 adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
1020 adev->mc.private_aperture_start = 1040 adev->gmc.private_aperture_start =
1021 adev->mc.shared_aperture_end + 1; 1041 adev->gmc.shared_aperture_end + 1;
1022 adev->mc.private_aperture_end = 1042 adev->gmc.private_aperture_end =
1023 adev->mc.private_aperture_start + (4ULL << 30) - 1; 1043 adev->gmc.private_aperture_start + (4ULL << 30) - 1;
1024 1044
1025 return 0; 1045 return 0;
1026} 1046}
@@ -1030,7 +1050,7 @@ static int gmc_v8_0_late_init(void *handle)
1030 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1050 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1031 1051
1032 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) 1052 if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
1033 return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); 1053 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
1034 else 1054 else
1035 return 0; 1055 return 0;
1036} 1056}
@@ -1044,7 +1064,7 @@ static int gmc_v8_0_sw_init(void *handle)
1044 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1064 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1045 1065
1046 if (adev->flags & AMD_IS_APU) { 1066 if (adev->flags & AMD_IS_APU) {
1047 adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; 1067 adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
1048 } else { 1068 } else {
1049 u32 tmp; 1069 u32 tmp;
1050 1070
@@ -1053,14 +1073,14 @@ static int gmc_v8_0_sw_init(void *handle)
1053 else 1073 else
1054 tmp = RREG32(mmMC_SEQ_MISC0); 1074 tmp = RREG32(mmMC_SEQ_MISC0);
1055 tmp &= MC_SEQ_MISC0__MT__MASK; 1075 tmp &= MC_SEQ_MISC0__MT__MASK;
1056 adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp); 1076 adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp);
1057 } 1077 }
1058 1078
1059 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); 1079 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
1060 if (r) 1080 if (r)
1061 return r; 1081 return r;
1062 1082
1063 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); 1083 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
1064 if (r) 1084 if (r)
1065 return r; 1085 return r;
1066 1086
@@ -1074,9 +1094,9 @@ static int gmc_v8_0_sw_init(void *handle)
1074 * This is the max address of the GPU's 1094 * This is the max address of the GPU's
1075 * internal address space. 1095 * internal address space.
1076 */ 1096 */
1077 adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ 1097 adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
1078 1098
1079 adev->mc.stolen_size = 256 * 1024; 1099 adev->gmc.stolen_size = 256 * 1024;
1080 1100
1081 /* set DMA mask + need_dma32 flags. 1101 /* set DMA mask + need_dma32 flags.
1082 * PCIE - can handle 40-bits. 1102 * PCIE - can handle 40-bits.
@@ -1096,6 +1116,7 @@ static int gmc_v8_0_sw_init(void *handle)
1096 pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); 1116 pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
1097 pr_warn("amdgpu: No coherent DMA available\n"); 1117 pr_warn("amdgpu: No coherent DMA available\n");
1098 } 1118 }
1119 adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
1099 1120
1100 r = gmc_v8_0_init_microcode(adev); 1121 r = gmc_v8_0_init_microcode(adev);
1101 if (r) { 1122 if (r) {
@@ -1146,8 +1167,8 @@ static int gmc_v8_0_sw_fini(void *handle)
1146 amdgpu_vm_manager_fini(adev); 1167 amdgpu_vm_manager_fini(adev);
1147 gmc_v8_0_gart_fini(adev); 1168 gmc_v8_0_gart_fini(adev);
1148 amdgpu_bo_fini(adev); 1169 amdgpu_bo_fini(adev);
1149 release_firmware(adev->mc.fw); 1170 release_firmware(adev->gmc.fw);
1150 adev->mc.fw = NULL; 1171 adev->gmc.fw = NULL;
1151 1172
1152 return 0; 1173 return 0;
1153} 1174}
@@ -1188,7 +1209,7 @@ static int gmc_v8_0_hw_fini(void *handle)
1188{ 1209{
1189 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1210 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1190 1211
1191 amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); 1212 amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
1192 gmc_v8_0_gart_disable(adev); 1213 gmc_v8_0_gart_disable(adev);
1193 1214
1194 return 0; 1215 return 0;
@@ -1268,10 +1289,10 @@ static bool gmc_v8_0_check_soft_reset(void *handle)
1268 SRBM_SOFT_RESET, SOFT_RESET_MC, 1); 1289 SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
1269 } 1290 }
1270 if (srbm_soft_reset) { 1291 if (srbm_soft_reset) {
1271 adev->mc.srbm_soft_reset = srbm_soft_reset; 1292 adev->gmc.srbm_soft_reset = srbm_soft_reset;
1272 return true; 1293 return true;
1273 } else { 1294 } else {
1274 adev->mc.srbm_soft_reset = 0; 1295 adev->gmc.srbm_soft_reset = 0;
1275 return false; 1296 return false;
1276 } 1297 }
1277} 1298}
@@ -1280,7 +1301,7 @@ static int gmc_v8_0_pre_soft_reset(void *handle)
1280{ 1301{
1281 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1302 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1282 1303
1283 if (!adev->mc.srbm_soft_reset) 1304 if (!adev->gmc.srbm_soft_reset)
1284 return 0; 1305 return 0;
1285 1306
1286 gmc_v8_0_mc_stop(adev); 1307 gmc_v8_0_mc_stop(adev);
@@ -1296,9 +1317,9 @@ static int gmc_v8_0_soft_reset(void *handle)
1296 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1317 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1297 u32 srbm_soft_reset; 1318 u32 srbm_soft_reset;
1298 1319
1299 if (!adev->mc.srbm_soft_reset) 1320 if (!adev->gmc.srbm_soft_reset)
1300 return 0; 1321 return 0;
1301 srbm_soft_reset = adev->mc.srbm_soft_reset; 1322 srbm_soft_reset = adev->gmc.srbm_soft_reset;
1302 1323
1303 if (srbm_soft_reset) { 1324 if (srbm_soft_reset) {
1304 u32 tmp; 1325 u32 tmp;
@@ -1326,7 +1347,7 @@ static int gmc_v8_0_post_soft_reset(void *handle)
1326{ 1347{
1327 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1348 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1328 1349
1329 if (!adev->mc.srbm_soft_reset) 1350 if (!adev->gmc.srbm_soft_reset)
1330 return 0; 1351 return 0;
1331 1352
1332 gmc_v8_0_mc_resume(adev); 1353 gmc_v8_0_mc_resume(adev);
@@ -1407,7 +1428,8 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
1407 addr); 1428 addr);
1408 dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", 1429 dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
1409 status); 1430 status);
1410 gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client); 1431 gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client,
1432 entry->pasid);
1411 } 1433 }
1412 1434
1413 return 0; 1435 return 0;
@@ -1639,9 +1661,11 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
1639 .get_clockgating_state = gmc_v8_0_get_clockgating_state, 1661 .get_clockgating_state = gmc_v8_0_get_clockgating_state,
1640}; 1662};
1641 1663
1642static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = { 1664static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
1643 .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, 1665 .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
1644 .set_pte_pde = gmc_v8_0_gart_set_pte_pde, 1666 .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
1667 .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
1668 .set_pte_pde = gmc_v8_0_set_pte_pde,
1645 .set_prt = gmc_v8_0_set_prt, 1669 .set_prt = gmc_v8_0_set_prt,
1646 .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, 1670 .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
1647 .get_vm_pde = gmc_v8_0_get_vm_pde 1671 .get_vm_pde = gmc_v8_0_get_vm_pde
@@ -1652,16 +1676,16 @@ static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
1652 .process = gmc_v8_0_process_interrupt, 1676 .process = gmc_v8_0_process_interrupt,
1653}; 1677};
1654 1678
1655static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev) 1679static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev)
1656{ 1680{
1657 if (adev->gart.gart_funcs == NULL) 1681 if (adev->gmc.gmc_funcs == NULL)
1658 adev->gart.gart_funcs = &gmc_v8_0_gart_funcs; 1682 adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs;
1659} 1683}
1660 1684
1661static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev) 1685static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev)
1662{ 1686{
1663 adev->mc.vm_fault.num_types = 1; 1687 adev->gmc.vm_fault.num_types = 1;
1664 adev->mc.vm_fault.funcs = &gmc_v8_0_irq_funcs; 1688 adev->gmc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
1665} 1689}
1666 1690
1667const struct amdgpu_ip_block_version gmc_v8_0_ip_block = 1691const struct amdgpu_ip_block_version gmc_v8_0_ip_block =
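Note: besides the same mc -> gmc rename, gmc_v8_0 picks up emit_pasid_mapping, which writes the PASID into mmIH_VMID_0_LUT + vmid from the ring, and tightens the PRT aperture so the high address now excludes the reserved VA range (max_pfn minus AMDGPU_VA_RESERVED_SIZE in GPU pages). Below is a sketch of driving the new PASID hook through the callback table; the caller name is illustrative and not part of the patch.

/* Sketch only. Emit a VMID->PASID mapping on a ring if the ASIC
 * provides the callback introduced by this patch.
 */
static void example_emit_pasid_mapping(struct amdgpu_ring *ring,
                                       unsigned vmid, unsigned pasid)
{
        struct amdgpu_device *adev = ring->adev;

        if (adev->gmc.gmc_funcs->emit_pasid_mapping)
                adev->gmc.gmc_funcs->emit_pasid_mapping(ring, vmid, pasid);
}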
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3b7e7af09ead..e687363900bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -21,6 +21,7 @@
21 * 21 *
22 */ 22 */
23#include <linux/firmware.h> 23#include <linux/firmware.h>
24#include <drm/drm_cache.h>
24#include "amdgpu.h" 25#include "amdgpu.h"
25#include "gmc_v9_0.h" 26#include "gmc_v9_0.h"
26#include "amdgpu_atomfirmware.h" 27#include "amdgpu_atomfirmware.h"
@@ -33,6 +34,7 @@
33#include "vega10_enum.h" 34#include "vega10_enum.h"
34#include "mmhub/mmhub_1_0_offset.h" 35#include "mmhub/mmhub_1_0_offset.h"
35#include "athub/athub_1_0_offset.h" 36#include "athub/athub_1_0_offset.h"
37#include "oss/osssys_4_0_offset.h"
36 38
37#include "soc15.h" 39#include "soc15.h"
38#include "soc15_common.h" 40#include "soc15_common.h"
@@ -262,10 +264,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
262 264
263 if (printk_ratelimit()) { 265 if (printk_ratelimit()) {
264 dev_err(adev->dev, 266 dev_err(adev->dev,
265 "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pas_id:%u)\n", 267 "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
266 entry->vmid_src ? "mmhub" : "gfxhub", 268 entry->vmid_src ? "mmhub" : "gfxhub",
267 entry->src_id, entry->ring_id, entry->vmid, 269 entry->src_id, entry->ring_id, entry->vmid,
268 entry->pas_id); 270 entry->pasid);
269 dev_err(adev->dev, " at page 0x%016llx from %d\n", 271 dev_err(adev->dev, " at page 0x%016llx from %d\n",
270 addr, entry->client_id); 272 addr, entry->client_id);
271 if (!amdgpu_sriov_vf(adev)) 273 if (!amdgpu_sriov_vf(adev))
@@ -284,8 +286,8 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
284 286
285static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) 287static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
286{ 288{
287 adev->mc.vm_fault.num_types = 1; 289 adev->gmc.vm_fault.num_types = 1;
288 adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs; 290 adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
289} 291}
290 292
291static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) 293static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
@@ -315,24 +317,21 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
315 */ 317 */
316 318
317/** 319/**
318 * gmc_v9_0_gart_flush_gpu_tlb - gart tlb flush callback 320 * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback
319 * 321 *
320 * @adev: amdgpu_device pointer 322 * @adev: amdgpu_device pointer
321 * @vmid: vm instance to flush 323 * @vmid: vm instance to flush
322 * 324 *
323 * Flush the TLB for the requested page table. 325 * Flush the TLB for the requested page table.
324 */ 326 */
325static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, 327static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
326 uint32_t vmid) 328 uint32_t vmid)
327{ 329{
328 /* Use register 17 for GART */ 330 /* Use register 17 for GART */
329 const unsigned eng = 17; 331 const unsigned eng = 17;
330 unsigned i, j; 332 unsigned i, j;
331 333
332 /* flush hdp cache */ 334 spin_lock(&adev->gmc.invalidate_lock);
333 adev->nbio_funcs->hdp_flush(adev);
334
335 spin_lock(&adev->mc.invalidate_lock);
336 335
337 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 336 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
338 struct amdgpu_vmhub *hub = &adev->vmhub[i]; 337 struct amdgpu_vmhub *hub = &adev->vmhub[i];
@@ -365,11 +364,52 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
365 DRM_ERROR("Timeout waiting for VM flush ACK!\n"); 364 DRM_ERROR("Timeout waiting for VM flush ACK!\n");
366 } 365 }
367 366
368 spin_unlock(&adev->mc.invalidate_lock); 367 spin_unlock(&adev->gmc.invalidate_lock);
368}
369
370static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
371 unsigned vmid, uint64_t pd_addr)
372{
373 struct amdgpu_device *adev = ring->adev;
374 struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
375 uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
376 uint64_t flags = AMDGPU_PTE_VALID;
377 unsigned eng = ring->vm_inv_eng;
378
379 amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags);
380 pd_addr |= flags;
381
382 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
383 lower_32_bits(pd_addr));
384
385 amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
386 upper_32_bits(pd_addr));
387
388 amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
389
390 /* wait for the invalidate to complete */
391 amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
392 1 << vmid, 1 << vmid);
393
394 return pd_addr;
395}
396
397static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
398 unsigned pasid)
399{
400 struct amdgpu_device *adev = ring->adev;
401 uint32_t reg;
402
403 if (ring->funcs->vmhub == AMDGPU_GFXHUB)
404 reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
405 else
406 reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
407
408 amdgpu_ring_emit_wreg(ring, reg, pasid);
369} 409}
370 410
371/** 411/**
372 * gmc_v9_0_gart_set_pte_pde - update the page tables using MMIO 412 * gmc_v9_0_set_pte_pde - update the page tables using MMIO
373 * 413 *
374 * @adev: amdgpu_device pointer 414 * @adev: amdgpu_device pointer
375 * @cpu_pt_addr: cpu address of the page table 415 * @cpu_pt_addr: cpu address of the page table
@@ -379,11 +419,9 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
379 * 419 *
380 * Update the page tables using the CPU. 420 * Update the page tables using the CPU.
381 */ 421 */
382static int gmc_v9_0_gart_set_pte_pde(struct amdgpu_device *adev, 422static int gmc_v9_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
383 void *cpu_pt_addr, 423 uint32_t gpu_page_idx, uint64_t addr,
384 uint32_t gpu_page_idx, 424 uint64_t flags)
385 uint64_t addr,
386 uint64_t flags)
387{ 425{
388 void __iomem *ptr = (void *)cpu_pt_addr; 426 void __iomem *ptr = (void *)cpu_pt_addr;
389 uint64_t value; 427 uint64_t value;
@@ -474,10 +512,10 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
474{ 512{
475 if (!(*flags & AMDGPU_PDE_PTE)) 513 if (!(*flags & AMDGPU_PDE_PTE))
476 *addr = adev->vm_manager.vram_base_offset + *addr - 514 *addr = adev->vm_manager.vram_base_offset + *addr -
477 adev->mc.vram_start; 515 adev->gmc.vram_start;
478 BUG_ON(*addr & 0xFFFF00000000003FULL); 516 BUG_ON(*addr & 0xFFFF00000000003FULL);
479 517
480 if (!adev->mc.translate_further) 518 if (!adev->gmc.translate_further)
481 return; 519 return;
482 520
483 if (level == AMDGPU_VM_PDB1) { 521 if (level == AMDGPU_VM_PDB1) {
@@ -493,34 +531,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
493 } 531 }
494} 532}
495 533
496static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { 534static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
497 .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, 535 .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
498 .set_pte_pde = gmc_v9_0_gart_set_pte_pde, 536 .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
499 .get_invalidate_req = gmc_v9_0_get_invalidate_req, 537 .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
538 .set_pte_pde = gmc_v9_0_set_pte_pde,
500 .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, 539 .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
501 .get_vm_pde = gmc_v9_0_get_vm_pde 540 .get_vm_pde = gmc_v9_0_get_vm_pde
502}; 541};
503 542
504static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) 543static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
505{ 544{
506 if (adev->gart.gart_funcs == NULL) 545 if (adev->gmc.gmc_funcs == NULL)
507 adev->gart.gart_funcs = &gmc_v9_0_gart_funcs; 546 adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
508} 547}
509 548
510static int gmc_v9_0_early_init(void *handle) 549static int gmc_v9_0_early_init(void *handle)
511{ 550{
512 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 551 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
513 552
514 gmc_v9_0_set_gart_funcs(adev); 553 gmc_v9_0_set_gmc_funcs(adev);
515 gmc_v9_0_set_irq_funcs(adev); 554 gmc_v9_0_set_irq_funcs(adev);
516 555
517 adev->mc.shared_aperture_start = 0x2000000000000000ULL; 556 adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
518 adev->mc.shared_aperture_end = 557 adev->gmc.shared_aperture_end =
519 adev->mc.shared_aperture_start + (4ULL << 30) - 1; 558 adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
520 adev->mc.private_aperture_start = 559 adev->gmc.private_aperture_start =
521 adev->mc.shared_aperture_end + 1; 560 adev->gmc.shared_aperture_end + 1;
522 adev->mc.private_aperture_end = 561 adev->gmc.private_aperture_end =
523 adev->mc.private_aperture_start + (4ULL << 30) - 1; 562 adev->gmc.private_aperture_start + (4ULL << 30) - 1;
524 563
525 return 0; 564 return 0;
526} 565}
@@ -646,16 +685,16 @@ static int gmc_v9_0_late_init(void *handle)
646 } 685 }
647 } 686 }
648 687
649 return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); 688 return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
650} 689}
651 690
652static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, 691static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
653 struct amdgpu_mc *mc) 692 struct amdgpu_gmc *mc)
654{ 693{
655 u64 base = 0; 694 u64 base = 0;
656 if (!amdgpu_sriov_vf(adev)) 695 if (!amdgpu_sriov_vf(adev))
657 base = mmhub_v1_0_get_fb_location(adev); 696 base = mmhub_v1_0_get_fb_location(adev);
658 amdgpu_device_vram_location(adev, &adev->mc, base); 697 amdgpu_device_vram_location(adev, &adev->gmc, base);
659 amdgpu_device_gart_location(adev, mc); 698 amdgpu_device_gart_location(adev, mc);
660 /* base offset of vram pages */ 699 /* base offset of vram pages */
661 if (adev->flags & AMD_IS_APU) 700 if (adev->flags & AMD_IS_APU)
@@ -679,8 +718,9 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
679 int chansize, numchan; 718 int chansize, numchan;
680 int r; 719 int r;
681 720
682 adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); 721 if (amdgpu_emu_mode != 1)
683 if (!adev->mc.vram_width) { 722 adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
723 if (!adev->gmc.vram_width) {
684 /* hbm memory channel size */ 724 /* hbm memory channel size */
685 if (adev->flags & AMD_IS_APU) 725 if (adev->flags & AMD_IS_APU)
686 chansize = 64; 726 chansize = 64;
@@ -720,43 +760,50 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
720 numchan = 2; 760 numchan = 2;
721 break; 761 break;
722 } 762 }
723 adev->mc.vram_width = numchan * chansize; 763 adev->gmc.vram_width = numchan * chansize;
724 } 764 }
725 765
726 /* size in MB on si */ 766 /* size in MB on si */
727 adev->mc.mc_vram_size = 767 adev->gmc.mc_vram_size =
728 adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; 768 adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
729 adev->mc.real_vram_size = adev->mc.mc_vram_size; 769 adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
730 770
731 if (!(adev->flags & AMD_IS_APU)) { 771 if (!(adev->flags & AMD_IS_APU)) {
732 r = amdgpu_device_resize_fb_bar(adev); 772 r = amdgpu_device_resize_fb_bar(adev);
733 if (r) 773 if (r)
734 return r; 774 return r;
735 } 775 }
736 adev->mc.aper_base = pci_resource_start(adev->pdev, 0); 776 adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
737 adev->mc.aper_size = pci_resource_len(adev->pdev, 0); 777 adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
738 778
779#ifdef CONFIG_X86_64
780 if (adev->flags & AMD_IS_APU) {
781 adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev);
782 adev->gmc.aper_size = adev->gmc.real_vram_size;
783 }
784#endif
739 /* In case the PCI BAR is larger than the actual amount of vram */ 785 /* In case the PCI BAR is larger than the actual amount of vram */
740 adev->mc.visible_vram_size = adev->mc.aper_size; 786 adev->gmc.visible_vram_size = adev->gmc.aper_size;
741 if (adev->mc.visible_vram_size > adev->mc.real_vram_size) 787 if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
742 adev->mc.visible_vram_size = adev->mc.real_vram_size; 788 adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
743 789
744 /* set the gart size */ 790 /* set the gart size */
745 if (amdgpu_gart_size == -1) { 791 if (amdgpu_gart_size == -1) {
746 switch (adev->asic_type) { 792 switch (adev->asic_type) {
747 case CHIP_VEGA10: /* all engines support GPUVM */ 793 case CHIP_VEGA10: /* all engines support GPUVM */
794 case CHIP_VEGA12: /* all engines support GPUVM */
748 default: 795 default:
749 adev->mc.gart_size = 256ULL << 20; 796 adev->gmc.gart_size = 512ULL << 20;
750 break; 797 break;
751 case CHIP_RAVEN: /* DCE SG support */ 798 case CHIP_RAVEN: /* DCE SG support */
752 adev->mc.gart_size = 1024ULL << 20; 799 adev->gmc.gart_size = 1024ULL << 20;
753 break; 800 break;
754 } 801 }
755 } else { 802 } else {
756 adev->mc.gart_size = (u64)amdgpu_gart_size << 20; 803 adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
757 } 804 }
758 805
759 gmc_v9_0_vram_gtt_location(adev, &adev->mc); 806 gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
760 807
761 return 0; 808 return 0;
762} 809}
@@ -788,23 +835,22 @@ static int gmc_v9_0_sw_init(void *handle)
788 gfxhub_v1_0_init(adev); 835 gfxhub_v1_0_init(adev);
789 mmhub_v1_0_init(adev); 836 mmhub_v1_0_init(adev);
790 837
791 spin_lock_init(&adev->mc.invalidate_lock); 838 spin_lock_init(&adev->gmc.invalidate_lock);
792 839
840 adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
793 switch (adev->asic_type) { 841 switch (adev->asic_type) {
794 case CHIP_RAVEN: 842 case CHIP_RAVEN:
795 adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
796 if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { 843 if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
797 amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); 844 amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
798 } else { 845 } else {
799 /* vm_size is 128TB + 512GB for legacy 3-level page support */ 846 /* vm_size is 128TB + 512GB for legacy 3-level page support */
800 amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48); 847 amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
801 adev->mc.translate_further = 848 adev->gmc.translate_further =
802 adev->vm_manager.num_level > 1; 849 adev->vm_manager.num_level > 1;
803 } 850 }
804 break; 851 break;
805 case CHIP_VEGA10: 852 case CHIP_VEGA10:
806 /* XXX Don't know how to get VRAM type yet. */ 853 case CHIP_VEGA12:
807 adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM;
808 /* 854 /*
809 * To fulfill 4-level page support, 855 * To fulfill 4-level page support,
810 * vm size is 256TB (48bit), maximum size of Vega10, 856 * vm size is 256TB (48bit), maximum size of Vega10,
@@ -817,10 +863,10 @@ static int gmc_v9_0_sw_init(void *handle)
817 } 863 }
818 864
819 /* This interrupt is VMC page fault.*/ 865 /* This interrupt is VMC page fault.*/
820 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, 866 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0,
821 &adev->mc.vm_fault); 867 &adev->gmc.vm_fault);
822 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UTCL2, 0, 868 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0,
823 &adev->mc.vm_fault); 869 &adev->gmc.vm_fault);
824 870
825 if (r) 871 if (r)
826 return r; 872 return r;
@@ -829,13 +875,13 @@ static int gmc_v9_0_sw_init(void *handle)
829 * This is the max address of the GPU's 875 * This is the max address of the GPU's
830 * internal address space. 876 * internal address space.
831 */ 877 */
832 adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ 878 adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
833 879
834 /* 880 /*
835 * It needs to reserve 8M stolen memory for vega10 881 * It needs to reserve 8M stolen memory for vega10
836 * TODO: Figure out how to avoid that... 882 * TODO: Figure out how to avoid that...
837 */ 883 */
838 adev->mc.stolen_size = 8 * 1024 * 1024; 884 adev->gmc.stolen_size = 8 * 1024 * 1024;
839 885
840 /* set DMA mask + need_dma32 flags. 886 /* set DMA mask + need_dma32 flags.
841 * PCIE - can handle 44-bits. 887 * PCIE - can handle 44-bits.
@@ -855,6 +901,7 @@ static int gmc_v9_0_sw_init(void *handle)
855 pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); 901 pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
856 printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); 902 printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
857 } 903 }
904 adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
858 905
859 r = gmc_v9_0_mc_init(adev); 906 r = gmc_v9_0_mc_init(adev);
860 if (r) 907 if (r)
@@ -920,6 +967,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
920 golden_settings_athub_1_0_0, 967 golden_settings_athub_1_0_0,
921 ARRAY_SIZE(golden_settings_athub_1_0_0)); 968 ARRAY_SIZE(golden_settings_athub_1_0_0));
922 break; 969 break;
970 case CHIP_VEGA12:
971 break;
923 case CHIP_RAVEN: 972 case CHIP_RAVEN:
924 soc15_program_register_sequence(adev, 973 soc15_program_register_sequence(adev,
925 golden_settings_athub_1_0_0, 974 golden_settings_athub_1_0_0,
@@ -976,7 +1025,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
976 WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); 1025 WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
977 1026
978 /* After HDP is initialized, flush HDP.*/ 1027 /* After HDP is initialized, flush HDP.*/
979 adev->nbio_funcs->hdp_flush(adev); 1028 adev->nbio_funcs->hdp_flush(adev, NULL);
980 1029
981 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) 1030 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
982 value = false; 1031 value = false;
@@ -985,10 +1034,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
985 1034
986 gfxhub_v1_0_set_fault_enable_default(adev, value); 1035 gfxhub_v1_0_set_fault_enable_default(adev, value);
987 mmhub_v1_0_set_fault_enable_default(adev, value); 1036 mmhub_v1_0_set_fault_enable_default(adev, value);
988 gmc_v9_0_gart_flush_gpu_tlb(adev, 0); 1037 gmc_v9_0_flush_gpu_tlb(adev, 0);
989 1038
990 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 1039 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
991 (unsigned)(adev->mc.gart_size >> 20), 1040 (unsigned)(adev->gmc.gart_size >> 20),
992 (unsigned long long)adev->gart.table_addr); 1041 (unsigned long long)adev->gart.table_addr);
993 adev->gart.ready = true; 1042 adev->gart.ready = true;
994 return 0; 1043 return 0;
@@ -1039,7 +1088,7 @@ static int gmc_v9_0_hw_fini(void *handle)
1039 return 0; 1088 return 0;
1040 } 1089 }
1041 1090
1042 amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); 1091 amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
1043 gmc_v9_0_gart_disable(adev); 1092 gmc_v9_0_gart_disable(adev);
1044 1093
1045 return 0; 1094 return 0;
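
As an aside on the hunks above: the gmc_v9_0 changes fold the per-ASIC memory-controller callbacks into a single adev->gmc.gmc_funcs table and add ring-emitted variants (emit_flush_gpu_tlb, emit_pasid_mapping) alongside the existing MMIO flush path, so the state (adev->gmc) and the callbacks now live together. A stand-alone sketch of that function-table dispatch, using simplified stand-in types rather than the real amdgpu structures, could look like this:

#include <stdint.h>
#include <stdio.h>

/* simplified stand-ins for struct amdgpu_device / struct amdgpu_ring */
struct dev;
struct ring;

struct gmc_funcs {
	/* CPU/MMIO path, used e.g. while bringing up the GART */
	void (*flush_gpu_tlb)(struct dev *adev, uint32_t vmid);
	/* command-stream path, emitted by a ring backend during a VM switch */
	uint64_t (*emit_flush_gpu_tlb)(struct ring *ring, unsigned vmid,
				       uint64_t pd_addr);
};

static void v9_flush_gpu_tlb(struct dev *adev, uint32_t vmid)
{
	printf("MMIO TLB flush, vmid %u\n", (unsigned)vmid);
}

static uint64_t v9_emit_flush_gpu_tlb(struct ring *ring, unsigned vmid,
				       uint64_t pd_addr)
{
	/* the real callback writes PTB lo/hi, the invalidate request and
	 * a reg-wait for the ack into the ring */
	printf("ring-emitted TLB flush, vmid %u, pd 0x%llx\n",
	       vmid, (unsigned long long)pd_addr);
	return pd_addr;
}

static const struct gmc_funcs v9_gmc_funcs = {
	.flush_gpu_tlb = v9_flush_gpu_tlb,
	.emit_flush_gpu_tlb = v9_emit_flush_gpu_tlb,
};

int main(void)
{
	const struct gmc_funcs *funcs = &v9_gmc_funcs; /* chosen once at early_init */

	funcs->flush_gpu_tlb(NULL, 0);
	funcs->emit_flush_gpu_tlb(NULL, 1, 0x100000);
	return 0;
}

Common code can then call whichever flush variant fits the situation without knowing the ASIC generation; which callbacks actually exist for gmc v9 is exactly what the gmc_v9_0_gmc_funcs table in the diff lists.
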
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index c4e4be3dd31d..842c4b677b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -111,7 +111,7 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev)
111 iceland_ih_disable_interrupts(adev); 111 iceland_ih_disable_interrupts(adev);
112 112
113 /* setup interrupt control */ 113 /* setup interrupt control */
114 WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); 114 WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
115 interrupt_cntl = RREG32(mmINTERRUPT_CNTL); 115 interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
116 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi 116 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
117 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN 117 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -260,7 +260,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev,
260 entry->src_data[0] = dw[1] & 0xfffffff; 260 entry->src_data[0] = dw[1] & 0xfffffff;
261 entry->ring_id = dw[2] & 0xff; 261 entry->ring_id = dw[2] & 0xff;
262 entry->vmid = (dw[2] >> 8) & 0xff; 262 entry->vmid = (dw[2] >> 8) & 0xff;
263 entry->pas_id = (dw[2] >> 16) & 0xffff; 263 entry->pasid = (dw[2] >> 16) & 0xffff;
264 264
265 /* wptr/rptr are in bytes! */ 265 /* wptr/rptr are in bytes! */
266 adev->irq.ih.rptr += 16; 266 adev->irq.ih.rptr += 16;
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index d9e9e52a0def..26ba984ab2b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -42,6 +42,8 @@
42#define KV_MINIMUM_ENGINE_CLOCK 800 42#define KV_MINIMUM_ENGINE_CLOCK 800
43#define SMC_RAM_END 0x40000 43#define SMC_RAM_END 0x40000
44 44
45static const struct amd_pm_funcs kv_dpm_funcs;
46
45static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev); 47static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev);
46static int kv_enable_nb_dpm(struct amdgpu_device *adev, 48static int kv_enable_nb_dpm(struct amdgpu_device *adev,
47 bool enable); 49 bool enable);
@@ -2960,6 +2962,8 @@ static int kv_dpm_early_init(void *handle)
2960{ 2962{
2961 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2963 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2962 2964
2965 adev->powerplay.pp_funcs = &kv_dpm_funcs;
2966 adev->powerplay.pp_handle = adev;
2963 kv_dpm_set_irq_funcs(adev); 2967 kv_dpm_set_irq_funcs(adev);
2964 2968
2965 return 0; 2969 return 0;
@@ -3301,7 +3305,7 @@ static int kv_dpm_read_sensor(void *handle, int idx,
3301 } 3305 }
3302} 3306}
3303 3307
3304const struct amd_ip_funcs kv_dpm_ip_funcs = { 3308static const struct amd_ip_funcs kv_dpm_ip_funcs = {
3305 .name = "kv_dpm", 3309 .name = "kv_dpm",
3306 .early_init = kv_dpm_early_init, 3310 .early_init = kv_dpm_early_init,
3307 .late_init = kv_dpm_late_init, 3311 .late_init = kv_dpm_late_init,
@@ -3318,8 +3322,16 @@ const struct amd_ip_funcs kv_dpm_ip_funcs = {
3318 .set_powergating_state = kv_dpm_set_powergating_state, 3322 .set_powergating_state = kv_dpm_set_powergating_state,
3319}; 3323};
3320 3324
3321const struct amd_pm_funcs kv_dpm_funcs = { 3325const struct amdgpu_ip_block_version kv_smu_ip_block =
3322 .get_temperature = &kv_dpm_get_temp, 3326{
3327 .type = AMD_IP_BLOCK_TYPE_SMC,
3328 .major = 1,
3329 .minor = 0,
3330 .rev = 0,
3331 .funcs = &kv_dpm_ip_funcs,
3332};
3333
3334static const struct amd_pm_funcs kv_dpm_funcs = {
3323 .pre_set_power_state = &kv_dpm_pre_set_power_state, 3335 .pre_set_power_state = &kv_dpm_pre_set_power_state,
3324 .set_power_state = &kv_dpm_set_power_state, 3336 .set_power_state = &kv_dpm_set_power_state,
3325 .post_set_power_state = &kv_dpm_post_set_power_state, 3337 .post_set_power_state = &kv_dpm_post_set_power_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index ffd5b7ee49c4..43f925773b57 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -50,7 +50,7 @@ static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
50 uint64_t value; 50 uint64_t value;
51 51
52 BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); 52 BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
53 value = adev->gart.table_addr - adev->mc.vram_start + 53 value = adev->gart.table_addr - adev->gmc.vram_start +
54 adev->vm_manager.vram_base_offset; 54 adev->vm_manager.vram_base_offset;
55 value &= 0x0000FFFFFFFFF000ULL; 55 value &= 0x0000FFFFFFFFF000ULL;
56 value |= 0x1; /* valid bit */ 56 value |= 0x1; /* valid bit */
@@ -67,14 +67,14 @@ static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
67 mmhub_v1_0_init_gart_pt_regs(adev); 67 mmhub_v1_0_init_gart_pt_regs(adev);
68 68
69 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, 69 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
70 (u32)(adev->mc.gart_start >> 12)); 70 (u32)(adev->gmc.gart_start >> 12));
71 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, 71 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
72 (u32)(adev->mc.gart_start >> 44)); 72 (u32)(adev->gmc.gart_start >> 44));
73 73
74 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, 74 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
75 (u32)(adev->mc.gart_end >> 12)); 75 (u32)(adev->gmc.gart_end >> 12));
76 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, 76 WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
77 (u32)(adev->mc.gart_end >> 44)); 77 (u32)(adev->gmc.gart_end >> 44));
78} 78}
79 79
80static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) 80static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
@@ -89,12 +89,12 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
89 89
90 /* Program the system aperture low logical page number. */ 90 /* Program the system aperture low logical page number. */
91 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, 91 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
92 adev->mc.vram_start >> 18); 92 adev->gmc.vram_start >> 18);
93 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 93 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
94 adev->mc.vram_end >> 18); 94 adev->gmc.vram_end >> 18);
95 95
96 /* Set default page address. */ 96 /* Set default page address. */
97 value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + 97 value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
98 adev->vm_manager.vram_base_offset; 98 adev->vm_manager.vram_base_offset;
99 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 99 WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
100 (u32)(value >> 12)); 100 (u32)(value >> 12));
@@ -103,9 +103,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
103 103
104 /* Program "protection fault". */ 104 /* Program "protection fault". */
105 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, 105 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
106 (u32)(adev->dummy_page.addr >> 12)); 106 (u32)(adev->dummy_page_addr >> 12));
107 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, 107 WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
108 (u32)((u64)adev->dummy_page.addr >> 44)); 108 (u32)((u64)adev->dummy_page_addr >> 44));
109 109
110 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2); 110 tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2);
111 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, 111 tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
@@ -155,7 +155,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
155 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); 155 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
156 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); 156 WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
157 157
158 if (adev->mc.translate_further) { 158 if (adev->gmc.translate_further) {
159 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); 159 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
160 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, 160 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
161 L2_CACHE_BIGK_FRAGMENT_SIZE, 9); 161 L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
@@ -207,7 +207,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
207 207
208 num_level = adev->vm_manager.num_level; 208 num_level = adev->vm_manager.num_level;
209 block_size = adev->vm_manager.block_size; 209 block_size = adev->vm_manager.block_size;
210 if (adev->mc.translate_further) 210 if (adev->gmc.translate_further)
211 num_level -= 1; 211 num_level -= 1;
212 else 212 else
213 block_size -= 9; 213 block_size -= 9;
@@ -272,21 +272,21 @@ static const struct pctl_data pctl0_data[] = {
272 {0x11, 0x6a684}, 272 {0x11, 0x6a684},
273 {0x19, 0xea68e}, 273 {0x19, 0xea68e},
274 {0x29, 0xa69e}, 274 {0x29, 0xa69e},
275 {0x2b, 0x34a6c0}, 275 {0x2b, 0x0010a6c0},
276 {0x61, 0x83a707}, 276 {0x3d, 0x83a707},
277 {0xe6, 0x8a7a4}, 277 {0xc2, 0x8a7a4},
278 {0xf0, 0x1a7b8}, 278 {0xcc, 0x1a7b8},
279 {0xf3, 0xfa7cc}, 279 {0xcf, 0xfa7cc},
280 {0x104, 0x17a7dd}, 280 {0xe0, 0x17a7dd},
281 {0x11d, 0xa7dc}, 281 {0xf9, 0xa7dc},
282 {0x11f, 0x12a7f5}, 282 {0xfb, 0x12a7f5},
283 {0x133, 0xa808}, 283 {0x10f, 0xa808},
284 {0x135, 0x12a810}, 284 {0x111, 0x12a810},
285 {0x149, 0x7a82c} 285 {0x125, 0x7a82c}
286}; 286};
287#define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data)) 287#define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
288 288
289#define PCTL0_RENG_EXEC_END_PTR 0x151 289#define PCTL0_RENG_EXEC_END_PTR 0x12d
290#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 290#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640
291#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833 291#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
292 292
@@ -385,10 +385,9 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
385 if (amdgpu_sriov_vf(adev)) 385 if (amdgpu_sriov_vf(adev))
386 return; 386 return;
387 387
388 /****************** pctl0 **********************/
388 pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC); 389 pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
389 pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); 390 pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
390 pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
391 pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
392 391
393 /* Light sleep must be disabled before writing to pctl0 registers */ 392 /* Light sleep must be disabled before writing to pctl0 registers */
394 pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; 393 pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -402,12 +401,13 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
402 pctl0_data[i].data); 401 pctl0_data[i].data);
403 } 402 }
404 403
405 /* Set the reng execute end ptr for pctl0 */ 404 /* Re-enable light sleep */
406 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, 405 pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
407 PCTL0_RENG_EXECUTE, 406 WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
408 RENG_EXECUTE_END_PTR, 407
409 PCTL0_RENG_EXEC_END_PTR); 408 /****************** pctl1 **********************/
410 WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); 409 pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
410 pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
411 411
412 /* Light sleep must be disabled before writing to pctl1 registers */ 412 /* Light sleep must be disabled before writing to pctl1 registers */
413 pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; 413 pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -421,20 +421,25 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
421 pctl1_data[i].data); 421 pctl1_data[i].data);
422 } 422 }
423 423
424 /* Re-enable light sleep */
425 pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
426 WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
427
428 mmhub_v1_0_power_gating_write_save_ranges(adev);
429
430 /* Set the reng execute end ptr for pctl0 */
431 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
432 PCTL0_RENG_EXECUTE,
433 RENG_EXECUTE_END_PTR,
434 PCTL0_RENG_EXEC_END_PTR);
435 WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
436
424 /* Set the reng execute end ptr for pctl1 */ 437 /* Set the reng execute end ptr for pctl1 */
425 pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, 438 pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
426 PCTL1_RENG_EXECUTE, 439 PCTL1_RENG_EXECUTE,
427 RENG_EXECUTE_END_PTR, 440 RENG_EXECUTE_END_PTR,
428 PCTL1_RENG_EXEC_END_PTR); 441 PCTL1_RENG_EXEC_END_PTR);
429 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); 442 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
430
431 mmhub_v1_0_power_gating_write_save_ranges(adev);
432
433 /* Re-enable light sleep */
434 pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
435 WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
436 pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
437 WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
438} 443}
439 444
440void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, 445void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
@@ -466,6 +471,9 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
466 RENG_EXECUTE_ON_REG_UPDATE, 1); 471 RENG_EXECUTE_ON_REG_UPDATE, 1);
467 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); 472 WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
468 473
474 if (adev->powerplay.pp_funcs->set_mmhub_powergating_by_smu)
475 amdgpu_dpm_set_mmhub_powergating_by_smu(adev);
476
469 } else { 477 } else {
470 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, 478 pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
471 PCTL0_RENG_EXECUTE, 479 PCTL0_RENG_EXECUTE,
@@ -494,9 +502,9 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
494 * SRIOV driver need to program them 502 * SRIOV driver need to program them
495 */ 503 */
496 WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE, 504 WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE,
497 adev->mc.vram_start >> 24); 505 adev->gmc.vram_start >> 24);
498 WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP, 506 WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP,
499 adev->mc.vram_end >> 24); 507 adev->gmc.vram_end >> 24);
500 } 508 }
501 509
502 /* GART Enable. */ 510 /* GART Enable. */
@@ -725,6 +733,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
725 733
726 switch (adev->asic_type) { 734 switch (adev->asic_type) {
727 case CHIP_VEGA10: 735 case CHIP_VEGA10:
736 case CHIP_VEGA12:
728 case CHIP_RAVEN: 737 case CHIP_RAVEN:
729 mmhub_v1_0_update_medium_grain_clock_gating(adev, 738 mmhub_v1_0_update_medium_grain_clock_gating(adev,
730 state == AMD_CG_STATE_GATE ? true : false); 739 state == AMD_CG_STATE_GATE ? true : false);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 271452d3999a..493348672475 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -33,56 +33,34 @@
33 33
34static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev) 34static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
35{ 35{
36 u32 reg; 36 WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
37 int timeout = AI_MAILBOX_TIMEDOUT;
38 u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
39
40 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
41 mmBIF_BX_PF0_MAILBOX_CONTROL));
42 reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
43 WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
44 mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
45
46 /*Wait for RCV_MSG_VALID to be 0*/
47 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
48 mmBIF_BX_PF0_MAILBOX_CONTROL));
49 while (reg & mask) {
50 if (timeout <= 0) {
51 pr_err("RCV_MSG_VALID is not cleared\n");
52 break;
53 }
54 mdelay(1);
55 timeout -=1;
56
57 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
58 mmBIF_BX_PF0_MAILBOX_CONTROL));
59 }
60} 37}
61 38
62static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val) 39static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
63{ 40{
64 u32 reg; 41 WREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
42}
65 43
66 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 44/*
67 mmBIF_BX_PF0_MAILBOX_CONTROL)); 45 * this peek_msg could *only* be called in IRQ routine because in IRQ routine
68 reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, 46 * RCV_MSG_VALID field of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1
69 TRN_MSG_VALID, val ? 1 : 0); 47 * by host.
70 WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL), 48 *
71 reg); 49 * if not called in IRQ routine, this peek_msg cannot be guaranteed to return the
50 * correct value since it doesn't return the RCV_DW0 under the case that
51 * RCV_MSG_VALID is set by host.
52 */
53static enum idh_event xgpu_ai_mailbox_peek_msg(struct amdgpu_device *adev)
54{
55 return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
56 mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
72} 57}
73 58
59
74static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev, 60static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
75 enum idh_event event) 61 enum idh_event event)
76{ 62{
77 u32 reg; 63 u32 reg;
78 u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
79
80 if (event != IDH_FLR_NOTIFICATION_CMPL) {
81 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
82 mmBIF_BX_PF0_MAILBOX_CONTROL));
83 if (!(reg & mask))
84 return -ENOENT;
85 }
86 64
87 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 65 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
88 mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0)); 66 mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
@@ -94,54 +72,67 @@ static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
94 return 0; 72 return 0;
95} 73}
96 74
75static uint8_t xgpu_ai_peek_ack(struct amdgpu_device *adev) {
76 return RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
77}
78
97static int xgpu_ai_poll_ack(struct amdgpu_device *adev) 79static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
98{ 80{
99 int r = 0, timeout = AI_MAILBOX_TIMEDOUT; 81 int timeout = AI_MAILBOX_POLL_ACK_TIMEDOUT;
100 u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, TRN_MSG_ACK); 82 u8 reg;
101 u32 reg; 83
84 do {
85 reg = RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
86 if (reg & 2)
87 return 0;
102 88
103 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
104 mmBIF_BX_PF0_MAILBOX_CONTROL));
105 while (!(reg & mask)) {
106 if (timeout <= 0) {
107 pr_err("Doesn't get ack from pf.\n");
108 r = -ETIME;
109 break;
110 }
111 mdelay(5); 89 mdelay(5);
112 timeout -= 5; 90 timeout -= 5;
91 } while (timeout > 1);
113 92
114 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 93 pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
115 mmBIF_BX_PF0_MAILBOX_CONTROL));
116 }
117 94
118 return r; 95 return -ETIME;
119} 96}
120 97
121static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) 98static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
122{ 99{
123 int r = 0, timeout = AI_MAILBOX_TIMEDOUT; 100 int r, timeout = AI_MAILBOX_POLL_MSG_TIMEDOUT;
124
125 r = xgpu_ai_mailbox_rcv_msg(adev, event);
126 while (r) {
127 if (timeout <= 0) {
128 pr_err("Doesn't get msg:%d from pf.\n", event);
129 r = -ETIME;
130 break;
131 }
132 mdelay(5);
133 timeout -= 5;
134 101
102 do {
135 r = xgpu_ai_mailbox_rcv_msg(adev, event); 103 r = xgpu_ai_mailbox_rcv_msg(adev, event);
136 } 104 if (!r)
105 return 0;
137 106
138 return r; 107 msleep(10);
108 timeout -= 10;
109 } while (timeout > 1);
110
111 pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
112
113 return -ETIME;
139} 114}
140 115
141static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, 116static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
142 enum idh_request req, u32 data1, u32 data2, u32 data3) { 117 enum idh_request req, u32 data1, u32 data2, u32 data3) {
143 u32 reg; 118 u32 reg;
144 int r; 119 int r;
120 uint8_t trn;
121
122 /* IMPORTANT:
123 * clear TRN_MSG_VALID to clear host's RCV_MSG_ACK,
124 * and with host's RCV_MSG_ACK cleared hw automatically clears host's RCV_MSG_ACK,
125 * which leads to VF's TRN_MSG_ACK being cleared; otherwise the xgpu_ai_poll_ack() below
126 * will return immediately
127 */
128 do {
129 xgpu_ai_mailbox_set_valid(adev, false);
130 trn = xgpu_ai_peek_ack(adev);
131 if (trn) {
132 pr_err("trn=%x ACK should not assert! wait again !\n", trn);
133 msleep(1);
134 }
135 } while(trn);
145 136
146 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, 137 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
147 mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); 138 mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
@@ -245,15 +236,36 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
245{ 236{
246 struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); 237 struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
247 struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); 238 struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
248 239 int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
249 /* wait until RCV_MSG become 3 */ 240 int locked;
250 if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { 241
251 pr_err("failed to recieve FLR_CMPL\n"); 242 /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
252 return; 243 * otherwise the mailbox msg will be ruined/reset by
253 } 244 * the VF FLR.
254 245 *
255 /* Trigger recovery due to world switch failure */ 246 * we can unlock the lock_reset to allow "amdgpu_job_timedout"
256 amdgpu_device_gpu_recover(adev, NULL, false); 247 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
248 * which means host side had finished this VF's FLR.
249 */
250 locked = mutex_trylock(&adev->lock_reset);
251 if (locked)
252 adev->in_gpu_reset = 1;
253
254 do {
255 if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
256 goto flr_done;
257
258 msleep(10);
259 timeout -= 10;
260 } while (timeout > 1);
261
262flr_done:
263 if (locked)
264 mutex_unlock(&adev->lock_reset);
265
266 /* Trigger recovery for world switch failure if no TDR */
267 if (amdgpu_lockup_timeout == 0)
268 amdgpu_device_gpu_recover(adev, NULL, true);
257} 269}
258 270
259static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, 271static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -274,24 +286,22 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
274 struct amdgpu_irq_src *source, 286 struct amdgpu_irq_src *source,
275 struct amdgpu_iv_entry *entry) 287 struct amdgpu_iv_entry *entry)
276{ 288{
277 int r; 289 enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
278 290
279 /* trigger gpu-reset by hypervisor only if TDR disbaled */ 291 switch (event) {
280 if (!amdgpu_gpu_recovery) { 292 case IDH_FLR_NOTIFICATION:
281 /* see what event we get */ 293 if (amdgpu_sriov_runtime(adev))
282 r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); 294 schedule_work(&adev->virt.flr_work);
283 295 break;
284 /* sometimes the interrupt is delayed to inject to VM, so under such case 296 /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
285 * the IDH_FLR_NOTIFICATION is overwritten by VF FLR from GIM side, thus 297 * it here since that polling thread will handle it,
286 * above recieve message could be failed, we should schedule the flr_work 298 * other msgs like flr complete are not handled here.
287 * anyway
288 */ 299 */
289 if (r) { 300 case IDH_CLR_MSG_BUF:
290 DRM_ERROR("FLR_NOTIFICATION is missed\n"); 301 case IDH_FLR_NOTIFICATION_CMPL:
291 xgpu_ai_mailbox_send_ack(adev); 302 case IDH_READY_TO_ACCESS_GPU:
292 } 303 default:
293 304 break;
294 schedule_work(&adev->virt.flr_work);
295 } 305 }
296 306
297 return 0; 307 return 0;
@@ -319,11 +329,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
319{ 329{
320 int r; 330 int r;
321 331
322 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq); 332 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
323 if (r) 333 if (r)
324 return r; 334 return r;
325 335
326 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq); 336 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
327 if (r) { 337 if (r) {
328 amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); 338 amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
329 return r; 339 return r;
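
The reworked mailbox helpers above (xgpu_ai_poll_ack, xgpu_ai_poll_msg and the FLR work item) all follow the same bounded-polling idiom: test a condition, sleep a fixed step, shrink the millisecond budget, and fail with a timeout once the budget is gone. A minimal user-space sketch of that idiom, with check() standing in for the real mailbox read:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static bool check(void)
{
	return false;			/* pretend the event never arrives */
}

static int poll_with_timeout(int timeout_ms)
{
	do {
		if (check())
			return 0;	/* event seen within the budget */
		usleep(10 * 1000);	/* stands in for msleep(10) */
		timeout_ms -= 10;
	} while (timeout_ms > 1);

	return -1;			/* the kernel code returns -ETIME here */
}

int main(void)
{
	printf("poll result: %d\n", poll_with_timeout(100));
	return 0;
}

The while (timeout > 1) guard mirrors the loops above, which treat a budget of 1 ms or less as already expired.
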
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 67e78576a9eb..b4a9ceea334b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -24,7 +24,9 @@
24#ifndef __MXGPU_AI_H__ 24#ifndef __MXGPU_AI_H__
25#define __MXGPU_AI_H__ 25#define __MXGPU_AI_H__
26 26
27#define AI_MAILBOX_TIMEDOUT 12000 27#define AI_MAILBOX_POLL_ACK_TIMEDOUT 500
28#define AI_MAILBOX_POLL_MSG_TIMEDOUT 12000
29#define AI_MAILBOX_POLL_FLR_TIMEDOUT 500
28 30
29enum idh_request { 31enum idh_request {
30 IDH_REQ_GPU_INIT_ACCESS = 1, 32 IDH_REQ_GPU_INIT_ACCESS = 1,
@@ -51,4 +53,7 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev);
51int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev); 53int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev);
52void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev); 54void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev);
53 55
56#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4
57#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1
58
54#endif 59#endif
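
The two OFFSET_BYTE macros above presumably exist because SOC15_REG_OFFSET() yields a dword register index while the RREG8()/WREG8() accessors used in mxgpu_ai.c take byte addresses: multiplying by 4 addresses byte 0 of BIF_BX_PF0_MAILBOX_CONTROL (the TRN control byte) and adding 1 selects the next byte (the RCV control byte), so the VF can poke one mailbox byte without a read-modify-write of the whole dword. A trivial sketch of the arithmetic, using a made-up register index:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mailbox_control_dw = 0x12345;		/* hypothetical dword index */
	uint32_t trn_byte = mailbox_control_dw * 4;	/* byte 0 of the register */
	uint32_t rcv_byte = mailbox_control_dw * 4 + 1;	/* byte 1 of the register */

	printf("TRN byte offset: 0x%x\n", (unsigned)trn_byte);
	printf("RCV byte offset: 0x%x\n", (unsigned)rcv_byte);
	return 0;
}
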
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index d4da663d5eb0..6f9c54978cc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -53,9 +53,16 @@ static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
53 WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); 53 WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
54} 54}
55 55
56static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) 56static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev,
57 struct amdgpu_ring *ring)
57{ 58{
58 WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); 59 if (!ring || !ring->funcs->emit_wreg)
60 WREG32_SOC15_NO_KIQ(NBIO, 0,
61 mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL,
62 0);
63 else
64 amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
65 NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0);
59} 66}
60 67
61static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) 68static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
@@ -126,7 +133,7 @@ static void nbio_v6_1_ih_control(struct amdgpu_device *adev)
126 u32 interrupt_cntl; 133 u32 interrupt_cntl;
127 134
128 /* setup interrupt control */ 135 /* setup interrupt control */
129 WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); 136 WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
130 interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); 137 interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
131 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi 138 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
132 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN 139 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -213,12 +220,12 @@ static u32 nbio_v6_1_get_hdp_flush_done_offset(struct amdgpu_device *adev)
213 220
214static u32 nbio_v6_1_get_pcie_index_offset(struct amdgpu_device *adev) 221static u32 nbio_v6_1_get_pcie_index_offset(struct amdgpu_device *adev)
215{ 222{
216 return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX); 223 return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
217} 224}
218 225
219static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev) 226static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev)
220{ 227{
221 return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA); 228 return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
222} 229}
223 230
224static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { 231static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 17a9131a4598..df34dc79d444 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -53,9 +53,14 @@ static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable)
53 WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); 53 WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
54} 54}
55 55
56static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) 56static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
57 struct amdgpu_ring *ring)
57{ 58{
58 WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); 59 if (!ring || !ring->funcs->emit_wreg)
60 WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
61 else
62 amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
63 NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
59} 64}
60 65
61static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) 66static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
@@ -203,7 +208,7 @@ static void nbio_v7_0_ih_control(struct amdgpu_device *adev)
203 u32 interrupt_cntl; 208 u32 interrupt_cntl;
204 209
205 /* setup interrupt control */ 210 /* setup interrupt control */
206 WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); 211 WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
207 interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); 212 interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
208 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi 213 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
209 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN 214 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
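
Both nbio_v6_1 and nbio_v7_0 now take an optional ring in hdp_flush(): a NULL ring (or a ring without emit_wreg) keeps the old immediate MMIO write, while a capable ring gets the flush emitted into its command stream, which is why gmc_v9_0_gart_enable() above now calls hdp_flush(adev, NULL). A small stand-alone sketch of that optional-ring pattern, again with simplified stand-in types:

#include <stdio.h>

struct ring {
	const char *name;
	int can_emit_wreg;	/* stands in for ring->funcs->emit_wreg */
};

static void hdp_flush(void *adev, struct ring *ring)
{
	if (!ring || !ring->can_emit_wreg)
		printf("immediate MMIO write of the HDP flush register\n");
	else
		printf("emit HDP flush write into ring %s\n", ring->name);
}

int main(void)
{
	struct ring gfx = { .name = "gfx", .can_emit_wreg = 1 };

	hdp_flush(NULL, NULL);	/* CPU path, e.g. during GART enable */
	hdp_flush(NULL, &gfx);	/* ordered with the ring's other work */
	return 0;
}
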
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 5a9fe24697f9..8873d833a7f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -87,7 +87,7 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
87 return 0; 87 return 0;
88} 88}
89 89
90int psp_v10_0_init_microcode(struct psp_context *psp) 90static int psp_v10_0_init_microcode(struct psp_context *psp)
91{ 91{
92 struct amdgpu_device *adev = psp->adev; 92 struct amdgpu_device *adev = psp->adev;
93 const char *chip_name; 93 const char *chip_name;
@@ -133,7 +133,8 @@ out:
133 return err; 133 return err;
134} 134}
135 135
136int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) 136static int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
137 struct psp_gfx_cmd_resp *cmd)
137{ 138{
138 int ret; 139 int ret;
139 uint64_t fw_mem_mc_addr = ucode->mc_addr; 140 uint64_t fw_mem_mc_addr = ucode->mc_addr;
@@ -152,7 +153,8 @@ int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cm
152 return ret; 153 return ret;
153} 154}
154 155
155int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) 156static int psp_v10_0_ring_init(struct psp_context *psp,
157 enum psp_ring_type ring_type)
156{ 158{
157 int ret = 0; 159 int ret = 0;
158 struct psp_ring *ring; 160 struct psp_ring *ring;
@@ -177,7 +179,8 @@ int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
177 return 0; 179 return 0;
178} 180}
179 181
180int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) 182static int psp_v10_0_ring_create(struct psp_context *psp,
183 enum psp_ring_type ring_type)
181{ 184{
182 int ret = 0; 185 int ret = 0;
183 unsigned int psp_ring_reg = 0; 186 unsigned int psp_ring_reg = 0;
@@ -208,7 +211,8 @@ int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
208 return ret; 211 return ret;
209} 212}
210 213
211int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) 214static int psp_v10_0_ring_stop(struct psp_context *psp,
215 enum psp_ring_type ring_type)
212{ 216{
213 int ret = 0; 217 int ret = 0;
214 struct psp_ring *ring; 218 struct psp_ring *ring;
@@ -231,7 +235,8 @@ int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type)
231 return ret; 235 return ret;
232} 236}
233 237
234int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) 238static int psp_v10_0_ring_destroy(struct psp_context *psp,
239 enum psp_ring_type ring_type)
235{ 240{
236 int ret = 0; 241 int ret = 0;
237 struct psp_ring *ring = &psp->km_ring; 242 struct psp_ring *ring = &psp->km_ring;
@@ -248,10 +253,10 @@ int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type
248 return ret; 253 return ret;
249} 254}
250 255
251int psp_v10_0_cmd_submit(struct psp_context *psp, 256static int psp_v10_0_cmd_submit(struct psp_context *psp,
252 struct amdgpu_firmware_info *ucode, 257 struct amdgpu_firmware_info *ucode,
253 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, 258 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
254 int index) 259 int index)
255{ 260{
256 unsigned int psp_write_ptr_reg = 0; 261 unsigned int psp_write_ptr_reg = 0;
257 struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; 262 struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
@@ -298,9 +303,9 @@ int psp_v10_0_cmd_submit(struct psp_context *psp,
298 303
299static int 304static int
300psp_v10_0_sram_map(struct amdgpu_device *adev, 305psp_v10_0_sram_map(struct amdgpu_device *adev,
301 unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, 306 unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
302 unsigned int *sram_data_reg_offset, 307 unsigned int *sram_data_reg_offset,
303 enum AMDGPU_UCODE_ID ucode_id) 308 enum AMDGPU_UCODE_ID ucode_id)
304{ 309{
305 int ret = 0; 310 int ret = 0;
306 311
@@ -383,9 +388,9 @@ psp_v10_0_sram_map(struct amdgpu_device *adev,
383 return ret; 388 return ret;
384} 389}
385 390
386bool psp_v10_0_compare_sram_data(struct psp_context *psp, 391static bool psp_v10_0_compare_sram_data(struct psp_context *psp,
387 struct amdgpu_firmware_info *ucode, 392 struct amdgpu_firmware_info *ucode,
388 enum AMDGPU_UCODE_ID ucode_type) 393 enum AMDGPU_UCODE_ID ucode_type)
389{ 394{
390 int err = 0; 395 int err = 0;
391 unsigned int fw_sram_reg_val = 0; 396 unsigned int fw_sram_reg_val = 0;
@@ -419,8 +424,25 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp,
419} 424}
420 425
421 426
422int psp_v10_0_mode1_reset(struct psp_context *psp) 427static int psp_v10_0_mode1_reset(struct psp_context *psp)
423{ 428{
424 DRM_INFO("psp mode 1 reset not supported now! \n"); 429 DRM_INFO("psp mode 1 reset not supported now! \n");
425 return -EINVAL; 430 return -EINVAL;
426} 431}
432
433static const struct psp_funcs psp_v10_0_funcs = {
434 .init_microcode = psp_v10_0_init_microcode,
435 .prep_cmd_buf = psp_v10_0_prep_cmd_buf,
436 .ring_init = psp_v10_0_ring_init,
437 .ring_create = psp_v10_0_ring_create,
438 .ring_stop = psp_v10_0_ring_stop,
439 .ring_destroy = psp_v10_0_ring_destroy,
440 .cmd_submit = psp_v10_0_cmd_submit,
441 .compare_sram_data = psp_v10_0_compare_sram_data,
442 .mode1_reset = psp_v10_0_mode1_reset,
443};
444
445void psp_v10_0_set_psp_funcs(struct psp_context *psp)
446{
447 psp->funcs = &psp_v10_0_funcs;
448}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
index 451e8308303f..20c2a94859d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
@@ -27,24 +27,6 @@
27 27
28#include "amdgpu_psp.h" 28#include "amdgpu_psp.h"
29 29
30extern int psp_v10_0_init_microcode(struct psp_context *psp); 30void psp_v10_0_set_psp_funcs(struct psp_context *psp);
31extern int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
32 struct psp_gfx_cmd_resp *cmd);
33extern int psp_v10_0_ring_init(struct psp_context *psp,
34 enum psp_ring_type ring_type);
35extern int psp_v10_0_ring_create(struct psp_context *psp,
36 enum psp_ring_type ring_type);
37extern int psp_v10_0_ring_stop(struct psp_context *psp,
38 enum psp_ring_type ring_type);
39extern int psp_v10_0_ring_destroy(struct psp_context *psp,
40 enum psp_ring_type ring_type);
41extern int psp_v10_0_cmd_submit(struct psp_context *psp,
42 struct amdgpu_firmware_info *ucode,
43 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
44 int index);
45extern bool psp_v10_0_compare_sram_data(struct psp_context *psp,
46 struct amdgpu_firmware_info *ucode,
47 enum AMDGPU_UCODE_ID ucode_type);
48 31
49extern int psp_v10_0_mode1_reset(struct psp_context *psp);
50#endif 32#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 19bd1934e63d..196e75def1f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -39,6 +39,8 @@
39 39
40MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); 40MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
41MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); 41MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
42MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
43MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
42 44
43#define smnMP1_FIRMWARE_FLAGS 0x3010028 45#define smnMP1_FIRMWARE_FLAGS 0x3010028
44 46
@@ -93,7 +95,7 @@ psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *t
93 return 0; 95 return 0;
94} 96}
95 97
96int psp_v3_1_init_microcode(struct psp_context *psp) 98static int psp_v3_1_init_microcode(struct psp_context *psp)
97{ 99{
98 struct amdgpu_device *adev = psp->adev; 100 struct amdgpu_device *adev = psp->adev;
99 const char *chip_name; 101 const char *chip_name;
@@ -107,6 +109,9 @@ int psp_v3_1_init_microcode(struct psp_context *psp)
107 case CHIP_VEGA10: 109 case CHIP_VEGA10:
108 chip_name = "vega10"; 110 chip_name = "vega10";
109 break; 111 break;
112 case CHIP_VEGA12:
113 chip_name = "vega12";
114 break;
110 default: BUG(); 115 default: BUG();
111 } 116 }
112 117
@@ -161,7 +166,7 @@ out:
161 return err; 166 return err;
162} 167}
163 168
164int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) 169static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
165{ 170{
166 int ret; 171 int ret;
167 uint32_t psp_gfxdrv_command_reg = 0; 172 uint32_t psp_gfxdrv_command_reg = 0;
@@ -202,7 +207,7 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
202 return ret; 207 return ret;
203} 208}
204 209
205int psp_v3_1_bootloader_load_sos(struct psp_context *psp) 210static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
206{ 211{
207 int ret; 212 int ret;
208 unsigned int psp_gfxdrv_command_reg = 0; 213 unsigned int psp_gfxdrv_command_reg = 0;
@@ -243,7 +248,8 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
243 return ret; 248 return ret;
244} 249}
245 250
246int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) 251static int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
252 struct psp_gfx_cmd_resp *cmd)
247{ 253{
248 int ret; 254 int ret;
249 uint64_t fw_mem_mc_addr = ucode->mc_addr; 255 uint64_t fw_mem_mc_addr = ucode->mc_addr;
@@ -262,7 +268,8 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd
262 return ret; 268 return ret;
263} 269}
264 270
265int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) 271static int psp_v3_1_ring_init(struct psp_context *psp,
272 enum psp_ring_type ring_type)
266{ 273{
267 int ret = 0; 274 int ret = 0;
268 struct psp_ring *ring; 275 struct psp_ring *ring;
@@ -287,7 +294,8 @@ int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
287 return 0; 294 return 0;
288} 295}
289 296
290int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) 297static int psp_v3_1_ring_create(struct psp_context *psp,
298 enum psp_ring_type ring_type)
291{ 299{
292 int ret = 0; 300 int ret = 0;
293 unsigned int psp_ring_reg = 0; 301 unsigned int psp_ring_reg = 0;
@@ -318,7 +326,8 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
318 return ret; 326 return ret;
319} 327}
320 328
321int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) 329static int psp_v3_1_ring_stop(struct psp_context *psp,
330 enum psp_ring_type ring_type)
322{ 331{
323 int ret = 0; 332 int ret = 0;
324 struct psp_ring *ring; 333 struct psp_ring *ring;
@@ -341,7 +350,8 @@ int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type)
341 return ret; 350 return ret;
342} 351}
343 352
344int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) 353static int psp_v3_1_ring_destroy(struct psp_context *psp,
354 enum psp_ring_type ring_type)
345{ 355{
346 int ret = 0; 356 int ret = 0;
347 struct psp_ring *ring = &psp->km_ring; 357 struct psp_ring *ring = &psp->km_ring;
@@ -358,10 +368,10 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
358 return ret; 368 return ret;
359} 369}
360 370
361int psp_v3_1_cmd_submit(struct psp_context *psp, 371static int psp_v3_1_cmd_submit(struct psp_context *psp,
362 struct amdgpu_firmware_info *ucode, 372 struct amdgpu_firmware_info *ucode,
363 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, 373 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
364 int index) 374 int index)
365{ 375{
366 unsigned int psp_write_ptr_reg = 0; 376 unsigned int psp_write_ptr_reg = 0;
367 struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; 377 struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
@@ -410,9 +420,9 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
410 420
411static int 421static int
412psp_v3_1_sram_map(struct amdgpu_device *adev, 422psp_v3_1_sram_map(struct amdgpu_device *adev,
413 unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, 423 unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
414 unsigned int *sram_data_reg_offset, 424 unsigned int *sram_data_reg_offset,
415 enum AMDGPU_UCODE_ID ucode_id) 425 enum AMDGPU_UCODE_ID ucode_id)
416{ 426{
417 int ret = 0; 427 int ret = 0;
418 428
@@ -495,9 +505,9 @@ psp_v3_1_sram_map(struct amdgpu_device *adev,
495 return ret; 505 return ret;
496} 506}
497 507
498bool psp_v3_1_compare_sram_data(struct psp_context *psp, 508static bool psp_v3_1_compare_sram_data(struct psp_context *psp,
499 struct amdgpu_firmware_info *ucode, 509 struct amdgpu_firmware_info *ucode,
500 enum AMDGPU_UCODE_ID ucode_type) 510 enum AMDGPU_UCODE_ID ucode_type)
501{ 511{
502 int err = 0; 512 int err = 0;
503 unsigned int fw_sram_reg_val = 0; 513 unsigned int fw_sram_reg_val = 0;
@@ -530,7 +540,7 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp,
530 return true; 540 return true;
531} 541}
532 542
533bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) 543static bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
534{ 544{
535 struct amdgpu_device *adev = psp->adev; 545 struct amdgpu_device *adev = psp->adev;
536 uint32_t reg; 546 uint32_t reg;
@@ -541,7 +551,7 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
541 return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; 551 return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false;
542} 552}
543 553
544int psp_v3_1_mode1_reset(struct psp_context *psp) 554static int psp_v3_1_mode1_reset(struct psp_context *psp)
545{ 555{
546 int ret; 556 int ret;
547 uint32_t offset; 557 uint32_t offset;
@@ -574,3 +584,23 @@ int psp_v3_1_mode1_reset(struct psp_context *psp)
574 584
575 return 0; 585 return 0;
576} 586}
587
588static const struct psp_funcs psp_v3_1_funcs = {
589 .init_microcode = psp_v3_1_init_microcode,
590 .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
591 .bootloader_load_sos = psp_v3_1_bootloader_load_sos,
592 .prep_cmd_buf = psp_v3_1_prep_cmd_buf,
593 .ring_init = psp_v3_1_ring_init,
594 .ring_create = psp_v3_1_ring_create,
595 .ring_stop = psp_v3_1_ring_stop,
596 .ring_destroy = psp_v3_1_ring_destroy,
597 .cmd_submit = psp_v3_1_cmd_submit,
598 .compare_sram_data = psp_v3_1_compare_sram_data,
599 .smu_reload_quirk = psp_v3_1_smu_reload_quirk,
600 .mode1_reset = psp_v3_1_mode1_reset,
601};
602
603void psp_v3_1_set_psp_funcs(struct psp_context *psp)
604{
605 psp->funcs = &psp_v3_1_funcs;
606}
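
The hunk above converts the per-call entry points to static linkage and routes everything through a single const function-pointer table installed by psp_v3_1_set_psp_funcs(). The sketch below is a minimal, self-contained illustration of that ops-table pattern, not the real amdgpu structures; every name in it (example_funcs, example_context, example_v1_*) is invented for the example.

#include <stdio.h>

struct example_context;

/* table of per-version callbacks, mirroring the psp_funcs idea */
struct example_funcs {
	int (*ring_init)(struct example_context *ctx);
	int (*ring_destroy)(struct example_context *ctx);
};

struct example_context {
	const struct example_funcs *funcs;
};

/* version-specific implementations stay file-local (static) */
static int example_v1_ring_init(struct example_context *ctx)
{
	printf("v1 ring_init\n");
	return 0;
}

static int example_v1_ring_destroy(struct example_context *ctx)
{
	printf("v1 ring_destroy\n");
	return 0;
}

static const struct example_funcs example_v1_funcs = {
	.ring_init = example_v1_ring_init,
	.ring_destroy = example_v1_ring_destroy,
};

/* the only symbol a header would need to expose */
void example_v1_set_funcs(struct example_context *ctx)
{
	ctx->funcs = &example_v1_funcs;
}

int main(void)
{
	struct example_context ctx;

	example_v1_set_funcs(&ctx);
	ctx.funcs->ring_init(&ctx);
	ctx.funcs->ring_destroy(&ctx);
	return 0;
}

Keeping the table const and the implementations static shrinks the exported surface to one setter, which is what the header change that follows reflects.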
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
index b05dbada7751..e411e31ba452 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
@@ -32,26 +32,6 @@ enum { PSP_BINARY_ALIGNMENT = 64 };
32enum { PSP_BOOTLOADER_1_MEG_ALIGNMENT = 0x100000 }; 32enum { PSP_BOOTLOADER_1_MEG_ALIGNMENT = 0x100000 };
33enum { PSP_BOOTLOADER_8_MEM_ALIGNMENT = 0x800000 }; 33enum { PSP_BOOTLOADER_8_MEM_ALIGNMENT = 0x800000 };
34 34
35extern int psp_v3_1_init_microcode(struct psp_context *psp); 35void psp_v3_1_set_psp_funcs(struct psp_context *psp);
36extern int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp); 36
37extern int psp_v3_1_bootloader_load_sos(struct psp_context *psp);
38extern int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
39 struct psp_gfx_cmd_resp *cmd);
40extern int psp_v3_1_ring_init(struct psp_context *psp,
41 enum psp_ring_type ring_type);
42extern int psp_v3_1_ring_create(struct psp_context *psp,
43 enum psp_ring_type ring_type);
44extern int psp_v3_1_ring_stop(struct psp_context *psp,
45 enum psp_ring_type ring_type);
46extern int psp_v3_1_ring_destroy(struct psp_context *psp,
47 enum psp_ring_type ring_type);
48extern int psp_v3_1_cmd_submit(struct psp_context *psp,
49 struct amdgpu_firmware_info *ucode,
50 uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
51 int index);
52extern bool psp_v3_1_compare_sram_data(struct psp_context *psp,
53 struct amdgpu_firmware_info *ucode,
54 enum AMDGPU_UCODE_ID ucode_type);
55extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp);
56extern int psp_v3_1_mode1_reset(struct psp_context *psp);
57#endif 37#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index d4787ad4d346..c7190c39c4f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -289,13 +289,6 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
289 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 289 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
290} 290}
291 291
292static void sdma_v2_4_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
293{
294 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
295 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
296 amdgpu_ring_write(ring, mmHDP_DEBUG0);
297 amdgpu_ring_write(ring, 1);
298}
299/** 292/**
300 * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring 293 * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
301 * 294 *
@@ -346,7 +339,7 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
346 339
347 if ((adev->mman.buffer_funcs_ring == sdma0) || 340 if ((adev->mman.buffer_funcs_ring == sdma0) ||
348 (adev->mman.buffer_funcs_ring == sdma1)) 341 (adev->mman.buffer_funcs_ring == sdma1))
349 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 342 amdgpu_ttm_set_buffer_funcs_status(adev, false);
350 343
351 for (i = 0; i < adev->sdma.num_instances; i++) { 344 for (i = 0; i < adev->sdma.num_instances; i++) {
352 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 345 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
@@ -491,7 +484,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
491 } 484 }
492 485
493 if (adev->mman.buffer_funcs_ring == ring) 486 if (adev->mman.buffer_funcs_ring == ring)
494 amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); 487 amdgpu_ttm_set_buffer_funcs_status(adev, true);
495 } 488 }
496 489
497 return 0; 490 return 0;
@@ -844,7 +837,7 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
844 amdgpu_ring_write(ring, addr & 0xfffffffc); 837 amdgpu_ring_write(ring, addr & 0xfffffffc);
845 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 838 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
846 amdgpu_ring_write(ring, seq); /* reference */ 839 amdgpu_ring_write(ring, seq); /* reference */
847 amdgpu_ring_write(ring, 0xfffffff); /* mask */ 840 amdgpu_ring_write(ring, 0xffffffff); /* mask */
848 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 841 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
849 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ 842 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
850} 843}
@@ -861,20 +854,7 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
861static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, 854static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
862 unsigned vmid, uint64_t pd_addr) 855 unsigned vmid, uint64_t pd_addr)
863{ 856{
864 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 857 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
865 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
866 if (vmid < 8) {
867 amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
868 } else {
869 amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
870 }
871 amdgpu_ring_write(ring, pd_addr >> 12);
872
873 /* flush TLB */
874 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
875 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
876 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
877 amdgpu_ring_write(ring, 1 << vmid);
878 858
879 /* wait for flush */ 859 /* wait for flush */
880 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 860 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
@@ -888,6 +868,15 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
888 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 868 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
889} 869}
890 870
871static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
872 uint32_t reg, uint32_t val)
873{
874 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
875 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
876 amdgpu_ring_write(ring, reg);
877 amdgpu_ring_write(ring, val);
878}
879
891static int sdma_v2_4_early_init(void *handle) 880static int sdma_v2_4_early_init(void *handle)
892{ 881{
893 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 882 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1203,9 +1192,9 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
1203 .set_wptr = sdma_v2_4_ring_set_wptr, 1192 .set_wptr = sdma_v2_4_ring_set_wptr,
1204 .emit_frame_size = 1193 .emit_frame_size =
1205 6 + /* sdma_v2_4_ring_emit_hdp_flush */ 1194 6 + /* sdma_v2_4_ring_emit_hdp_flush */
1206 3 + /* sdma_v2_4_ring_emit_hdp_invalidate */ 1195 3 + /* hdp invalidate */
1207 6 + /* sdma_v2_4_ring_emit_pipeline_sync */ 1196 6 + /* sdma_v2_4_ring_emit_pipeline_sync */
1208 12 + /* sdma_v2_4_ring_emit_vm_flush */ 1197 VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */
1209 10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */ 1198 10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
1210 .emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */ 1199 .emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */
1211 .emit_ib = sdma_v2_4_ring_emit_ib, 1200 .emit_ib = sdma_v2_4_ring_emit_ib,
@@ -1213,11 +1202,11 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
1213 .emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync, 1202 .emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
1214 .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush, 1203 .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
1215 .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush, 1204 .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
1216 .emit_hdp_invalidate = sdma_v2_4_ring_emit_hdp_invalidate,
1217 .test_ring = sdma_v2_4_ring_test_ring, 1205 .test_ring = sdma_v2_4_ring_test_ring,
1218 .test_ib = sdma_v2_4_ring_test_ib, 1206 .test_ib = sdma_v2_4_ring_test_ib,
1219 .insert_nop = sdma_v2_4_ring_insert_nop, 1207 .insert_nop = sdma_v2_4_ring_insert_nop,
1220 .pad_ib = sdma_v2_4_ring_pad_ib, 1208 .pad_ib = sdma_v2_4_ring_pad_ib,
1209 .emit_wreg = sdma_v2_4_ring_emit_wreg,
1221}; 1210};
1222 1211
1223static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) 1212static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1316,9 +1305,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
1316 .copy_pte = sdma_v2_4_vm_copy_pte, 1305 .copy_pte = sdma_v2_4_vm_copy_pte,
1317 1306
1318 .write_pte = sdma_v2_4_vm_write_pte, 1307 .write_pte = sdma_v2_4_vm_write_pte,
1319
1320 .set_max_nums_pte_pde = 0x1fffff >> 3,
1321 .set_pte_pde_num_dw = 10,
1322 .set_pte_pde = sdma_v2_4_vm_set_pte_pde, 1308 .set_pte_pde = sdma_v2_4_vm_set_pte_pde,
1323}; 1309};
1324 1310
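
With the dedicated emit_hdp_invalidate hook and the open-coded TLB flush gone, sdma_v2_4 now exposes a single generic .emit_wreg callback and lets common helpers (amdgpu_gmc_emit_flush_gpu_tlb() and the HDP flush/invalidate paths) drive it. A rough, self-contained sketch of that dispatch idea follows; the ring type, register offset, and printf stand-ins are all invented, and the fallback only mirrors the pattern rather than the actual amdgpu code.

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_HDP_FLUSH_REG 0x54f4	/* made-up register offset */

struct example_ring;

struct example_ring_funcs {
	/* optional: emit a register write into the ring's command stream */
	void (*emit_wreg)(struct example_ring *ring, uint32_t reg, uint32_t val);
};

struct example_ring {
	const struct example_ring_funcs *funcs;
};

/* stand-in for a direct MMIO write from the CPU */
static void example_wreg32(uint32_t reg, uint32_t val)
{
	printf("MMIO write  reg 0x%04x <- 0x%08x\n", (unsigned)reg, (unsigned)val);
}

/* stand-in for the engine-specific packet emission */
static void example_sdma_emit_wreg(struct example_ring *ring,
				   uint32_t reg, uint32_t val)
{
	(void)ring;
	printf("ring packet reg 0x%04x <- 0x%08x\n", (unsigned)reg, (unsigned)val);
}

static const struct example_ring_funcs example_sdma_funcs = {
	.emit_wreg = example_sdma_emit_wreg,
};

/*
 * Generic helper: go through the ring when it can emit register writes,
 * fall back to a direct write otherwise.
 */
static void example_flush_hdp(struct example_ring *ring)
{
	if (ring && ring->funcs->emit_wreg)
		ring->funcs->emit_wreg(ring, EXAMPLE_HDP_FLUSH_REG, 1);
	else
		example_wreg32(EXAMPLE_HDP_FLUSH_REG, 1);
}

int main(void)
{
	struct example_ring ring = { .funcs = &example_sdma_funcs };

	example_flush_hdp(&ring);	/* goes through the ring */
	example_flush_hdp(NULL);	/* direct fallback */
	return 0;
}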
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 521978c40537..be20a387d961 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -460,14 +460,6 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
460 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 460 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
461} 461}
462 462
463static void sdma_v3_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
464{
465 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
466 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
467 amdgpu_ring_write(ring, mmHDP_DEBUG0);
468 amdgpu_ring_write(ring, 1);
469}
470
471/** 463/**
472 * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring 464 * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
473 * 465 *
@@ -518,7 +510,7 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
518 510
519 if ((adev->mman.buffer_funcs_ring == sdma0) || 511 if ((adev->mman.buffer_funcs_ring == sdma0) ||
520 (adev->mman.buffer_funcs_ring == sdma1)) 512 (adev->mman.buffer_funcs_ring == sdma1))
521 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 513 amdgpu_ttm_set_buffer_funcs_status(adev, false);
522 514
523 for (i = 0; i < adev->sdma.num_instances; i++) { 515 for (i = 0; i < adev->sdma.num_instances; i++) {
524 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 516 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
@@ -719,14 +711,17 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
719 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], 711 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
720 upper_32_bits(wptr_gpu_addr)); 712 upper_32_bits(wptr_gpu_addr));
721 wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); 713 wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
722 if (ring->use_pollmem) 714 if (ring->use_pollmem) {
715 /*wptr polling is not enogh fast, directly clean the wptr register */
716 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
723 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 717 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
724 SDMA0_GFX_RB_WPTR_POLL_CNTL, 718 SDMA0_GFX_RB_WPTR_POLL_CNTL,
725 ENABLE, 1); 719 ENABLE, 1);
726 else 720 } else {
727 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, 721 wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
728 SDMA0_GFX_RB_WPTR_POLL_CNTL, 722 SDMA0_GFX_RB_WPTR_POLL_CNTL,
729 ENABLE, 0); 723 ENABLE, 0);
724 }
730 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); 725 WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl);
731 726
732 /* enable DMA RB */ 727 /* enable DMA RB */
@@ -758,7 +753,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
758 } 753 }
759 754
760 if (adev->mman.buffer_funcs_ring == ring) 755 if (adev->mman.buffer_funcs_ring == ring)
761 amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); 756 amdgpu_ttm_set_buffer_funcs_status(adev, true);
762 } 757 }
763 758
764 return 0; 759 return 0;
@@ -1110,7 +1105,7 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1110 amdgpu_ring_write(ring, addr & 0xfffffffc); 1105 amdgpu_ring_write(ring, addr & 0xfffffffc);
1111 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 1106 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1112 amdgpu_ring_write(ring, seq); /* reference */ 1107 amdgpu_ring_write(ring, seq); /* reference */
1113 amdgpu_ring_write(ring, 0xfffffff); /* mask */ 1108 amdgpu_ring_write(ring, 0xffffffff); /* mask */
1114 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1109 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1115 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ 1110 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1116} 1111}
@@ -1127,20 +1122,7 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1127static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 1122static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1128 unsigned vmid, uint64_t pd_addr) 1123 unsigned vmid, uint64_t pd_addr)
1129{ 1124{
1130 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1125 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1131 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1132 if (vmid < 8) {
1133 amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
1134 } else {
1135 amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
1136 }
1137 amdgpu_ring_write(ring, pd_addr >> 12);
1138
1139 /* flush TLB */
1140 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1141 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1142 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
1143 amdgpu_ring_write(ring, 1 << vmid);
1144 1126
1145 /* wait for flush */ 1127 /* wait for flush */
1146 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1128 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
@@ -1154,6 +1136,15 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1154 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 1136 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
1155} 1137}
1156 1138
1139static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring,
1140 uint32_t reg, uint32_t val)
1141{
1142 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1143 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1144 amdgpu_ring_write(ring, reg);
1145 amdgpu_ring_write(ring, val);
1146}
1147
1157static int sdma_v3_0_early_init(void *handle) 1148static int sdma_v3_0_early_init(void *handle)
1158{ 1149{
1159 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1150 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1637,9 +1628,9 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
1637 .set_wptr = sdma_v3_0_ring_set_wptr, 1628 .set_wptr = sdma_v3_0_ring_set_wptr,
1638 .emit_frame_size = 1629 .emit_frame_size =
1639 6 + /* sdma_v3_0_ring_emit_hdp_flush */ 1630 6 + /* sdma_v3_0_ring_emit_hdp_flush */
1640 3 + /* sdma_v3_0_ring_emit_hdp_invalidate */ 1631 3 + /* hdp invalidate */
1641 6 + /* sdma_v3_0_ring_emit_pipeline_sync */ 1632 6 + /* sdma_v3_0_ring_emit_pipeline_sync */
1642 12 + /* sdma_v3_0_ring_emit_vm_flush */ 1633 VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v3_0_ring_emit_vm_flush */
1643 10 + 10 + 10, /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */ 1634 10 + 10 + 10, /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */
1644 .emit_ib_size = 7 + 6, /* sdma_v3_0_ring_emit_ib */ 1635 .emit_ib_size = 7 + 6, /* sdma_v3_0_ring_emit_ib */
1645 .emit_ib = sdma_v3_0_ring_emit_ib, 1636 .emit_ib = sdma_v3_0_ring_emit_ib,
@@ -1647,11 +1638,11 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
1647 .emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync, 1638 .emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
1648 .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush, 1639 .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
1649 .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush, 1640 .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
1650 .emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate,
1651 .test_ring = sdma_v3_0_ring_test_ring, 1641 .test_ring = sdma_v3_0_ring_test_ring,
1652 .test_ib = sdma_v3_0_ring_test_ib, 1642 .test_ib = sdma_v3_0_ring_test_ib,
1653 .insert_nop = sdma_v3_0_ring_insert_nop, 1643 .insert_nop = sdma_v3_0_ring_insert_nop,
1654 .pad_ib = sdma_v3_0_ring_pad_ib, 1644 .pad_ib = sdma_v3_0_ring_pad_ib,
1645 .emit_wreg = sdma_v3_0_ring_emit_wreg,
1655}; 1646};
1656 1647
1657static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) 1648static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1750,10 +1741,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
1750 .copy_pte = sdma_v3_0_vm_copy_pte, 1741 .copy_pte = sdma_v3_0_vm_copy_pte,
1751 1742
1752 .write_pte = sdma_v3_0_vm_write_pte, 1743 .write_pte = sdma_v3_0_vm_write_pte,
1753
1754 /* not 0x3fffff due to HW limitation */
1755 .set_max_nums_pte_pde = 0x3fffe0 >> 3,
1756 .set_pte_pde_num_dw = 10,
1757 .set_pte_pde = sdma_v3_0_vm_set_pte_pde, 1744 .set_pte_pde = sdma_v3_0_vm_set_pte_pde,
1758}; 1745};
1759 1746
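
The .emit_frame_size change above swaps the hard-coded 12-dword VM-flush estimate for a budget built from shared constants (VI_FLUSH_GPU_TLB_NUM_WREG register writes at 3 dwords each plus a 6-dword wait). A tiny sketch of that kind of worst-case dword budgeting follows; the per-packet costs here are invented and only show how the arithmetic is assembled.

#include <stdio.h>

/* invented per-packet costs, in dwords */
#define EX_WREG_DW		3	/* one SRBM-style register write  */
#define EX_REG_WAIT_DW		6	/* one poll/wait packet           */
#define EX_FENCE_DW		10	/* one fence packet               */
#define EX_FLUSH_NUM_WREG	2	/* register writes per TLB flush  */

int main(void)
{
	/* worst-case dwords one submission may need to reserve up front */
	unsigned int frame =
		6 +					/* hdp flush      */
		3 +					/* hdp invalidate */
		6 +					/* pipeline sync  */
		EX_FLUSH_NUM_WREG * EX_WREG_DW +	/* vm flush wregs */
		EX_REG_WAIT_DW +			/* vm flush wait  */
		3 * EX_FENCE_DW;			/* user + vm fences */

	printf("reserve %u dwords per frame\n", frame);
	return 0;
}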
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 91cf95a8c39c..399f876f9cad 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -31,8 +31,6 @@
31#include "sdma0/sdma0_4_0_sh_mask.h" 31#include "sdma0/sdma0_4_0_sh_mask.h"
32#include "sdma1/sdma1_4_0_offset.h" 32#include "sdma1/sdma1_4_0_offset.h"
33#include "sdma1/sdma1_4_0_sh_mask.h" 33#include "sdma1/sdma1_4_0_sh_mask.h"
34#include "mmhub/mmhub_1_0_offset.h"
35#include "mmhub/mmhub_1_0_sh_mask.h"
36#include "hdp/hdp_4_0_offset.h" 34#include "hdp/hdp_4_0_offset.h"
37#include "sdma0/sdma0_4_1_default.h" 35#include "sdma0/sdma0_4_1_default.h"
38 36
@@ -42,6 +40,8 @@
42 40
43MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); 41MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
44MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); 42MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
43MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
44MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
45MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); 45MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
46 46
47#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L 47#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
@@ -86,6 +86,13 @@ static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
86 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002) 86 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
87}; 87};
88 88
89static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
90 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
91 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
92 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
93 SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
94};
95
89static const struct soc15_reg_golden golden_settings_sdma_4_1[] = 96static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
90{ 97{
91 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), 98 SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
@@ -124,6 +131,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
124 golden_settings_sdma_vg10, 131 golden_settings_sdma_vg10,
125 ARRAY_SIZE(golden_settings_sdma_vg10)); 132 ARRAY_SIZE(golden_settings_sdma_vg10));
126 break; 133 break;
134 case CHIP_VEGA12:
135 soc15_program_register_sequence(adev,
136 golden_settings_sdma_4,
137 ARRAY_SIZE(golden_settings_sdma_4));
138 soc15_program_register_sequence(adev,
139 golden_settings_sdma_vg12,
140 ARRAY_SIZE(golden_settings_sdma_vg12));
141 break;
127 case CHIP_RAVEN: 142 case CHIP_RAVEN:
128 soc15_program_register_sequence(adev, 143 soc15_program_register_sequence(adev,
129 golden_settings_sdma_4_1, 144 golden_settings_sdma_4_1,
@@ -164,6 +179,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
164 case CHIP_VEGA10: 179 case CHIP_VEGA10:
165 chip_name = "vega10"; 180 chip_name = "vega10";
166 break; 181 break;
182 case CHIP_VEGA12:
183 chip_name = "vega12";
184 break;
167 case CHIP_RAVEN: 185 case CHIP_RAVEN:
168 chip_name = "raven"; 186 chip_name = "raven";
169 break; 187 break;
@@ -371,16 +389,6 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
371 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ 389 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
372} 390}
373 391
374static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
375{
376 struct amdgpu_device *adev = ring->adev;
377
378 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
379 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
380 amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE));
381 amdgpu_ring_write(ring, 1);
382}
383
384/** 392/**
385 * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring 393 * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring
386 * 394 *
@@ -436,7 +444,7 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
436 444
437 if ((adev->mman.buffer_funcs_ring == sdma0) || 445 if ((adev->mman.buffer_funcs_ring == sdma0) ||
438 (adev->mman.buffer_funcs_ring == sdma1)) 446 (adev->mman.buffer_funcs_ring == sdma1))
439 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 447 amdgpu_ttm_set_buffer_funcs_status(adev, false);
440 448
441 for (i = 0; i < adev->sdma.num_instances; i++) { 449 for (i = 0; i < adev->sdma.num_instances; i++) {
442 rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); 450 rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
@@ -678,7 +686,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
678 } 686 }
679 687
680 if (adev->mman.buffer_funcs_ring == ring) 688 if (adev->mman.buffer_funcs_ring == ring)
681 amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); 689 amdgpu_ttm_set_buffer_funcs_status(adev, true);
682 690
683 } 691 }
684 692
@@ -1113,7 +1121,7 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1113 amdgpu_ring_write(ring, addr & 0xfffffffc); 1121 amdgpu_ring_write(ring, addr & 0xfffffffc);
1114 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 1122 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1115 amdgpu_ring_write(ring, seq); /* reference */ 1123 amdgpu_ring_write(ring, seq); /* reference */
1116 amdgpu_ring_write(ring, 0xfffffff); /* mask */ 1124 amdgpu_ring_write(ring, 0xffffffff); /* mask */
1117 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1125 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1118 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ 1126 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1119} 1127}
@@ -1131,38 +1139,28 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1131static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 1139static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1132 unsigned vmid, uint64_t pd_addr) 1140 unsigned vmid, uint64_t pd_addr)
1133{ 1141{
1134 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 1142 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1135 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); 1143}
1136 uint64_t flags = AMDGPU_PTE_VALID;
1137 unsigned eng = ring->vm_inv_eng;
1138
1139 amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
1140 pd_addr |= flags;
1141
1142 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1143 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1144 amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2);
1145 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
1146
1147 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1148 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1149 amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2);
1150 amdgpu_ring_write(ring, upper_32_bits(pd_addr));
1151 1144
1152 /* flush TLB */ 1145static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
1146 uint32_t reg, uint32_t val)
1147{
1153 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | 1148 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
1154 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); 1149 SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
1155 amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); 1150 amdgpu_ring_write(ring, reg);
1156 amdgpu_ring_write(ring, req); 1151 amdgpu_ring_write(ring, val);
1152}
1157 1153
1158 /* wait for flush */ 1154static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1155 uint32_t val, uint32_t mask)
1156{
1159 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | 1157 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1160 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | 1158 SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1161 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ 1159 SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
1162 amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); 1160 amdgpu_ring_write(ring, reg << 2);
1163 amdgpu_ring_write(ring, 0); 1161 amdgpu_ring_write(ring, 0);
1164 amdgpu_ring_write(ring, 1 << vmid); /* reference */ 1162 amdgpu_ring_write(ring, val); /* reference */
1165 amdgpu_ring_write(ring, 1 << vmid); /* mask */ 1163 amdgpu_ring_write(ring, mask); /* mask */
1166 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 1164 amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1167 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); 1165 SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
1168} 1166}
@@ -1192,13 +1190,13 @@ static int sdma_v4_0_sw_init(void *handle)
1192 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1190 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1193 1191
1194 /* SDMA trap event */ 1192 /* SDMA trap event */
1195 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA0, 224, 1193 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 224,
1196 &adev->sdma.trap_irq); 1194 &adev->sdma.trap_irq);
1197 if (r) 1195 if (r)
1198 return r; 1196 return r;
1199 1197
1200 /* SDMA trap event */ 1198 /* SDMA trap event */
1201 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA1, 224, 1199 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 224,
1202 &adev->sdma.trap_irq); 1200 &adev->sdma.trap_irq);
1203 if (r) 1201 if (r)
1204 return r; 1202 return r;
@@ -1353,7 +1351,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
1353{ 1351{
1354 DRM_DEBUG("IH: SDMA trap\n"); 1352 DRM_DEBUG("IH: SDMA trap\n");
1355 switch (entry->client_id) { 1353 switch (entry->client_id) {
1356 case AMDGPU_IH_CLIENTID_SDMA0: 1354 case SOC15_IH_CLIENTID_SDMA0:
1357 switch (entry->ring_id) { 1355 switch (entry->ring_id) {
1358 case 0: 1356 case 0:
1359 amdgpu_fence_process(&adev->sdma.instance[0].ring); 1357 amdgpu_fence_process(&adev->sdma.instance[0].ring);
@@ -1369,7 +1367,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
1369 break; 1367 break;
1370 } 1368 }
1371 break; 1369 break;
1372 case AMDGPU_IH_CLIENTID_SDMA1: 1370 case SOC15_IH_CLIENTID_SDMA1:
1373 switch (entry->ring_id) { 1371 switch (entry->ring_id) {
1374 case 0: 1372 case 0:
1375 amdgpu_fence_process(&adev->sdma.instance[1].ring); 1373 amdgpu_fence_process(&adev->sdma.instance[1].ring);
@@ -1419,7 +1417,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
1419 if (def != data) 1417 if (def != data)
1420 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); 1418 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
1421 1419
1422 if (adev->asic_type == CHIP_VEGA10) { 1420 if (adev->sdma.num_instances > 1) {
1423 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); 1421 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
1424 data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1422 data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1425 SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | 1423 SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
@@ -1447,7 +1445,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
1447 if (def != data) 1445 if (def != data)
1448 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); 1446 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
1449 1447
1450 if (adev->asic_type == CHIP_VEGA10) { 1448 if (adev->sdma.num_instances > 1) {
1451 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); 1449 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
1452 data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | 1450 data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
1453 SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | 1451 SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
@@ -1478,7 +1476,7 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
1478 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); 1476 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1479 1477
1480 /* 1-not override: enable sdma1 mem light sleep */ 1478 /* 1-not override: enable sdma1 mem light sleep */
1481 if (adev->asic_type == CHIP_VEGA10) { 1479 if (adev->sdma.num_instances > 1) {
1482 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); 1480 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
1483 data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1481 data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1484 if (def != data) 1482 if (def != data)
@@ -1492,7 +1490,7 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
1492 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); 1490 WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
1493 1491
1494 /* 0-override:disable sdma1 mem light sleep */ 1492 /* 0-override:disable sdma1 mem light sleep */
1495 if (adev->asic_type == CHIP_VEGA10) { 1493 if (adev->sdma.num_instances > 1) {
1496 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); 1494 def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
1497 data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; 1495 data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
1498 if (def != data) 1496 if (def != data)
@@ -1511,6 +1509,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
1511 1509
1512 switch (adev->asic_type) { 1510 switch (adev->asic_type) {
1513 case CHIP_VEGA10: 1511 case CHIP_VEGA10:
1512 case CHIP_VEGA12:
1514 case CHIP_RAVEN: 1513 case CHIP_RAVEN:
1515 sdma_v4_0_update_medium_grain_clock_gating(adev, 1514 sdma_v4_0_update_medium_grain_clock_gating(adev,
1516 state == AMD_CG_STATE_GATE ? true : false); 1515 state == AMD_CG_STATE_GATE ? true : false);
@@ -1588,9 +1587,11 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
1588 .set_wptr = sdma_v4_0_ring_set_wptr, 1587 .set_wptr = sdma_v4_0_ring_set_wptr,
1589 .emit_frame_size = 1588 .emit_frame_size =
1590 6 + /* sdma_v4_0_ring_emit_hdp_flush */ 1589 6 + /* sdma_v4_0_ring_emit_hdp_flush */
1591 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ 1590 3 + /* hdp invalidate */
1592 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ 1591 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
1593 18 + /* sdma_v4_0_ring_emit_vm_flush */ 1592 /* sdma_v4_0_ring_emit_vm_flush */
1593 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1594 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
1594 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ 1595 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
1595 .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ 1596 .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
1596 .emit_ib = sdma_v4_0_ring_emit_ib, 1597 .emit_ib = sdma_v4_0_ring_emit_ib,
@@ -1598,11 +1599,12 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
1598 .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, 1599 .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
1599 .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, 1600 .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
1600 .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, 1601 .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
1601 .emit_hdp_invalidate = sdma_v4_0_ring_emit_hdp_invalidate,
1602 .test_ring = sdma_v4_0_ring_test_ring, 1602 .test_ring = sdma_v4_0_ring_test_ring,
1603 .test_ib = sdma_v4_0_ring_test_ib, 1603 .test_ib = sdma_v4_0_ring_test_ib,
1604 .insert_nop = sdma_v4_0_ring_insert_nop, 1604 .insert_nop = sdma_v4_0_ring_insert_nop,
1605 .pad_ib = sdma_v4_0_ring_pad_ib, 1605 .pad_ib = sdma_v4_0_ring_pad_ib,
1606 .emit_wreg = sdma_v4_0_ring_emit_wreg,
1607 .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
1606}; 1608};
1607 1609
1608static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1610static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1637,7 +1639,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1637 * @dst_offset: dst GPU address 1639 * @dst_offset: dst GPU address
1638 * @byte_count: number of bytes to xfer 1640 * @byte_count: number of bytes to xfer
1639 * 1641 *
1640 * Copy GPU buffers using the DMA engine (VEGA10). 1642 * Copy GPU buffers using the DMA engine (VEGA10/12).
1641 * Used by the amdgpu ttm implementation to move pages if 1643 * Used by the amdgpu ttm implementation to move pages if
1642 * registered as the asic copy callback. 1644 * registered as the asic copy callback.
1643 */ 1645 */
@@ -1664,7 +1666,7 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
1664 * @dst_offset: dst GPU address 1666 * @dst_offset: dst GPU address
1665 * @byte_count: number of bytes to xfer 1667 * @byte_count: number of bytes to xfer
1666 * 1668 *
1667 * Fill GPU buffers using the DMA engine (VEGA10). 1669 * Fill GPU buffers using the DMA engine (VEGA10/12).
1668 */ 1670 */
1669static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib, 1671static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib,
1670 uint32_t src_data, 1672 uint32_t src_data,
@@ -1701,9 +1703,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
1701 .copy_pte = sdma_v4_0_vm_copy_pte, 1703 .copy_pte = sdma_v4_0_vm_copy_pte,
1702 1704
1703 .write_pte = sdma_v4_0_vm_write_pte, 1705 .write_pte = sdma_v4_0_vm_write_pte,
1704
1705 .set_max_nums_pte_pde = 0x400000 >> 3,
1706 .set_pte_pde_num_dw = 10,
1707 .set_pte_pde = sdma_v4_0_vm_set_pte_pde, 1706 .set_pte_pde = sdma_v4_0_vm_set_pte_pde,
1708}; 1707};
1709 1708
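
sdma_v4_0 additionally gains a generic .emit_reg_wait callback: the POLL_REGMEM packet keeps reading a register until (value & mask) equals the reference, bounded by a retry count. The following host-side sketch implements the same poll-until-match contract with a stubbed register read; the names and the fake "ready" condition are invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* stand-in for a register read; here it just reports "ready" after a few polls */
static uint32_t example_rreg32(uint32_t reg)
{
	static unsigned int calls;

	(void)reg;
	return ++calls >= 3 ? 0x1u : 0x0u;
}

/*
 * Poll until (value & mask) == ref, the same contract the ring-side
 * reg_wait packet implements; give up after max_retry attempts.
 */
static int example_reg_wait(uint32_t reg, uint32_t ref, uint32_t mask,
			    unsigned int max_retry)
{
	unsigned int i;

	for (i = 0; i < max_retry; i++) {
		if ((example_rreg32(reg) & mask) == ref)
			return 0;
	}
	return -1;	/* timed out */
}

int main(void)
{
	if (example_reg_wait(0x1234, 0x1, 0x1, 0xfff) == 0)
		printf("register reached expected value\n");
	else
		printf("timed out\n");
	return 0;
}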
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 543101d5a5ed..a675ec6d2811 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -31,7 +31,8 @@
31#include "amdgpu_uvd.h" 31#include "amdgpu_uvd.h"
32#include "amdgpu_vce.h" 32#include "amdgpu_vce.h"
33#include "atom.h" 33#include "atom.h"
34#include "amdgpu_powerplay.h" 34#include "amd_pcie.h"
35#include "si_dpm.h"
35#include "sid.h" 36#include "sid.h"
36#include "si_ih.h" 37#include "si_ih.h"
37#include "gfx_v6_0.h" 38#include "gfx_v6_0.h"
@@ -1230,6 +1231,92 @@ static void si_detect_hw_virtualization(struct amdgpu_device *adev)
1230 adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; 1231 adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
1231} 1232}
1232 1233
1234static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
1235{
1236 if (!ring || !ring->funcs->emit_wreg) {
1237 WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
1238 RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
1239 } else {
1240 amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
1241 }
1242}
1243
1244static void si_invalidate_hdp(struct amdgpu_device *adev,
1245 struct amdgpu_ring *ring)
1246{
1247 if (!ring || !ring->funcs->emit_wreg) {
1248 WREG32(mmHDP_DEBUG0, 1);
1249 RREG32(mmHDP_DEBUG0);
1250 } else {
1251 amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
1252 }
1253}
1254
1255static int si_get_pcie_lanes(struct amdgpu_device *adev)
1256{
1257 u32 link_width_cntl;
1258
1259 if (adev->flags & AMD_IS_APU)
1260 return 0;
1261
1262 link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
1263
1264 switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) {
1265 case LC_LINK_WIDTH_X1:
1266 return 1;
1267 case LC_LINK_WIDTH_X2:
1268 return 2;
1269 case LC_LINK_WIDTH_X4:
1270 return 4;
1271 case LC_LINK_WIDTH_X8:
1272 return 8;
1273 case LC_LINK_WIDTH_X0:
1274 case LC_LINK_WIDTH_X16:
1275 default:
1276 return 16;
1277 }
1278}
1279
1280static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
1281{
1282 u32 link_width_cntl, mask;
1283
1284 if (adev->flags & AMD_IS_APU)
1285 return;
1286
1287 switch (lanes) {
1288 case 0:
1289 mask = LC_LINK_WIDTH_X0;
1290 break;
1291 case 1:
1292 mask = LC_LINK_WIDTH_X1;
1293 break;
1294 case 2:
1295 mask = LC_LINK_WIDTH_X2;
1296 break;
1297 case 4:
1298 mask = LC_LINK_WIDTH_X4;
1299 break;
1300 case 8:
1301 mask = LC_LINK_WIDTH_X8;
1302 break;
1303 case 16:
1304 mask = LC_LINK_WIDTH_X16;
1305 break;
1306 default:
1307 DRM_ERROR("invalid pcie lane request: %d\n", lanes);
1308 return;
1309 }
1310
1311 link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
1312 link_width_cntl &= ~LC_LINK_WIDTH_MASK;
1313 link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT;
1314 link_width_cntl |= (LC_RECONFIG_NOW |
1315 LC_RECONFIG_ARC_MISSING_ESCAPE);
1316
1317 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
1318}
1319
1233static const struct amdgpu_asic_funcs si_asic_funcs = 1320static const struct amdgpu_asic_funcs si_asic_funcs =
1234{ 1321{
1235 .read_disabled_bios = &si_read_disabled_bios, 1322 .read_disabled_bios = &si_read_disabled_bios,
@@ -1240,7 +1327,11 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
1240 .get_xclk = &si_get_xclk, 1327 .get_xclk = &si_get_xclk,
1241 .set_uvd_clocks = &si_set_uvd_clocks, 1328 .set_uvd_clocks = &si_set_uvd_clocks,
1242 .set_vce_clocks = NULL, 1329 .set_vce_clocks = NULL,
1330 .get_pcie_lanes = &si_get_pcie_lanes,
1331 .set_pcie_lanes = &si_set_pcie_lanes,
1243 .get_config_memsize = &si_get_config_memsize, 1332 .get_config_memsize = &si_get_config_memsize,
1333 .flush_hdp = &si_flush_hdp,
1334 .invalidate_hdp = &si_invalidate_hdp,
1244}; 1335};
1245 1336
1246static uint32_t si_get_rev_id(struct amdgpu_device *adev) 1337static uint32_t si_get_rev_id(struct amdgpu_device *adev)
@@ -1461,8 +1552,8 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
1461{ 1552{
1462 struct pci_dev *root = adev->pdev->bus->self; 1553 struct pci_dev *root = adev->pdev->bus->self;
1463 int bridge_pos, gpu_pos; 1554 int bridge_pos, gpu_pos;
1464 u32 speed_cntl, mask, current_data_rate; 1555 u32 speed_cntl, current_data_rate;
1465 int ret, i; 1556 int i;
1466 u16 tmp16; 1557 u16 tmp16;
1467 1558
1468 if (pci_is_root_bus(adev->pdev->bus)) 1559 if (pci_is_root_bus(adev->pdev->bus))
@@ -1474,23 +1565,20 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
1474 if (adev->flags & AMD_IS_APU) 1565 if (adev->flags & AMD_IS_APU)
1475 return; 1566 return;
1476 1567
1477 ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask); 1568 if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
1478 if (ret != 0) 1569 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
1479 return;
1480
1481 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
1482 return; 1570 return;
1483 1571
1484 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); 1572 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
1485 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> 1573 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
1486 LC_CURRENT_DATA_RATE_SHIFT; 1574 LC_CURRENT_DATA_RATE_SHIFT;
1487 if (mask & DRM_PCIE_SPEED_80) { 1575 if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
1488 if (current_data_rate == 2) { 1576 if (current_data_rate == 2) {
1489 DRM_INFO("PCIE gen 3 link speeds already enabled\n"); 1577 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
1490 return; 1578 return;
1491 } 1579 }
1492 DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n"); 1580 DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
1493 } else if (mask & DRM_PCIE_SPEED_50) { 1581 } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
1494 if (current_data_rate == 1) { 1582 if (current_data_rate == 1) {
1495 DRM_INFO("PCIE gen 2 link speeds already enabled\n"); 1583 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
1496 return; 1584 return;
@@ -1506,7 +1594,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
1506 if (!gpu_pos) 1594 if (!gpu_pos)
1507 return; 1595 return;
1508 1596
1509 if (mask & DRM_PCIE_SPEED_80) { 1597 if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
1510 if (current_data_rate != 2) { 1598 if (current_data_rate != 2) {
1511 u16 bridge_cfg, gpu_cfg; 1599 u16 bridge_cfg, gpu_cfg;
1512 u16 bridge_cfg2, gpu_cfg2; 1600 u16 bridge_cfg2, gpu_cfg2;
@@ -1589,9 +1677,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
1589 1677
1590 pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); 1678 pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
1591 tmp16 &= ~0xf; 1679 tmp16 &= ~0xf;
1592 if (mask & DRM_PCIE_SPEED_80) 1680 if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
1593 tmp16 |= 3; 1681 tmp16 |= 3;
1594 else if (mask & DRM_PCIE_SPEED_50) 1682 else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
1595 tmp16 |= 2; 1683 tmp16 |= 2;
1596 else 1684 else
1597 tmp16 |= 1; 1685 tmp16 |= 1;
@@ -1962,7 +2050,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
1962 amdgpu_device_ip_block_add(adev, &si_common_ip_block); 2050 amdgpu_device_ip_block_add(adev, &si_common_ip_block);
1963 amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); 2051 amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
1964 amdgpu_device_ip_block_add(adev, &si_ih_ip_block); 2052 amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
1965 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 2053 amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
1966 if (adev->enable_virtual_display) 2054 if (adev->enable_virtual_display)
1967 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 2055 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
1968 else 2056 else
@@ -1976,7 +2064,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
1976 amdgpu_device_ip_block_add(adev, &si_common_ip_block); 2064 amdgpu_device_ip_block_add(adev, &si_common_ip_block);
1977 amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); 2065 amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
1978 amdgpu_device_ip_block_add(adev, &si_ih_ip_block); 2066 amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
1979 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 2067 amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
1980 if (adev->enable_virtual_display) 2068 if (adev->enable_virtual_display)
1981 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 2069 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
1982 else 2070 else
@@ -1990,7 +2078,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
1990 amdgpu_device_ip_block_add(adev, &si_common_ip_block); 2078 amdgpu_device_ip_block_add(adev, &si_common_ip_block);
1991 amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); 2079 amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
1992 amdgpu_device_ip_block_add(adev, &si_ih_ip_block); 2080 amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
1993 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 2081 amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
1994 if (adev->enable_virtual_display) 2082 if (adev->enable_virtual_display)
1995 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 2083 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
1996 amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); 2084 amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
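
In si_pcie_gen3_enable() the runtime drm_pcie_get_speed_cap_mask() query is replaced by checking the pre-computed adev->pm.pcie_gen_mask capability bits (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2/GEN3). The sketch below shows the general shape of picking a target link speed from such a bitmask; the flag names and values here are made up and are not the CAIL definitions.

#include <stdio.h>

/* invented capability bits for the example */
#define EX_LINK_SPEED_GEN1	(1u << 0)
#define EX_LINK_SPEED_GEN2	(1u << 1)
#define EX_LINK_SPEED_GEN3	(1u << 2)

/* pick the highest generation the capability mask advertises */
static unsigned int example_pick_pcie_gen(unsigned int gen_mask)
{
	if (gen_mask & EX_LINK_SPEED_GEN3)
		return 3;
	if (gen_mask & EX_LINK_SPEED_GEN2)
		return 2;
	return 1;
}

int main(void)
{
	unsigned int mask = EX_LINK_SPEED_GEN1 | EX_LINK_SPEED_GEN2;

	printf("target PCIe gen %u\n", example_pick_pcie_gen(mask));
	return 0;
}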
diff --git a/drivers/gpu/drm/amd/amdgpu/si.h b/drivers/gpu/drm/amd/amdgpu/si.h
index 589225080c24..06ed7212a0d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.h
+++ b/drivers/gpu/drm/amd/amdgpu/si.h
@@ -24,6 +24,8 @@
24#ifndef __SI_H__ 24#ifndef __SI_H__
25#define __SI_H__ 25#define __SI_H__
26 26
27#define SI_FLUSH_GPU_TLB_NUM_WREG 2
28
27void si_srbm_select(struct amdgpu_device *adev, 29void si_srbm_select(struct amdgpu_device *adev,
28 u32 me, u32 pipe, u32 queue, u32 vmid); 30 u32 me, u32 pipe, u32 queue, u32 vmid);
29int si_set_ip_blocks(struct amdgpu_device *adev); 31int si_set_ip_blocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 9a29c1399091..b75d901ba3c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -24,6 +24,7 @@
24#include <drm/drmP.h> 24#include <drm/drmP.h>
25#include "amdgpu.h" 25#include "amdgpu.h"
26#include "amdgpu_trace.h" 26#include "amdgpu_trace.h"
27#include "si.h"
27#include "sid.h" 28#include "sid.h"
28 29
29const u32 sdma_offsets[SDMA_MAX_INSTANCE] = 30const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -74,20 +75,6 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
74 75
75} 76}
76 77
77static void si_dma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
78{
79 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
80 amdgpu_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL));
81 amdgpu_ring_write(ring, 1);
82}
83
84static void si_dma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
85{
86 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
87 amdgpu_ring_write(ring, (0xf << 16) | (HDP_DEBUG0));
88 amdgpu_ring_write(ring, 1);
89}
90
91/** 78/**
92 * si_dma_ring_emit_fence - emit a fence on the DMA ring 79 * si_dma_ring_emit_fence - emit a fence on the DMA ring
93 * 80 *
@@ -134,7 +121,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
134 WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); 121 WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
135 122
136 if (adev->mman.buffer_funcs_ring == ring) 123 if (adev->mman.buffer_funcs_ring == ring)
137 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 124 amdgpu_ttm_set_buffer_funcs_status(adev, false);
138 ring->ready = false; 125 ring->ready = false;
139 } 126 }
140} 127}
@@ -197,7 +184,7 @@ static int si_dma_start(struct amdgpu_device *adev)
197 } 184 }
198 185
199 if (adev->mman.buffer_funcs_ring == ring) 186 if (adev->mman.buffer_funcs_ring == ring)
200 amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); 187 amdgpu_ttm_set_buffer_funcs_status(adev, true);
201 } 188 }
202 189
203 return 0; 190 return 0;
@@ -475,17 +462,7 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
475static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, 462static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
476 unsigned vmid, uint64_t pd_addr) 463 unsigned vmid, uint64_t pd_addr)
477{ 464{
478 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); 465 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
479 if (vmid < 8)
480 amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
481 else
482 amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8)));
483 amdgpu_ring_write(ring, pd_addr >> 12);
484
485 /* bits 0-7 are the VM contexts0-7 */
486 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
487 amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST));
488 amdgpu_ring_write(ring, 1 << vmid);
489 466
490 /* wait for invalidate to complete */ 467 /* wait for invalidate to complete */
491 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); 468 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
@@ -496,6 +473,14 @@ static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
496 amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ 473 amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
497} 474}
498 475
476static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
477 uint32_t reg, uint32_t val)
478{
479 amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
480 amdgpu_ring_write(ring, (0xf << 16) | reg);
481 amdgpu_ring_write(ring, val);
482}
483
499static int si_dma_early_init(void *handle) 484static int si_dma_early_init(void *handle)
500{ 485{
501 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 486 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -772,22 +757,20 @@ static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
772 .get_wptr = si_dma_ring_get_wptr, 757 .get_wptr = si_dma_ring_get_wptr,
773 .set_wptr = si_dma_ring_set_wptr, 758 .set_wptr = si_dma_ring_set_wptr,
774 .emit_frame_size = 759 .emit_frame_size =
775 3 + /* si_dma_ring_emit_hdp_flush */ 760 3 + 3 + /* hdp flush / invalidate */
776 3 + /* si_dma_ring_emit_hdp_invalidate */
777 6 + /* si_dma_ring_emit_pipeline_sync */ 761 6 + /* si_dma_ring_emit_pipeline_sync */
778 12 + /* si_dma_ring_emit_vm_flush */ 762 SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
779 9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */ 763 9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
780 .emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */ 764 .emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
781 .emit_ib = si_dma_ring_emit_ib, 765 .emit_ib = si_dma_ring_emit_ib,
782 .emit_fence = si_dma_ring_emit_fence, 766 .emit_fence = si_dma_ring_emit_fence,
783 .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync, 767 .emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
784 .emit_vm_flush = si_dma_ring_emit_vm_flush, 768 .emit_vm_flush = si_dma_ring_emit_vm_flush,
785 .emit_hdp_flush = si_dma_ring_emit_hdp_flush,
786 .emit_hdp_invalidate = si_dma_ring_emit_hdp_invalidate,
787 .test_ring = si_dma_ring_test_ring, 769 .test_ring = si_dma_ring_test_ring,
788 .test_ib = si_dma_ring_test_ib, 770 .test_ib = si_dma_ring_test_ib,
789 .insert_nop = amdgpu_ring_insert_nop, 771 .insert_nop = amdgpu_ring_insert_nop,
790 .pad_ib = si_dma_ring_pad_ib, 772 .pad_ib = si_dma_ring_pad_ib,
773 .emit_wreg = si_dma_ring_emit_wreg,
791}; 774};
792 775
793static void si_dma_set_ring_funcs(struct amdgpu_device *adev) 776static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
@@ -891,9 +874,6 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
891 .copy_pte = si_dma_vm_copy_pte, 874 .copy_pte = si_dma_vm_copy_pte,
892 875
893 .write_pte = si_dma_vm_write_pte, 876 .write_pte = si_dma_vm_write_pte,
894
895 .set_max_nums_pte_pde = 0xffff8 >> 3,
896 .set_pte_pde_num_dw = 9,
897 .set_pte_pde = si_dma_vm_set_pte_pde, 877 .set_pte_pde = si_dma_vm_set_pte_pde,
898}; 878};
899 879
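
si_dma_ring_emit_wreg() encodes a register write as three dwords: an SRBM_WRITE packet header, a dword with the byte-enable in the upper bits and the register offset in the lower bits, and the value itself. A small host-side sketch that packs an analogous three-dword packet into a buffer follows; the opcode and header layout are simplified and invented, and only the (0xf << 16) | reg shape is taken from the hunk above.

#include <stdint.h>
#include <stdio.h>

#define EX_PKT_SRBM_WRITE	0x9u	/* invented opcode for the sketch */

/* pack a three-dword "write register" packet into buf, return dword count */
static unsigned int example_pack_wreg(uint32_t *buf, uint32_t reg, uint32_t val)
{
	buf[0] = EX_PKT_SRBM_WRITE << 28;	/* packet header          */
	buf[1] = (0xfu << 16) | reg;		/* byte enable | register */
	buf[2] = val;				/* value to write         */
	return 3;
}

int main(void)
{
	uint32_t pkt[3];
	unsigned int n = example_pack_wreg(pkt, 0x1500, 1);
	unsigned int i;

	for (i = 0; i < n; i++)
		printf("dw%u: 0x%08x\n", i, (unsigned)pkt[i]);
	return 0;
}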
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index ce675a7f179a..797d505bf9ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -26,6 +26,7 @@
26#include "amdgpu_pm.h" 26#include "amdgpu_pm.h"
27#include "amdgpu_dpm.h" 27#include "amdgpu_dpm.h"
28#include "amdgpu_atombios.h" 28#include "amdgpu_atombios.h"
29#include "amd_pcie.h"
29#include "sid.h" 30#include "sid.h"
30#include "r600_dpm.h" 31#include "r600_dpm.h"
31#include "si_dpm.h" 32#include "si_dpm.h"
@@ -66,6 +67,8 @@ MODULE_FIRMWARE("radeon/hainan_smc.bin");
66MODULE_FIRMWARE("radeon/hainan_k_smc.bin"); 67MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
67MODULE_FIRMWARE("radeon/banks_k_2_smc.bin"); 68MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
68 69
70static const struct amd_pm_funcs si_dpm_funcs;
71
69union power_info { 72union power_info {
70 struct _ATOM_POWERPLAY_INFO info; 73 struct _ATOM_POWERPLAY_INFO info;
71 struct _ATOM_POWERPLAY_INFO_V2 info_2; 74 struct _ATOM_POWERPLAY_INFO_V2 info_2;
@@ -3064,7 +3067,7 @@ static bool si_dpm_vblank_too_short(void *handle)
3064 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3067 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3065 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); 3068 u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
3066 /* we never hit the non-gddr5 limit so disable it */ 3069 /* we never hit the non-gddr5 limit so disable it */
3067 u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; 3070 u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0;
3068 3071
3069 if (vblank_time < switch_limit) 3072 if (vblank_time < switch_limit)
3070 return true; 3073 return true;
@@ -3331,29 +3334,6 @@ static void btc_apply_voltage_delta_rules(struct amdgpu_device *adev,
3331 } 3334 }
3332} 3335}
3333 3336
3334static enum amdgpu_pcie_gen r600_get_pcie_gen_support(struct amdgpu_device *adev,
3335 u32 sys_mask,
3336 enum amdgpu_pcie_gen asic_gen,
3337 enum amdgpu_pcie_gen default_gen)
3338{
3339 switch (asic_gen) {
3340 case AMDGPU_PCIE_GEN1:
3341 return AMDGPU_PCIE_GEN1;
3342 case AMDGPU_PCIE_GEN2:
3343 return AMDGPU_PCIE_GEN2;
3344 case AMDGPU_PCIE_GEN3:
3345 return AMDGPU_PCIE_GEN3;
3346 default:
3347 if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3))
3348 return AMDGPU_PCIE_GEN3;
3349 else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2))
3350 return AMDGPU_PCIE_GEN2;
3351 else
3352 return AMDGPU_PCIE_GEN1;
3353 }
3354 return AMDGPU_PCIE_GEN1;
3355}
3356
3357static void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, 3337static void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
3358 u32 *p, u32 *u) 3338 u32 *p, u32 *u)
3359{ 3339{
@@ -4350,7 +4330,7 @@ static u8 si_get_strobe_mode_settings(struct amdgpu_device *adev, u32 mclk)
4350 if (mclk <= pi->mclk_strobe_mode_threshold) 4330 if (mclk <= pi->mclk_strobe_mode_threshold)
4351 strobe_mode = true; 4331 strobe_mode = true;
4352 4332
4353 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) 4333 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
4354 result = si_get_mclk_frequency_ratio(mclk, strobe_mode); 4334 result = si_get_mclk_frequency_ratio(mclk, strobe_mode);
4355 else 4335 else
4356 result = si_get_ddr3_mclk_frequency_ratio(mclk); 4336 result = si_get_ddr3_mclk_frequency_ratio(mclk);
@@ -4937,7 +4917,7 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev,
4937 table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp); 4917 table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
4938 table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen; 4918 table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen;
4939 4919
4940 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { 4920 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
4941 table->initialState.levels[0].strobeMode = 4921 table->initialState.levels[0].strobeMode =
4942 si_get_strobe_mode_settings(adev, 4922 si_get_strobe_mode_settings(adev,
4943 initial_state->performance_levels[0].mclk); 4923 initial_state->performance_levels[0].mclk);
@@ -5028,10 +5008,11 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
5028 table->ACPIState.levels[0].vddc.index, 5008 table->ACPIState.levels[0].vddc.index,
5029 &table->ACPIState.levels[0].std_vddc); 5009 &table->ACPIState.levels[0].std_vddc);
5030 } 5010 }
5031 table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(adev, 5011 table->ACPIState.levels[0].gen2PCIE =
5032 si_pi->sys_pcie_mask, 5012 (u8)amdgpu_get_pcie_gen_support(adev,
5033 si_pi->boot_pcie_gen, 5013 si_pi->sys_pcie_mask,
5034 AMDGPU_PCIE_GEN1); 5014 si_pi->boot_pcie_gen,
5015 AMDGPU_PCIE_GEN1);
5035 5016
5036 if (si_pi->vddc_phase_shed_control) 5017 if (si_pi->vddc_phase_shed_control)
5037 si_populate_phase_shedding_value(adev, 5018 si_populate_phase_shedding_value(adev,
@@ -5208,7 +5189,7 @@ static int si_init_smc_table(struct amdgpu_device *adev)
5208 if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC) 5189 if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC)
5209 table->systemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; 5190 table->systemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
5210 5191
5211 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) 5192 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
5212 table->systemFlags |= PPSMC_SYSTEMFLAG_GDDR5; 5193 table->systemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
5213 5194
5214 if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_REVERT_GPIO5_POLARITY) 5195 if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_REVERT_GPIO5_POLARITY)
@@ -5385,7 +5366,7 @@ static int si_populate_mclk_value(struct amdgpu_device *adev,
5385 mpll_ad_func_cntl &= ~YCLK_POST_DIV_MASK; 5366 mpll_ad_func_cntl &= ~YCLK_POST_DIV_MASK;
5386 mpll_ad_func_cntl |= YCLK_POST_DIV(mpll_param.post_div); 5367 mpll_ad_func_cntl |= YCLK_POST_DIV(mpll_param.post_div);
5387 5368
5388 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { 5369 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
5389 mpll_dq_func_cntl &= ~(YCLK_SEL_MASK | YCLK_POST_DIV_MASK); 5370 mpll_dq_func_cntl &= ~(YCLK_SEL_MASK | YCLK_POST_DIV_MASK);
5390 mpll_dq_func_cntl |= YCLK_SEL(mpll_param.yclk_sel) | 5371 mpll_dq_func_cntl |= YCLK_SEL(mpll_param.yclk_sel) |
5391 YCLK_POST_DIV(mpll_param.post_div); 5372 YCLK_POST_DIV(mpll_param.post_div);
@@ -5397,7 +5378,7 @@ static int si_populate_mclk_value(struct amdgpu_device *adev,
5397 u32 tmp; 5378 u32 tmp;
5398 u32 reference_clock = adev->clock.mpll.reference_freq; 5379 u32 reference_clock = adev->clock.mpll.reference_freq;
5399 5380
5400 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) 5381 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
5401 freq_nom = memory_clock * 4; 5382 freq_nom = memory_clock * 4;
5402 else 5383 else
5403 freq_nom = memory_clock * 2; 5384 freq_nom = memory_clock * 2;
@@ -5489,7 +5470,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev,
5489 level->mcFlags |= SISLANDS_SMC_MC_PG_EN; 5470 level->mcFlags |= SISLANDS_SMC_MC_PG_EN;
5490 } 5471 }
5491 5472
5492 if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { 5473 if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
5493 if (pl->mclk > pi->mclk_edc_enable_threshold) 5474 if (pl->mclk > pi->mclk_edc_enable_threshold)
5494 level->mcFlags |= SISLANDS_SMC_MC_EDC_RD_FLAG; 5475 level->mcFlags |= SISLANDS_SMC_MC_EDC_RD_FLAG;
5495 5476
@@ -5860,12 +5841,12 @@ static int si_set_mc_special_registers(struct amdgpu_device *adev,
5860 table->mc_reg_table_entry[k].mc_data[j] = 5841 table->mc_reg_table_entry[k].mc_data[j] =
5861 (temp_reg & 0xffff0000) | 5842 (temp_reg & 0xffff0000) |
5862 (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); 5843 (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
5863 if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) 5844 if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
5864 table->mc_reg_table_entry[k].mc_data[j] |= 0x100; 5845 table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
5865 } 5846 }
5866 j++; 5847 j++;
5867 5848
5868 if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { 5849 if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
5869 if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) 5850 if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
5870 return -EINVAL; 5851 return -EINVAL;
5871 table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD; 5852 table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD;
@@ -6391,9 +6372,9 @@ static void si_set_pcie_lane_width_in_smc(struct amdgpu_device *adev,
6391{ 6372{
6392 u32 lane_width; 6373 u32 lane_width;
6393 u32 new_lane_width = 6374 u32 new_lane_width =
6394 (amdgpu_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT; 6375 ((amdgpu_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1;
6395 u32 current_lane_width = 6376 u32 current_lane_width =
6396 (amdgpu_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT; 6377 ((amdgpu_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1;
6397 6378
6398 if (new_lane_width != current_lane_width) { 6379 if (new_lane_width != current_lane_width) {
6399 amdgpu_set_pcie_lanes(adev, new_lane_width); 6380 amdgpu_set_pcie_lanes(adev, new_lane_width);
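
The only functional change in this hunk is the trailing "+ 1": the pplib caps field evidently encodes the link width as lane count minus one, so decoding it without the increment under-reported the width by one lane (an encoded 15 is x16, not x15). A hedged standalone illustration, with assumed mask/shift placement:

#include <stdint.h>

#define PPLIB_PCIE_LINK_WIDTH_MASK	0x000F0000u	/* assumed layout, illustration only */
#define PPLIB_PCIE_LINK_WIDTH_SHIFT	16

static inline uint32_t pcie_lane_width(uint32_t caps)
{
	/* the field stores (lanes - 1), so add one back after extracting it */
	return ((caps & PPLIB_PCIE_LINK_WIDTH_MASK) >> PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1;
}
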
@@ -7168,10 +7149,10 @@ static void si_parse_pplib_clock_info(struct amdgpu_device *adev,
7168 pl->vddc = le16_to_cpu(clock_info->si.usVDDC); 7149 pl->vddc = le16_to_cpu(clock_info->si.usVDDC);
7169 pl->vddci = le16_to_cpu(clock_info->si.usVDDCI); 7150 pl->vddci = le16_to_cpu(clock_info->si.usVDDCI);
7170 pl->flags = le32_to_cpu(clock_info->si.ulFlags); 7151 pl->flags = le32_to_cpu(clock_info->si.ulFlags);
7171 pl->pcie_gen = r600_get_pcie_gen_support(adev, 7152 pl->pcie_gen = amdgpu_get_pcie_gen_support(adev,
7172 si_pi->sys_pcie_mask, 7153 si_pi->sys_pcie_mask,
7173 si_pi->boot_pcie_gen, 7154 si_pi->boot_pcie_gen,
7174 clock_info->si.ucPCIEGen); 7155 clock_info->si.ucPCIEGen);
7175 7156
7176 /* patch up vddc if necessary */ 7157 /* patch up vddc if necessary */
7177 ret = si_get_leakage_voltage_from_leakage_index(adev, pl->vddc, 7158 ret = si_get_leakage_voltage_from_leakage_index(adev, pl->vddc,
@@ -7326,7 +7307,6 @@ static int si_dpm_init(struct amdgpu_device *adev)
7326 struct si_power_info *si_pi; 7307 struct si_power_info *si_pi;
7327 struct atom_clock_dividers dividers; 7308 struct atom_clock_dividers dividers;
7328 int ret; 7309 int ret;
7329 u32 mask;
7330 7310
7331 si_pi = kzalloc(sizeof(struct si_power_info), GFP_KERNEL); 7311 si_pi = kzalloc(sizeof(struct si_power_info), GFP_KERNEL);
7332 if (si_pi == NULL) 7312 if (si_pi == NULL)
@@ -7336,11 +7316,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
7336 eg_pi = &ni_pi->eg; 7316 eg_pi = &ni_pi->eg;
7337 pi = &eg_pi->rv7xx; 7317 pi = &eg_pi->rv7xx;
7338 7318
7339 ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask); 7319 si_pi->sys_pcie_mask =
7340 if (ret) 7320 (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
7341 si_pi->sys_pcie_mask = 0; 7321 CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
7342 else
7343 si_pi->sys_pcie_mask = mask;
7344 si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID; 7322 si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
7345 si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev); 7323 si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev);
7346 7324
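
si_dpm no longer asks DRM for the host PCIe speed cap; it derives sys_pcie_mask from the pcie_gen_mask the driver already caches during device init. Shifting the CAIL support bits down by CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT presumably lands gen1/2/3 on the same low bit positions the old DRM_PCIE_SPEED_* mask used, so the consumers above keep working unchanged. A standalone sketch with assumed constant values:

#include <stdint.h>

/* assumed values, mirroring amd_pcie.h */
#define CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1	0x00010000u
#define CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2	0x00020000u
#define CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3	0x00040000u
#define CAIL_PCIE_LINK_SPEED_SUPPORT_MASK	0xFFFF0000u
#define CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT	16

static uint32_t sys_pcie_mask_from_gen_mask(uint32_t pcie_gen_mask)
{
	/* keep only the "supported speeds" half and shift it down, so
	 * gen1/2/3 end up in bits 0/1/2 like the old DRM speed mask */
	return (pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
		CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
}
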
@@ -7938,6 +7916,8 @@ static int si_dpm_early_init(void *handle)
7938 7916
7939 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 7917 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7940 7918
7919 adev->powerplay.pp_funcs = &si_dpm_funcs;
7920 adev->powerplay.pp_handle = adev;
7941 si_dpm_set_irq_funcs(adev); 7921 si_dpm_set_irq_funcs(adev);
7942 return 0; 7922 return 0;
7943} 7923}
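
si_dpm_early_init now registers the (newly static) amd_pm_funcs table directly on the device, which is how the amdgpu_dpm_* wrappers are expected to reach it. A hedged sketch of that dispatch shape, relying on the amdgpu_device/amd_pm_funcs definitions from amdgpu.h (the real wrappers are macros in amdgpu_dpm.h):

/* illustrative only: how a dpm call dispatches once pp_funcs/pp_handle
 * are filled in by the early_init above */
static inline int dpm_pre_set_power_state(struct amdgpu_device *adev)
{
	const struct amd_pm_funcs *funcs = adev->powerplay.pp_funcs;

	if (!funcs || !funcs->pre_set_power_state)
		return -EINVAL;
	return funcs->pre_set_power_state(adev->powerplay.pp_handle);
}
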
@@ -8038,7 +8018,7 @@ static int si_dpm_read_sensor(void *handle, int idx,
8038 } 8018 }
8039} 8019}
8040 8020
8041const struct amd_ip_funcs si_dpm_ip_funcs = { 8021static const struct amd_ip_funcs si_dpm_ip_funcs = {
8042 .name = "si_dpm", 8022 .name = "si_dpm",
8043 .early_init = si_dpm_early_init, 8023 .early_init = si_dpm_early_init,
8044 .late_init = si_dpm_late_init, 8024 .late_init = si_dpm_late_init,
@@ -8055,8 +8035,16 @@ const struct amd_ip_funcs si_dpm_ip_funcs = {
8055 .set_powergating_state = si_dpm_set_powergating_state, 8035 .set_powergating_state = si_dpm_set_powergating_state,
8056}; 8036};
8057 8037
8058const struct amd_pm_funcs si_dpm_funcs = { 8038const struct amdgpu_ip_block_version si_smu_ip_block =
8059 .get_temperature = &si_dpm_get_temp, 8039{
8040 .type = AMD_IP_BLOCK_TYPE_SMC,
8041 .major = 6,
8042 .minor = 0,
8043 .rev = 0,
8044 .funcs = &si_dpm_ip_funcs,
8045};
8046
8047static const struct amd_pm_funcs si_dpm_funcs = {
8060 .pre_set_power_state = &si_dpm_pre_set_power_state, 8048 .pre_set_power_state = &si_dpm_pre_set_power_state,
8061 .set_power_state = &si_dpm_set_power_state, 8049 .set_power_state = &si_dpm_set_power_state,
8062 .post_set_power_state = &si_dpm_post_set_power_state, 8050 .post_set_power_state = &si_dpm_post_set_power_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.h b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
index 9fe343de3477..6b7d292b919f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
@@ -245,8 +245,7 @@ enum si_display_gap
245 SI_PM_DISPLAY_GAP_IGNORE = 3, 245 SI_PM_DISPLAY_GAP_IGNORE = 3,
246}; 246};
247 247
248extern const struct amd_ip_funcs si_dpm_ip_funcs; 248extern const struct amdgpu_ip_block_version si_smu_ip_block;
249extern const struct amd_pm_funcs si_dpm_funcs;
250 249
251struct ni_leakage_coeffients 250struct ni_leakage_coeffients
252{ 251{
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index a04a033f57de..51cf8a30f6c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -57,7 +57,6 @@
57#include "uvd_v7_0.h" 57#include "uvd_v7_0.h"
58#include "vce_v4_0.h" 58#include "vce_v4_0.h"
59#include "vcn_v1_0.h" 59#include "vcn_v1_0.h"
60#include "amdgpu_powerplay.h"
61#include "dce_virtual.h" 60#include "dce_virtual.h"
62#include "mxgpu_ai.h" 61#include "mxgpu_ai.h"
63 62
@@ -417,12 +416,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
417 416
418 pci_save_state(adev->pdev); 417 pci_save_state(adev->pdev);
419 418
420 for (i = 0; i < AMDGPU_MAX_IP_NUM; i++) { 419 psp_gpu_reset(adev);
421 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP){
422 adev->ip_blocks[i].version->funcs->soft_reset((void *)adev);
423 break;
424 }
425 }
426 420
427 pci_restore_state(adev->pdev); 421 pci_restore_state(adev->pdev);
428 422
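
The open-coded walk over the IP blocks is folded into psp_gpu_reset(); the dropped loop is effectively the behaviour that helper has to provide, likely with an extra check that the PSP path is actually in use. A sketch of the replaced behaviour, mirroring the removed lines above:

/* behaviour being encapsulated: find the PSP block and soft-reset it */
static int psp_block_soft_reset(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_IP_NUM; i++) {
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
			return adev->ip_blocks[i].version->funcs->soft_reset((void *)adev);
	}
	return 0;
}
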
@@ -514,6 +508,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
514 /* Set IP register base before any HW register access */ 508 /* Set IP register base before any HW register access */
515 switch (adev->asic_type) { 509 switch (adev->asic_type) {
516 case CHIP_VEGA10: 510 case CHIP_VEGA10:
511 case CHIP_VEGA12:
517 case CHIP_RAVEN: 512 case CHIP_RAVEN:
518 vega10_reg_base_init(adev); 513 vega10_reg_base_init(adev);
519 break; 514 break;
@@ -533,13 +528,13 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
533 528
534 switch (adev->asic_type) { 529 switch (adev->asic_type) {
535 case CHIP_VEGA10: 530 case CHIP_VEGA10:
531 case CHIP_VEGA12:
536 amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); 532 amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
537 amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); 533 amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
538 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); 534 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
539 if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1) 535 amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
540 amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
541 if (!amdgpu_sriov_vf(adev)) 536 if (!amdgpu_sriov_vf(adev))
542 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 537 amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
543 if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) 538 if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
544 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 539 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
545#if defined(CONFIG_DRM_AMD_DC) 540#if defined(CONFIG_DRM_AMD_DC)
@@ -558,7 +553,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
558 amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); 553 amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
559 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); 554 amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
560 amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block); 555 amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
561 amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); 556 amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
562 if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) 557 if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
563 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); 558 amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
564#if defined(CONFIG_DRM_AMD_DC) 559#if defined(CONFIG_DRM_AMD_DC)
@@ -583,6 +578,21 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
583 return adev->nbio_funcs->get_rev_id(adev); 578 return adev->nbio_funcs->get_rev_id(adev);
584} 579}
585 580
581static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
582{
583 adev->nbio_funcs->hdp_flush(adev, ring);
584}
585
586static void soc15_invalidate_hdp(struct amdgpu_device *adev,
587 struct amdgpu_ring *ring)
588{
589 if (!ring || !ring->funcs->emit_wreg)
590 WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
591 else
592 amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
593 HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
594}
595
586static const struct amdgpu_asic_funcs soc15_asic_funcs = 596static const struct amdgpu_asic_funcs soc15_asic_funcs =
587{ 597{
588 .read_disabled_bios = &soc15_read_disabled_bios, 598 .read_disabled_bios = &soc15_read_disabled_bios,
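
These two callbacks centralise HDP maintenance behind the asic_funcs table extended in the next hunk: flushes always go through the NBIO helper, while invalidation either writes mmHDP_READ_CACHE_INVALIDATE directly over MMIO or, when a ring that can emit register writes is supplied, queues the same write on that ring. A rough sketch of the intended call sites; the wrapper macros are assumed to be simple dispatch through asic_funcs:

/* assumed dispatch shape; the real wrappers live in amdgpu.h */
#define amdgpu_asic_flush_hdp(adev, r)	\
	((adev)->asic_funcs->flush_hdp((adev), (r)))
#define amdgpu_asic_invalidate_hdp(adev, r) \
	((adev)->asic_funcs->invalidate_hdp((adev), (r)))

/* e.g. after a CPU-side update of GPU-visible memory:
 *	amdgpu_asic_flush_hdp(adev, NULL);	 immediate MMIO path
 *	amdgpu_asic_invalidate_hdp(adev, ring);	 queued on the ring
 */
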
@@ -594,11 +604,12 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
594 .set_uvd_clocks = &soc15_set_uvd_clocks, 604 .set_uvd_clocks = &soc15_set_uvd_clocks,
595 .set_vce_clocks = &soc15_set_vce_clocks, 605 .set_vce_clocks = &soc15_set_vce_clocks,
596 .get_config_memsize = &soc15_get_config_memsize, 606 .get_config_memsize = &soc15_get_config_memsize,
607 .flush_hdp = &soc15_flush_hdp,
608 .invalidate_hdp = &soc15_invalidate_hdp,
597}; 609};
598 610
599static int soc15_common_early_init(void *handle) 611static int soc15_common_early_init(void *handle)
600{ 612{
601 bool psp_enabled = false;
602 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 613 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
603 614
604 adev->smc_rreg = NULL; 615 adev->smc_rreg = NULL;
@@ -616,10 +627,6 @@ static int soc15_common_early_init(void *handle)
616 627
617 adev->asic_funcs = &soc15_asic_funcs; 628 adev->asic_funcs = &soc15_asic_funcs;
618 629
619 if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) &&
620 (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
621 psp_enabled = true;
622
623 adev->rev_id = soc15_get_rev_id(adev); 630 adev->rev_id = soc15_get_rev_id(adev);
624 adev->external_rev_id = 0xFF; 631 adev->external_rev_id = 0xFF;
625 switch (adev->asic_type) { 632 switch (adev->asic_type) {
@@ -646,6 +653,28 @@ static int soc15_common_early_init(void *handle)
646 adev->pg_flags = 0; 653 adev->pg_flags = 0;
647 adev->external_rev_id = 0x1; 654 adev->external_rev_id = 0x1;
648 break; 655 break;
656 case CHIP_VEGA12:
657 adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
658 AMD_CG_SUPPORT_GFX_MGLS |
659 AMD_CG_SUPPORT_GFX_CGCG |
660 AMD_CG_SUPPORT_GFX_CGLS |
661 AMD_CG_SUPPORT_GFX_3D_CGCG |
662 AMD_CG_SUPPORT_GFX_3D_CGLS |
663 AMD_CG_SUPPORT_GFX_CP_LS |
664 AMD_CG_SUPPORT_MC_LS |
665 AMD_CG_SUPPORT_MC_MGCG |
666 AMD_CG_SUPPORT_SDMA_MGCG |
667 AMD_CG_SUPPORT_SDMA_LS |
668 AMD_CG_SUPPORT_BIF_MGCG |
669 AMD_CG_SUPPORT_BIF_LS |
670 AMD_CG_SUPPORT_HDP_MGCG |
671 AMD_CG_SUPPORT_HDP_LS |
672 AMD_CG_SUPPORT_ROM_MGCG |
673 AMD_CG_SUPPORT_VCE_MGCG |
674 AMD_CG_SUPPORT_UVD_MGCG;
675 adev->pg_flags = 0;
676 adev->external_rev_id = adev->rev_id + 0x14;
677 break;
649 case CHIP_RAVEN: 678 case CHIP_RAVEN:
650 adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | 679 adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
651 AMD_CG_SUPPORT_GFX_MGLS | 680 AMD_CG_SUPPORT_GFX_MGLS |
@@ -680,10 +709,6 @@ static int soc15_common_early_init(void *handle)
680 xgpu_ai_mailbox_set_irq_funcs(adev); 709 xgpu_ai_mailbox_set_irq_funcs(adev);
681 } 710 }
682 711
683 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
684
685 amdgpu_device_get_pcie_info(adev);
686
687 return 0; 712 return 0;
688} 713}
689 714
@@ -882,6 +907,7 @@ static int soc15_common_set_clockgating_state(void *handle,
882 907
883 switch (adev->asic_type) { 908 switch (adev->asic_type) {
884 case CHIP_VEGA10: 909 case CHIP_VEGA10:
910 case CHIP_VEGA12:
885 adev->nbio_funcs->update_medium_grain_clock_gating(adev, 911 adev->nbio_funcs->update_medium_grain_clock_gating(adev,
886 state == AMD_CG_STATE_GATE ? true : false); 912 state == AMD_CG_STATE_GATE ? true : false);
887 adev->nbio_funcs->update_medium_grain_light_sleep(adev, 913 adev->nbio_funcs->update_medium_grain_light_sleep(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 26b3feac5d06..f70da8a29f86 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,6 +27,9 @@
27#include "nbio_v6_1.h" 27#include "nbio_v6_1.h"
28#include "nbio_v7_0.h" 28#include "nbio_v7_0.h"
29 29
30#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4
31#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1
32
30extern const struct amd_ip_funcs soc15_common_ip_funcs; 33extern const struct amd_ip_funcs soc15_common_ip_funcs;
31 34
32struct soc15_reg_golden { 35struct soc15_reg_golden {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 5995ffc183de..52853d8a8fdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -107,7 +107,7 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
107 tonga_ih_disable_interrupts(adev); 107 tonga_ih_disable_interrupts(adev);
108 108
109 /* setup interrupt control */ 109 /* setup interrupt control */
110 WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); 110 WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
111 interrupt_cntl = RREG32(mmINTERRUPT_CNTL); 111 interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
112 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi 112 /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
113 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN 113 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -271,7 +271,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev,
271 entry->src_data[0] = dw[1] & 0xfffffff; 271 entry->src_data[0] = dw[1] & 0xfffffff;
272 entry->ring_id = dw[2] & 0xff; 272 entry->ring_id = dw[2] & 0xff;
273 entry->vmid = (dw[2] >> 8) & 0xff; 273 entry->vmid = (dw[2] >> 8) & 0xff;
274 entry->pas_id = (dw[2] >> 16) & 0xffff; 274 entry->pasid = (dw[2] >> 16) & 0xffff;
275 275
276 /* wptr/rptr are in bytes! */ 276 /* wptr/rptr are in bytes! */
277 adev->irq.ih.rptr += 16; 277 adev->irq.ih.rptr += 16;
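
The interrupt vector is decoded straight out of the dwords the IH ring carries per entry; only the field name changes here (pas_id becomes pasid). The bit layout implied by the code above, as a standalone decode:

#include <stdint.h>

struct iv_entry {
	uint32_t src_data;
	uint32_t ring_id;
	uint32_t vmid;
	uint32_t pasid;
};

/* field layout as used by tonga_ih_decode_iv() above */
static void decode_iv(const uint32_t dw[4], struct iv_entry *e)
{
	e->src_data = dw[1] & 0xfffffff;	/* low 28 bits  */
	e->ring_id  = dw[2] & 0xff;		/* bits 0..7    */
	e->vmid     = (dw[2] >> 8) & 0xff;	/* bits 8..15   */
	e->pasid    = (dw[2] >> 16) & 0xffff;	/* bits 16..31  */
}
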
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 8ab10c220910..948bb9437757 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -464,32 +464,6 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
464} 464}
465 465
466/** 466/**
467 * uvd_v4_2_ring_emit_hdp_flush - emit an hdp flush
468 *
469 * @ring: amdgpu_ring pointer
470 *
471 * Emits an hdp flush.
472 */
473static void uvd_v4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
474{
475 amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
476 amdgpu_ring_write(ring, 0);
477}
478
479/**
480 * uvd_v4_2_ring_hdp_invalidate - emit an hdp invalidate
481 *
482 * @ring: amdgpu_ring pointer
483 *
484 * Emits an hdp invalidate.
485 */
486static void uvd_v4_2_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
487{
488 amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
489 amdgpu_ring_write(ring, 1);
490}
491
492/**
493 * uvd_v4_2_ring_test_ring - register write test 467 * uvd_v4_2_ring_test_ring - register write test
494 * 468 *
495 * @ring: amdgpu_ring pointer 469 * @ring: amdgpu_ring pointer
@@ -765,14 +739,10 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
765 .set_wptr = uvd_v4_2_ring_set_wptr, 739 .set_wptr = uvd_v4_2_ring_set_wptr,
766 .parse_cs = amdgpu_uvd_ring_parse_cs, 740 .parse_cs = amdgpu_uvd_ring_parse_cs,
767 .emit_frame_size = 741 .emit_frame_size =
768 2 + /* uvd_v4_2_ring_emit_hdp_flush */
769 2 + /* uvd_v4_2_ring_emit_hdp_invalidate */
770 14, /* uvd_v4_2_ring_emit_fence x1 no user fence */ 742 14, /* uvd_v4_2_ring_emit_fence x1 no user fence */
771 .emit_ib_size = 4, /* uvd_v4_2_ring_emit_ib */ 743 .emit_ib_size = 4, /* uvd_v4_2_ring_emit_ib */
772 .emit_ib = uvd_v4_2_ring_emit_ib, 744 .emit_ib = uvd_v4_2_ring_emit_ib,
773 .emit_fence = uvd_v4_2_ring_emit_fence, 745 .emit_fence = uvd_v4_2_ring_emit_fence,
774 .emit_hdp_flush = uvd_v4_2_ring_emit_hdp_flush,
775 .emit_hdp_invalidate = uvd_v4_2_ring_emit_hdp_invalidate,
776 .test_ring = uvd_v4_2_ring_test_ring, 746 .test_ring = uvd_v4_2_ring_test_ring,
777 .test_ib = amdgpu_uvd_ring_test_ib, 747 .test_ib = amdgpu_uvd_ring_test_ib,
778 .insert_nop = amdgpu_ring_insert_nop, 748 .insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index c1fe30cdba32..6445d55e7d5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -479,32 +479,6 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
479} 479}
480 480
481/** 481/**
482 * uvd_v5_0_ring_emit_hdp_flush - emit an hdp flush
483 *
484 * @ring: amdgpu_ring pointer
485 *
486 * Emits an hdp flush.
487 */
488static void uvd_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
489{
490 amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
491 amdgpu_ring_write(ring, 0);
492}
493
494/**
495 * uvd_v5_0_ring_hdp_invalidate - emit an hdp invalidate
496 *
497 * @ring: amdgpu_ring pointer
498 *
499 * Emits an hdp invalidate.
500 */
501static void uvd_v5_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
502{
503 amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
504 amdgpu_ring_write(ring, 1);
505}
506
507/**
508 * uvd_v5_0_ring_test_ring - register write test 482 * uvd_v5_0_ring_test_ring - register write test
509 * 483 *
510 * @ring: amdgpu_ring pointer 484 * @ring: amdgpu_ring pointer
@@ -873,14 +847,10 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
873 .set_wptr = uvd_v5_0_ring_set_wptr, 847 .set_wptr = uvd_v5_0_ring_set_wptr,
874 .parse_cs = amdgpu_uvd_ring_parse_cs, 848 .parse_cs = amdgpu_uvd_ring_parse_cs,
875 .emit_frame_size = 849 .emit_frame_size =
876 2 + /* uvd_v5_0_ring_emit_hdp_flush */
877 2 + /* uvd_v5_0_ring_emit_hdp_invalidate */
878 14, /* uvd_v5_0_ring_emit_fence x1 no user fence */ 850 14, /* uvd_v5_0_ring_emit_fence x1 no user fence */
879 .emit_ib_size = 6, /* uvd_v5_0_ring_emit_ib */ 851 .emit_ib_size = 6, /* uvd_v5_0_ring_emit_ib */
880 .emit_ib = uvd_v5_0_ring_emit_ib, 852 .emit_ib = uvd_v5_0_ring_emit_ib,
881 .emit_fence = uvd_v5_0_ring_emit_fence, 853 .emit_fence = uvd_v5_0_ring_emit_fence,
882 .emit_hdp_flush = uvd_v5_0_ring_emit_hdp_flush,
883 .emit_hdp_invalidate = uvd_v5_0_ring_emit_hdp_invalidate,
884 .test_ring = uvd_v5_0_ring_test_ring, 854 .test_ring = uvd_v5_0_ring_test_ring,
885 .test_ib = amdgpu_uvd_ring_test_ib, 855 .test_ib = amdgpu_uvd_ring_test_ib,
886 .insert_nop = amdgpu_ring_insert_nop, 856 .insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 9bab4842cd44..f26f515db2fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -964,32 +964,6 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
964} 964}
965 965
966/** 966/**
967 * uvd_v6_0_ring_emit_hdp_flush - emit an hdp flush
968 *
969 * @ring: amdgpu_ring pointer
970 *
971 * Emits an hdp flush.
972 */
973static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
974{
975 amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
976 amdgpu_ring_write(ring, 0);
977}
978
979/**
980 * uvd_v6_0_ring_hdp_invalidate - emit an hdp invalidate
981 *
982 * @ring: amdgpu_ring pointer
983 *
984 * Emits an hdp invalidate.
985 */
986static void uvd_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
987{
988 amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
989 amdgpu_ring_write(ring, 1);
990}
991
992/**
993 * uvd_v6_0_ring_test_ring - register write test 967 * uvd_v6_0_ring_test_ring - register write test
994 * 968 *
995 * @ring: amdgpu_ring pointer 969 * @ring: amdgpu_ring pointer
@@ -1072,29 +1046,21 @@ static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
1072 amdgpu_ring_write(ring, ib->length_dw); 1046 amdgpu_ring_write(ring, ib->length_dw);
1073} 1047}
1074 1048
1075static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 1049static void uvd_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
1076 unsigned vmid, uint64_t pd_addr) 1050 uint32_t reg, uint32_t val)
1077{ 1051{
1078 uint32_t reg;
1079
1080 if (vmid < 8)
1081 reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
1082 else
1083 reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
1084
1085 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); 1052 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
1086 amdgpu_ring_write(ring, reg << 2); 1053 amdgpu_ring_write(ring, reg << 2);
1087 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); 1054 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
1088 amdgpu_ring_write(ring, pd_addr >> 12); 1055 amdgpu_ring_write(ring, val);
1089 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); 1056 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
1090 amdgpu_ring_write(ring, 0x8); 1057 amdgpu_ring_write(ring, 0x8);
1058}
1091 1059
1092 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); 1060static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1093 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); 1061 unsigned vmid, uint64_t pd_addr)
1094 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); 1062{
1095 amdgpu_ring_write(ring, 1 << vmid); 1063 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1096 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
1097 amdgpu_ring_write(ring, 0x8);
1098 1064
1099 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); 1065 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
1100 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); 1066 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
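
This is the pattern repeated across the UVD/VCE/VCN rings in the rest of the patch: each ring now exposes only an emit_wreg (and, where needed, emit_reg_wait) primitive, and the VM flush itself is emitted by the shared amdgpu_gmc_emit_flush_gpu_tlb(), which drives those primitives through the per-generation GMC function table and returns the PDE-formatted address the caller then waits on. A hedged sketch of the dispatch; the actual macros are assumed to live in amdgpu_gmc.h and amdgpu_ring.h:

/* assumed shape of the shared dispatch used by the converted vm_flush hooks */
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) \
	((r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)))
#define amdgpu_ring_emit_wreg(r, reg, val) \
	((r)->funcs->emit_wreg((r), (reg), (val)))
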
@@ -1140,7 +1106,7 @@ static void uvd_v6_0_enc_ring_insert_end(struct amdgpu_ring *ring)
1140} 1106}
1141 1107
1142static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, 1108static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
1143 unsigned int vmid, uint64_t pd_addr) 1109 unsigned int vmid, uint64_t pd_addr)
1144{ 1110{
1145 amdgpu_ring_write(ring, HEVC_ENC_CMD_UPDATE_PTB); 1111 amdgpu_ring_write(ring, HEVC_ENC_CMD_UPDATE_PTB);
1146 amdgpu_ring_write(ring, vmid); 1112 amdgpu_ring_write(ring, vmid);
@@ -1562,21 +1528,19 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
1562 .set_wptr = uvd_v6_0_ring_set_wptr, 1528 .set_wptr = uvd_v6_0_ring_set_wptr,
1563 .parse_cs = amdgpu_uvd_ring_parse_cs, 1529 .parse_cs = amdgpu_uvd_ring_parse_cs,
1564 .emit_frame_size = 1530 .emit_frame_size =
1565 2 + /* uvd_v6_0_ring_emit_hdp_flush */ 1531 6 + 6 + /* hdp flush / invalidate */
1566 2 + /* uvd_v6_0_ring_emit_hdp_invalidate */
1567 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ 1532 10 + /* uvd_v6_0_ring_emit_pipeline_sync */
1568 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */ 1533 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
1569 .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ 1534 .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
1570 .emit_ib = uvd_v6_0_ring_emit_ib, 1535 .emit_ib = uvd_v6_0_ring_emit_ib,
1571 .emit_fence = uvd_v6_0_ring_emit_fence, 1536 .emit_fence = uvd_v6_0_ring_emit_fence,
1572 .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
1573 .emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
1574 .test_ring = uvd_v6_0_ring_test_ring, 1537 .test_ring = uvd_v6_0_ring_test_ring,
1575 .test_ib = amdgpu_uvd_ring_test_ib, 1538 .test_ib = amdgpu_uvd_ring_test_ib,
1576 .insert_nop = amdgpu_ring_insert_nop, 1539 .insert_nop = amdgpu_ring_insert_nop,
1577 .pad_ib = amdgpu_ring_generic_pad_ib, 1540 .pad_ib = amdgpu_ring_generic_pad_ib,
1578 .begin_use = amdgpu_uvd_ring_begin_use, 1541 .begin_use = amdgpu_uvd_ring_begin_use,
1579 .end_use = amdgpu_uvd_ring_end_use, 1542 .end_use = amdgpu_uvd_ring_end_use,
1543 .emit_wreg = uvd_v6_0_ring_emit_wreg,
1580}; 1544};
1581 1545
1582static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { 1546static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
@@ -1588,24 +1552,22 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
1588 .get_wptr = uvd_v6_0_ring_get_wptr, 1552 .get_wptr = uvd_v6_0_ring_get_wptr,
1589 .set_wptr = uvd_v6_0_ring_set_wptr, 1553 .set_wptr = uvd_v6_0_ring_set_wptr,
1590 .emit_frame_size = 1554 .emit_frame_size =
1591 2 + /* uvd_v6_0_ring_emit_hdp_flush */ 1555 6 + 6 + /* hdp flush / invalidate */
1592 2 + /* uvd_v6_0_ring_emit_hdp_invalidate */
1593 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ 1556 10 + /* uvd_v6_0_ring_emit_pipeline_sync */
1594 20 + /* uvd_v6_0_ring_emit_vm_flush */ 1557 VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */
1595 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */ 1558 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
1596 .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ 1559 .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
1597 .emit_ib = uvd_v6_0_ring_emit_ib, 1560 .emit_ib = uvd_v6_0_ring_emit_ib,
1598 .emit_fence = uvd_v6_0_ring_emit_fence, 1561 .emit_fence = uvd_v6_0_ring_emit_fence,
1599 .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush, 1562 .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
1600 .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync, 1563 .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
1601 .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
1602 .emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
1603 .test_ring = uvd_v6_0_ring_test_ring, 1564 .test_ring = uvd_v6_0_ring_test_ring,
1604 .test_ib = amdgpu_uvd_ring_test_ib, 1565 .test_ib = amdgpu_uvd_ring_test_ib,
1605 .insert_nop = amdgpu_ring_insert_nop, 1566 .insert_nop = amdgpu_ring_insert_nop,
1606 .pad_ib = amdgpu_ring_generic_pad_ib, 1567 .pad_ib = amdgpu_ring_generic_pad_ib,
1607 .begin_use = amdgpu_uvd_ring_begin_use, 1568 .begin_use = amdgpu_uvd_ring_begin_use,
1608 .end_use = amdgpu_uvd_ring_end_use, 1569 .end_use = amdgpu_uvd_ring_end_use,
1570 .emit_wreg = uvd_v6_0_ring_emit_wreg,
1609}; 1571};
1610 1572
1611static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { 1573static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 6b95f4f344b5..eddc57f3b72a 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -25,6 +25,7 @@
25#include <drm/drmP.h> 25#include <drm/drmP.h>
26#include "amdgpu.h" 26#include "amdgpu.h"
27#include "amdgpu_uvd.h" 27#include "amdgpu_uvd.h"
28#include "soc15.h"
28#include "soc15d.h" 29#include "soc15d.h"
29#include "soc15_common.h" 30#include "soc15_common.h"
30#include "mmsch_v1_0.h" 31#include "mmsch_v1_0.h"
@@ -389,13 +390,13 @@ static int uvd_v7_0_sw_init(void *handle)
389 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 390 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
390 391
391 /* UVD TRAP */ 392 /* UVD TRAP */
392 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UVD, 124, &adev->uvd.irq); 393 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq);
393 if (r) 394 if (r)
394 return r; 395 return r;
395 396
396 /* UVD ENC TRAP */ 397 /* UVD ENC TRAP */
397 for (i = 0; i < adev->uvd.num_enc_rings; ++i) { 398 for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
398 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq); 399 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
399 if (r) 400 if (r)
400 return r; 401 return r;
401 } 402 }
@@ -1135,37 +1136,6 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
1135} 1136}
1136 1137
1137/** 1138/**
1138 * uvd_v7_0_ring_emit_hdp_flush - emit an hdp flush
1139 *
1140 * @ring: amdgpu_ring pointer
1141 *
1142 * Emits an hdp flush.
1143 */
1144static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
1145{
1146 struct amdgpu_device *adev = ring->adev;
1147
1148 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(NBIF, 0,
1149 mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0));
1150 amdgpu_ring_write(ring, 0);
1151}
1152
1153/**
1154 * uvd_v7_0_ring_hdp_invalidate - emit an hdp invalidate
1155 *
1156 * @ring: amdgpu_ring pointer
1157 *
1158 * Emits an hdp invalidate.
1159 */
1160static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
1161{
1162 struct amdgpu_device *adev = ring->adev;
1163
1164 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0));
1165 amdgpu_ring_write(ring, 1);
1166}
1167
1168/**
1169 * uvd_v7_0_ring_test_ring - register write test 1139 * uvd_v7_0_ring_test_ring - register write test
1170 * 1140 *
1171 * @ring: amdgpu_ring pointer 1141 * @ring: amdgpu_ring pointer
@@ -1255,33 +1225,33 @@ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
1255 amdgpu_ring_write(ring, ib->length_dw); 1225 amdgpu_ring_write(ring, ib->length_dw);
1256} 1226}
1257 1227
1258static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring, 1228static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
1259 uint32_t data0, uint32_t data1) 1229 uint32_t reg, uint32_t val)
1260{ 1230{
1261 struct amdgpu_device *adev = ring->adev; 1231 struct amdgpu_device *adev = ring->adev;
1262 1232
1263 amdgpu_ring_write(ring, 1233 amdgpu_ring_write(ring,
1264 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); 1234 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
1265 amdgpu_ring_write(ring, data0); 1235 amdgpu_ring_write(ring, reg << 2);
1266 amdgpu_ring_write(ring, 1236 amdgpu_ring_write(ring,
1267 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); 1237 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
1268 amdgpu_ring_write(ring, data1); 1238 amdgpu_ring_write(ring, val);
1269 amdgpu_ring_write(ring, 1239 amdgpu_ring_write(ring,
1270 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); 1240 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
1271 amdgpu_ring_write(ring, 8); 1241 amdgpu_ring_write(ring, 8);
1272} 1242}
1273 1243
1274static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, 1244static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1275 uint32_t data0, uint32_t data1, uint32_t mask) 1245 uint32_t val, uint32_t mask)
1276{ 1246{
1277 struct amdgpu_device *adev = ring->adev; 1247 struct amdgpu_device *adev = ring->adev;
1278 1248
1279 amdgpu_ring_write(ring, 1249 amdgpu_ring_write(ring,
1280 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); 1250 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
1281 amdgpu_ring_write(ring, data0); 1251 amdgpu_ring_write(ring, reg << 2);
1282 amdgpu_ring_write(ring, 1252 amdgpu_ring_write(ring,
1283 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); 1253 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
1284 amdgpu_ring_write(ring, data1); 1254 amdgpu_ring_write(ring, val);
1285 amdgpu_ring_write(ring, 1255 amdgpu_ring_write(ring,
1286 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); 1256 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
1287 amdgpu_ring_write(ring, mask); 1257 amdgpu_ring_write(ring, mask);
@@ -1294,37 +1264,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
1294 unsigned vmid, uint64_t pd_addr) 1264 unsigned vmid, uint64_t pd_addr)
1295{ 1265{
1296 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 1266 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1297 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
1298 uint64_t flags = AMDGPU_PTE_VALID;
1299 unsigned eng = ring->vm_inv_eng;
1300 uint32_t data0, data1, mask; 1267 uint32_t data0, data1, mask;
1301 1268
1302 amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); 1269 pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1303 pd_addr |= flags;
1304
1305 data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2;
1306 data1 = upper_32_bits(pd_addr);
1307 uvd_v7_0_vm_reg_write(ring, data0, data1);
1308
1309 data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
1310 data1 = lower_32_bits(pd_addr);
1311 uvd_v7_0_vm_reg_write(ring, data0, data1);
1312 1270
1313 data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; 1271 /* wait for reg writes */
1272 data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
1314 data1 = lower_32_bits(pd_addr); 1273 data1 = lower_32_bits(pd_addr);
1315 mask = 0xffffffff; 1274 mask = 0xffffffff;
1316 uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); 1275 uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
1317
1318 /* flush TLB */
1319 data0 = (hub->vm_inv_eng0_req + eng) << 2;
1320 data1 = req;
1321 uvd_v7_0_vm_reg_write(ring, data0, data1);
1322
1323 /* wait for flush */
1324 data0 = (hub->vm_inv_eng0_ack + eng) << 2;
1325 data1 = 1 << vmid;
1326 mask = 1 << vmid;
1327 uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
1328} 1276}
1329 1277
1330static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 1278static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -1342,40 +1290,34 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
1342 amdgpu_ring_write(ring, HEVC_ENC_CMD_END); 1290 amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
1343} 1291}
1344 1292
1293static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
1294 uint32_t reg, uint32_t val,
1295 uint32_t mask)
1296{
1297 amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
1298 amdgpu_ring_write(ring, reg << 2);
1299 amdgpu_ring_write(ring, mask);
1300 amdgpu_ring_write(ring, val);
1301}
1302
1345static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, 1303static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
1346 unsigned int vmid, uint64_t pd_addr) 1304 unsigned int vmid, uint64_t pd_addr)
1347{ 1305{
1348 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 1306 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1349 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
1350 uint64_t flags = AMDGPU_PTE_VALID;
1351 unsigned eng = ring->vm_inv_eng;
1352 1307
1353 amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); 1308 pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1354 pd_addr |= flags;
1355 1309
1356 amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); 1310 /* wait for reg writes */
1357 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); 1311 uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
1358 amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 1312 lower_32_bits(pd_addr), 0xffffffff);
1359 1313}
1360 amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
1361 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
1362 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
1363
1364 amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
1365 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
1366 amdgpu_ring_write(ring, 0xffffffff);
1367 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
1368 1314
1369 /* flush TLB */ 1315static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
1316 uint32_t reg, uint32_t val)
1317{
1370 amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); 1318 amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
1371 amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); 1319 amdgpu_ring_write(ring, reg << 2);
1372 amdgpu_ring_write(ring, req); 1320 amdgpu_ring_write(ring, val);
1373
1374 /* wait for flush */
1375 amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
1376 amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
1377 amdgpu_ring_write(ring, 1 << vmid);
1378 amdgpu_ring_write(ring, 1 << vmid);
1379} 1321}
1380 1322
1381#if 0 1323#if 0
@@ -1712,22 +1654,23 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
1712 .get_wptr = uvd_v7_0_ring_get_wptr, 1654 .get_wptr = uvd_v7_0_ring_get_wptr,
1713 .set_wptr = uvd_v7_0_ring_set_wptr, 1655 .set_wptr = uvd_v7_0_ring_set_wptr,
1714 .emit_frame_size = 1656 .emit_frame_size =
1715 2 + /* uvd_v7_0_ring_emit_hdp_flush */ 1657 6 + 6 + /* hdp flush / invalidate */
1716 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */ 1658 SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
1717 34 + /* uvd_v7_0_ring_emit_vm_flush */ 1659 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
1660 8 + /* uvd_v7_0_ring_emit_vm_flush */
1718 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */ 1661 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
1719 .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */ 1662 .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
1720 .emit_ib = uvd_v7_0_ring_emit_ib, 1663 .emit_ib = uvd_v7_0_ring_emit_ib,
1721 .emit_fence = uvd_v7_0_ring_emit_fence, 1664 .emit_fence = uvd_v7_0_ring_emit_fence,
1722 .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, 1665 .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
1723 .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
1724 .emit_hdp_invalidate = uvd_v7_0_ring_emit_hdp_invalidate,
1725 .test_ring = uvd_v7_0_ring_test_ring, 1666 .test_ring = uvd_v7_0_ring_test_ring,
1726 .test_ib = amdgpu_uvd_ring_test_ib, 1667 .test_ib = amdgpu_uvd_ring_test_ib,
1727 .insert_nop = uvd_v7_0_ring_insert_nop, 1668 .insert_nop = uvd_v7_0_ring_insert_nop,
1728 .pad_ib = amdgpu_ring_generic_pad_ib, 1669 .pad_ib = amdgpu_ring_generic_pad_ib,
1729 .begin_use = amdgpu_uvd_ring_begin_use, 1670 .begin_use = amdgpu_uvd_ring_begin_use,
1730 .end_use = amdgpu_uvd_ring_end_use, 1671 .end_use = amdgpu_uvd_ring_end_use,
1672 .emit_wreg = uvd_v7_0_ring_emit_wreg,
1673 .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
1731}; 1674};
1732 1675
1733static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { 1676static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
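
The new frame-size budget follows directly from the per-packet costs: on this decode ring a register write is 6 dwords and a register wait is 8, so with SOC15_FLUSH_GPU_TLB_NUM_WREG = 4 and SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT = 1 (from the soc15.h hunk above) the VM-flush portion works out to 4*6 + 1*8 + 8 = 40 dwords, up from the flat 34 it replaces. A throwaway check of that arithmetic:

#include <assert.h>

#define SOC15_FLUSH_GPU_TLB_NUM_WREG		4
#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	1

int main(void)
{
	/* UVD 7.x decode ring: wreg = 6 dwords, reg_wait = 8 dwords,
	 * plus 8 dwords for the trailing wait in uvd_v7_0_ring_emit_vm_flush */
	int vm_flush_dw = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
			  SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 8;

	assert(vm_flush_dw == 40);
	return 0;
}
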
@@ -1740,7 +1683,10 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
1740 .get_wptr = uvd_v7_0_enc_ring_get_wptr, 1683 .get_wptr = uvd_v7_0_enc_ring_get_wptr,
1741 .set_wptr = uvd_v7_0_enc_ring_set_wptr, 1684 .set_wptr = uvd_v7_0_enc_ring_set_wptr,
1742 .emit_frame_size = 1685 .emit_frame_size =
1743 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */ 1686 3 + 3 + /* hdp flush / invalidate */
1687 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1688 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1689 4 + /* uvd_v7_0_enc_ring_emit_vm_flush */
1744 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */ 1690 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
1745 1, /* uvd_v7_0_enc_ring_insert_end */ 1691 1, /* uvd_v7_0_enc_ring_insert_end */
1746 .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */ 1692 .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
@@ -1754,6 +1700,8 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
1754 .pad_ib = amdgpu_ring_generic_pad_ib, 1700 .pad_ib = amdgpu_ring_generic_pad_ib,
1755 .begin_use = amdgpu_uvd_ring_begin_use, 1701 .begin_use = amdgpu_uvd_ring_begin_use,
1756 .end_use = amdgpu_uvd_ring_end_use, 1702 .end_use = amdgpu_uvd_ring_end_use,
1703 .emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
1704 .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
1757}; 1705};
1758 1706
1759static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) 1707static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index a5355eb689f1..428d1928e44e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -844,7 +844,7 @@ static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
844} 844}
845 845
846static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring, 846static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
847 unsigned int vmid, uint64_t pd_addr) 847 unsigned int vmid, uint64_t pd_addr)
848{ 848{
849 amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB); 849 amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
850 amdgpu_ring_write(ring, vmid); 850 amdgpu_ring_write(ring, vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 7cf2eef68cf2..73fd48d6c756 100755..100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -28,6 +28,7 @@
28#include <drm/drmP.h> 28#include <drm/drmP.h>
29#include "amdgpu.h" 29#include "amdgpu.h"
30#include "amdgpu_vce.h" 30#include "amdgpu_vce.h"
31#include "soc15.h"
31#include "soc15d.h" 32#include "soc15d.h"
32#include "soc15_common.h" 33#include "soc15_common.h"
33#include "mmsch_v1_0.h" 34#include "mmsch_v1_0.h"
@@ -419,7 +420,7 @@ static int vce_v4_0_sw_init(void *handle)
419 unsigned size; 420 unsigned size;
420 int r, i; 421 int r, i;
421 422
422 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq); 423 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
423 if (r) 424 if (r)
424 return r; 425 return r;
425 426
@@ -964,40 +965,33 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
964 amdgpu_ring_write(ring, VCE_CMD_END); 965 amdgpu_ring_write(ring, VCE_CMD_END);
965} 966}
966 967
968static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
969 uint32_t val, uint32_t mask)
970{
971 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
972 amdgpu_ring_write(ring, reg << 2);
973 amdgpu_ring_write(ring, mask);
974 amdgpu_ring_write(ring, val);
975}
976
967static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, 977static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
968 unsigned int vmid, uint64_t pd_addr) 978 unsigned int vmid, uint64_t pd_addr)
969{ 979{
970 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; 980 struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
971 uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
972 uint64_t flags = AMDGPU_PTE_VALID;
973 unsigned eng = ring->vm_inv_eng;
974 981
975 amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); 982 pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
976 pd_addr |= flags;
977 983
978 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 984 /* wait for reg writes */
979 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); 985 vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
980 amdgpu_ring_write(ring, upper_32_bits(pd_addr)); 986 lower_32_bits(pd_addr), 0xffffffff);
981 987}
982 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
983 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
984 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
985
986 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
987 amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
988 amdgpu_ring_write(ring, 0xffffffff);
989 amdgpu_ring_write(ring, lower_32_bits(pd_addr));
990 988
991 /* flush TLB */ 989static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
990 uint32_t reg, uint32_t val)
991{
992 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); 992 amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
993 amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); 993 amdgpu_ring_write(ring, reg << 2);
994 amdgpu_ring_write(ring, req); 994 amdgpu_ring_write(ring, val);
995
996 /* wait for flush */
997 amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
998 amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
999 amdgpu_ring_write(ring, 1 << vmid);
1000 amdgpu_ring_write(ring, 1 << vmid);
1001} 995}
1002 996
1003static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, 997static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
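
Same accounting for the VCE 4.0 ring in the next hunk, where the primitives are leaner: VCE_CMD_REG_WRITE costs 3 dwords and VCE_CMD_REG_WAIT costs 4, so the flush budget becomes 4*3 + 1*4 + 4 = 20 dwords, versus the flat 17 it replaces. In constant form:

/* VCE 4.0 / enc-style rings: wreg = 3 dwords, reg_wait = 4 dwords */
#define SOC15_FLUSH_GPU_TLB_NUM_WREG		4
#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	1
#define VCE4_VM_FLUSH_DW	(SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + \
				 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + 4)	/* == 20 */
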
@@ -1069,7 +1063,9 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1069 .set_wptr = vce_v4_0_ring_set_wptr, 1063 .set_wptr = vce_v4_0_ring_set_wptr,
1070 .parse_cs = amdgpu_vce_ring_parse_cs_vm, 1064 .parse_cs = amdgpu_vce_ring_parse_cs_vm,
1071 .emit_frame_size = 1065 .emit_frame_size =
1072 17 + /* vce_v4_0_emit_vm_flush */ 1066 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1067 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1068 4 + /* vce_v4_0_emit_vm_flush */
1073 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1069 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1074 1, /* vce_v4_0_ring_insert_end */ 1070 1, /* vce_v4_0_ring_insert_end */
1075 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ 1071 .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
@@ -1083,6 +1079,8 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1083 .pad_ib = amdgpu_ring_generic_pad_ib, 1079 .pad_ib = amdgpu_ring_generic_pad_ib,
1084 .begin_use = amdgpu_vce_ring_begin_use, 1080 .begin_use = amdgpu_vce_ring_begin_use,
1085 .end_use = amdgpu_vce_ring_end_use, 1081 .end_use = amdgpu_vce_ring_end_use,
1082 .emit_wreg = vce_v4_0_emit_wreg,
1083 .emit_reg_wait = vce_v4_0_emit_reg_wait,
1086}; 1084};
1087 1085
1088static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) 1086static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index b99e15c43e45..8c132673bc79 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -25,6 +25,7 @@
25#include <drm/drmP.h> 25#include <drm/drmP.h>
26#include "amdgpu.h" 26#include "amdgpu.h"
27#include "amdgpu_vcn.h" 27#include "amdgpu_vcn.h"
28#include "soc15.h"
28#include "soc15d.h" 29#include "soc15d.h"
29#include "soc15_common.h" 30#include "soc15_common.h"
30 31
@@ -74,13 +75,13 @@ static int vcn_v1_0_sw_init(void *handle)
74 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 75 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
75 76
76 /* VCN DEC TRAP */ 77 /* VCN DEC TRAP */
77 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, 124, &adev->vcn.irq); 78 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
78 if (r) 79 if (r)
79 return r; 80 return r;
80 81
81 /* VCN ENC TRAP */ 82 /* VCN ENC TRAP */
82 for (i = 0; i < adev->vcn.num_enc_rings; ++i) { 83 for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
83 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, i + 119, 84 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + 119,
84 &adev->vcn.irq); 85 &adev->vcn.irq);
85 if (r) 86 if (r)
86 return r; 87 return r;
@@ -809,21 +810,6 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
809} 810}
810 811
811/** 812/**
812 * vcn_v1_0_dec_ring_hdp_invalidate - emit an hdp invalidate
813 *
814 * @ring: amdgpu_ring pointer
815 *
816 * Emits an hdp invalidate.
817 */
818static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
819{
820 struct amdgpu_device *adev = ring->adev;
821
822 amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0));
823 amdgpu_ring_write(ring, 1);
824}
825
826/**
827 * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer 813 * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer
828 * 814 *
829 * @ring: amdgpu_ring pointer 815 * @ring: amdgpu_ring pointer
@@ -852,33 +838,18 @@ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
852 amdgpu_ring_write(ring, ib->length_dw); 838 amdgpu_ring_write(ring, ib->length_dw);
853} 839}
854 840
855static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring, 841static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
856 uint32_t data0, uint32_t data1) 842 uint32_t reg, uint32_t val,
843 uint32_t mask)
857{ 844{
858 struct amdgpu_device *adev = ring->adev; 845 struct amdgpu_device *adev = ring->adev;
859 846
860 amdgpu_ring_write(ring, 847 amdgpu_ring_write(ring,
861 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); 848 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
862 amdgpu_ring_write(ring, data0); 849 amdgpu_ring_write(ring, reg << 2);
863 amdgpu_ring_write(ring, 850 amdgpu_ring_write(ring,
864 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); 851 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
865 amdgpu_ring_write(ring, data1); 852 amdgpu_ring_write(ring, val);
866 amdgpu_ring_write(ring,
867 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
868 amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
869}
870
871static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
872 uint32_t data0, uint32_t data1, uint32_t mask)
873{
874 struct amdgpu_device *adev = ring->adev;
875
876 amdgpu_ring_write(ring,
877 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
878 amdgpu_ring_write(ring, data0);
879 amdgpu_ring_write(ring,
880 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
881 amdgpu_ring_write(ring, data1);
882 amdgpu_ring_write(ring, 853 amdgpu_ring_write(ring,
883 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); 854 PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
884 amdgpu_ring_write(ring, mask); 855 amdgpu_ring_write(ring, mask);
@@ -888,40 +859,34 @@ static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
 }
 
 static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					    unsigned vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
-	uint64_t flags = AMDGPU_PTE_VALID;
-	unsigned eng = ring->vm_inv_eng;
 	uint32_t data0, data1, mask;
 
-	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
-	pd_addr |= flags;
-
-	data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2;
-	data1 = upper_32_bits(pd_addr);
-	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
-
-	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
-	data1 = lower_32_bits(pd_addr);
-	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
+	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
-	data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
+	/* wait for register write */
+	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
 	data1 = lower_32_bits(pd_addr);
 	mask = 0xffffffff;
-	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
-
-	/* flush TLB */
-	data0 = (hub->vm_inv_eng0_req + eng) << 2;
-	data1 = req;
-	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
-
-	/* wait for flush */
-	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
-	data1 = 1 << vmid;
-	mask = 1 << vmid;
-	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
+	vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+					uint32_t reg, uint32_t val)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+	amdgpu_ring_write(ring, val);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+	amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
 }
 
 /**
@@ -1020,43 +985,34 @@ static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
+static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
+					    uint32_t reg, uint32_t val,
+					    uint32_t mask)
+{
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, val);
+}
+
 static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					    unsigned int vmid, uint64_t pd_addr)
 {
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
-	uint64_t flags = AMDGPU_PTE_VALID;
-	unsigned eng = ring->vm_inv_eng;
-
-	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
-	pd_addr |= flags;
 
-	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
-	amdgpu_ring_write(ring,
-			  (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
-	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
-	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
-	amdgpu_ring_write(ring,
-			  (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
-	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
-	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
-	amdgpu_ring_write(ring,
-			  (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
-	amdgpu_ring_write(ring, 0xffffffff);
-	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+	/* wait for reg writes */
+	vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
+					lower_32_bits(pd_addr), 0xffffffff);
+}
 
-	/* flush TLB */
+static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
+					uint32_t reg, uint32_t val)
+{
 	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
-	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
-	amdgpu_ring_write(ring, req);
-
-	/* wait for flush */
-	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
-	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-	amdgpu_ring_write(ring, 1 << vmid);
-	amdgpu_ring_write(ring, 1 << vmid);
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, val);
 }
 
 static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1133,15 +1089,16 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.get_wptr = vcn_v1_0_dec_ring_get_wptr,
 	.set_wptr = vcn_v1_0_dec_ring_set_wptr,
 	.emit_frame_size =
-		2 + /* vcn_v1_0_dec_ring_emit_hdp_invalidate */
-		34 + /* vcn_v1_0_dec_ring_emit_vm_flush */
+		6 + 6 + /* hdp invalidate / flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+		8 + /* vcn_v1_0_dec_ring_emit_vm_flush */
 		14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
 		6,
 	.emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */
 	.emit_ib = vcn_v1_0_dec_ring_emit_ib,
 	.emit_fence = vcn_v1_0_dec_ring_emit_fence,
 	.emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
-	.emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate,
 	.test_ring = amdgpu_vcn_dec_ring_test_ring,
 	.test_ib = amdgpu_vcn_dec_ring_test_ib,
 	.insert_nop = vcn_v1_0_ring_insert_nop,
@@ -1150,6 +1107,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = amdgpu_vcn_ring_begin_use,
 	.end_use = amdgpu_vcn_ring_end_use,
+	.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
+	.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
@@ -1162,7 +1121,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 	.get_wptr = vcn_v1_0_enc_ring_get_wptr,
 	.set_wptr = vcn_v1_0_enc_ring_set_wptr,
 	.emit_frame_size =
-		17 + /* vcn_v1_0_enc_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+		4 + /* vcn_v1_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */
 		1, /* vcn_v1_0_enc_ring_insert_end */
 	.emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */
@@ -1176,6 +1137,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = amdgpu_vcn_ring_begin_use,
 	.end_use = amdgpu_vcn_ring_end_use,
+	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
+	.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
 };
 
 static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
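Aside (not part of the patch): the hunks above replace open-coded register writes with generic emit_wreg/emit_reg_wait ring callbacks, which is what lets a shared helper such as amdgpu_gmc_emit_flush_gpu_tlb() stay engine-agnostic. The following standalone C sketch models only that callback indirection; every name in it (toy_ring, toy_emit_flush, the register offsets) is invented for illustration and does not exist in the driver.

/* Simplified model of per-ring emit callbacks driven by common code. */
#include <stdint.h>
#include <stdio.h>

struct toy_ring;

struct toy_ring_funcs {
	void (*emit_wreg)(struct toy_ring *ring, uint32_t reg, uint32_t val);
	void (*emit_reg_wait)(struct toy_ring *ring, uint32_t reg,
			      uint32_t val, uint32_t mask);
};

struct toy_ring {
	const char *name;
	const struct toy_ring_funcs *funcs;
};

/* A decoder-style ring might wrap each register access in its own packets. */
static void dec_emit_wreg(struct toy_ring *ring, uint32_t reg, uint32_t val)
{
	printf("%s: packetized write reg 0x%04x <- 0x%08x\n",
	       ring->name, (unsigned)reg, (unsigned)val);
}

static void dec_emit_reg_wait(struct toy_ring *ring, uint32_t reg,
			      uint32_t val, uint32_t mask)
{
	printf("%s: packetized wait reg 0x%04x == 0x%08x (mask 0x%08x)\n",
	       ring->name, (unsigned)reg, (unsigned)val, (unsigned)mask);
}

static const struct toy_ring_funcs dec_funcs = {
	.emit_wreg = dec_emit_wreg,
	.emit_reg_wait = dec_emit_reg_wait,
};

/* Shared helper in the spirit of a generic TLB-flush emitter: it only needs
 * the callbacks, not the ring type, so any engine can reuse it. */
static void toy_emit_flush(struct toy_ring *ring, unsigned vmid, uint64_t pd_addr)
{
	uint32_t lo_reg = 0x0100 + vmid * 2;	/* made-up register offsets */

	ring->funcs->emit_wreg(ring, lo_reg + 1, (uint32_t)(pd_addr >> 32));
	ring->funcs->emit_wreg(ring, lo_reg, (uint32_t)pd_addr);
	ring->funcs->emit_reg_wait(ring, lo_reg, (uint32_t)pd_addr, 0xffffffff);
}

int main(void)
{
	struct toy_ring dec = { .name = "dec", .funcs = &dec_funcs };

	toy_emit_flush(&dec, 3, 0x123456789000ull);
	return 0;
}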
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index ee14d78be2a9..5ae5ed2e62d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -245,8 +245,8 @@ static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
 	 * some faults get cleared.
 	 */
 	switch (dw0 & 0xff) {
-	case AMDGPU_IH_CLIENTID_VMC:
-	case AMDGPU_IH_CLIENTID_UTCL2:
+	case SOC15_IH_CLIENTID_VMC:
+	case SOC15_IH_CLIENTID_UTCL2:
 		break;
 	default:
 		/* Not a VM fault */
@@ -333,7 +333,7 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev,
 	entry->vmid_src = (dw[0] >> 31);
 	entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
 	entry->timestamp_src = dw[2] >> 31;
-	entry->pas_id = dw[3] & 0xffff;
+	entry->pasid = dw[3] & 0xffff;
 	entry->pasid_src = dw[3] >> 31;
 	entry->src_data[0] = dw[4];
 	entry->src_data[1] = dw[5];
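Aside (illustration only, not driver code): the decode hunk above slices raw interrupt-vector dwords into fields with masks, shifts, and a two-dword timestamp. The standalone sketch below demonstrates the same bit-slicing pattern; the struct, function, and sample values are invented for the example.

/* Toy decode of an interrupt-vector entry from raw dwords. */
#include <stdint.h>
#include <stdio.h>

struct toy_iv_entry {
	uint32_t client_id;
	uint32_t pasid;
	uint32_t pasid_src;
	uint64_t timestamp;
};

static void toy_decode_iv(const uint32_t dw[8], struct toy_iv_entry *entry)
{
	entry->client_id = dw[0] & 0xff;		/* low byte selects the client */
	entry->timestamp = dw[1] | ((uint64_t)(dw[2] & 0xffff) << 32); /* 48-bit stamp */
	entry->pasid     = dw[3] & 0xffff;		/* low 16 bits */
	entry->pasid_src = dw[3] >> 31;			/* single flag bit */
}

int main(void)
{
	const uint32_t dw[8] = { 0x12, 0xdeadbeef, 0x1234, 0x8000002a, 0, 0, 0, 0 };
	struct toy_iv_entry e;

	toy_decode_iv(dw, &e);
	printf("client 0x%02x pasid %u (src %u) ts 0x%012llx\n",
	       (unsigned)e.client_id, (unsigned)e.pasid, (unsigned)e.pasid_src,
	       (unsigned long long)e.timestamp);
	return 0;
}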
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index b7bdd04793d6..45aafca7f315 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -24,7 +24,8 @@
 #include "soc15.h"
 
 #include "soc15_common.h"
-#include "soc15ip.h"
+#include "soc15_hw_ip.h"
+#include "vega10_ip_offset.h"
 
 int vega10_reg_base_init(struct amdgpu_device *adev)
 {
@@ -37,6 +38,7 @@ int vega10_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
 		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
 		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
 		adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
 		adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
 		adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
@@ -48,7 +50,7 @@ int vega10_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
 		adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i]));
 		adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i]));
-
+		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 1e3e05a11f7a..126f1276d347 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -71,7 +71,6 @@
 #include "uvd_v5_0.h"
 #include "uvd_v6_0.h"
 #include "vce_v3_0.h"
-#include "amdgpu_powerplay.h"
 #if defined(CONFIG_DRM_AMD_ACP)
 #include "amdgpu_acp.h"
 #endif
@@ -856,6 +855,27 @@ static uint32_t vi_get_rev_id(struct amdgpu_device *adev)
 		>> PCIE_EFUSE4__STRAP_BIF_ATI_REV_ID__SHIFT;
 }
 
+static void vi_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	if (!ring || !ring->funcs->emit_wreg) {
+		WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+		RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+	} else {
+		amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+	}
+}
+
+static void vi_invalidate_hdp(struct amdgpu_device *adev,
+			      struct amdgpu_ring *ring)
+{
+	if (!ring || !ring->funcs->emit_wreg) {
+		WREG32(mmHDP_DEBUG0, 1);
+		RREG32(mmHDP_DEBUG0);
+	} else {
+		amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+	}
+}
+
 static const struct amdgpu_asic_funcs vi_asic_funcs =
 {
 	.read_disabled_bios = &vi_read_disabled_bios,
@@ -867,6 +887,8 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.set_uvd_clocks = &vi_set_uvd_clocks,
 	.set_vce_clocks = &vi_set_vce_clocks,
 	.get_config_memsize = &vi_get_config_memsize,
+	.flush_hdp = &vi_flush_hdp,
+	.invalidate_hdp = &vi_invalidate_hdp,
 };
 
 #define CZ_REV_BRISTOL(rev) \
@@ -874,7 +896,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 
 static int vi_common_early_init(void *handle)
 {
-	bool smc_enabled = false;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->flags & AMD_IS_APU) {
@@ -895,10 +916,6 @@ static int vi_common_early_init(void *handle)
 
 	adev->asic_funcs = &vi_asic_funcs;
 
-	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC) &&
-	    (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC)))
-		smc_enabled = true;
-
 	adev->rev_id = vi_get_rev_id(adev);
 	adev->external_rev_id = 0xFF;
 	switch (adev->asic_type) {
@@ -1074,11 +1091,6 @@ static int vi_common_early_init(void *handle)
 		xgpu_vi_mailbox_set_irq_funcs(adev);
 	}
 
-	/* vi use smc load by default */
-	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
-
-	amdgpu_device_get_pcie_info(adev);
-
 	return 0;
 }
 
@@ -1493,7 +1505,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block);
 		amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
@@ -1503,7 +1515,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block);
 		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1523,7 +1535,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1545,7 +1557,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
 		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1563,7 +1575,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1584,7 +1596,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
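Aside (illustration only, not driver code): the vi.c hunks above add vi_flush_hdp()/vi_invalidate_hdp(), which take either an immediate MMIO path or queue the register write on a ring when that ring exposes emit_wreg. The standalone C sketch below models that fallback pattern; all names, register numbers, and the fake MMIO array are invented for the example.

/* Toy model of "use the ring if it can emit register writes, else MMIO". */
#include <stdint.h>
#include <stdio.h>

struct toy_ring {
	void (*emit_wreg)(struct toy_ring *ring, uint32_t reg, uint32_t val);
};

static uint32_t fake_mmio[16];

static void toy_wreg32(uint32_t reg, uint32_t val)
{
	fake_mmio[reg % 16] = val;			/* simulated immediate write */
	printf("MMIO write reg %u <- %u\n", (unsigned)reg, (unsigned)val);
}

static void toy_ring_wreg(struct toy_ring *ring, uint32_t reg, uint32_t val)
{
	(void)ring;					/* would append a packet here */
	printf("queued on ring: reg %u <- %u\n", (unsigned)reg, (unsigned)val);
}

static void toy_flush_hdp(struct toy_ring *ring)
{
	const uint32_t TOY_FLUSH_REG = 5;		/* stand-in register */

	if (!ring || !ring->emit_wreg)
		toy_wreg32(TOY_FLUSH_REG, 1);		/* CPU path */
	else
		ring->emit_wreg(ring, TOY_FLUSH_REG, 1);	/* command-stream path */
}

int main(void)
{
	struct toy_ring ring = { .emit_wreg = toy_ring_wreg };

	toy_flush_hdp(NULL);	/* no ring available: immediate write */
	toy_flush_hdp(&ring);	/* ring present: emitted as a packet */
	return 0;
}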
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index 575d7aed5d32..0429fe332269 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -24,6 +24,8 @@
 #ifndef __VI_H__
 #define __VI_H__
 
+#define VI_FLUSH_GPU_TLB_NUM_WREG	3
+
 void vi_srbm_select(struct amdgpu_device *adev,
 		    u32 me, u32 pipe, u32 queue, u32 vmid);
 int vi_set_ip_blocks(struct amdgpu_device *adev);