Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
 drivers/gpu/drm/amd/amdgpu/amdgpu.h               |  227
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c          |    1
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |    4
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |    4
 drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c      |    8
 drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c  |    4
 drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c          |    1
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c            |  227
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c           |    3
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c        |   47
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c       |  135
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c           |   32
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c            |   17
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c         |  480
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c           |   58
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c            |    8
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c           |  123
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h          |   18
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c        |    3
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h        |    7
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c            |   13
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c          |   71
 drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c            |   46
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c         |   33
 drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c     |    2
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c          |   65
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h         |  106
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c           |   24
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c           |    5
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c           |   16
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c            |  234
 drivers/gpu/drm/amd/amdgpu/atom.c                 |   53
 drivers/gpu/drm/amd/amdgpu/atom.h                 |    2
 drivers/gpu/drm/amd/amdgpu/ci_dpm.c               |   16
 drivers/gpu/drm/amd/amdgpu/cik.c                  |    3
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c             |  149
 drivers/gpu/drm/amd/amdgpu/cz_dpm.c               |   31
 drivers/gpu/drm/amd/amdgpu/cz_smc.c               |   60
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c            |  272
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c            |  293
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c             |  274
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c             |   20
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c             | 1118
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c             |   53
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c             |   59
 drivers/gpu/drm/amd/amdgpu/kv_dpm.c               |   12
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c            |  175
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c            |  213
 drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c             |    1
 drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c             |    1
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c             |    1
 drivers/gpu/drm/amd/amdgpu/vce_v2_0.c             |    1
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c             |   30
 drivers/gpu/drm/amd/amdgpu/vi.c                   |   53
 54 files changed, 2869 insertions(+), 2043 deletions(-)
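
Much of this series converts the driver's SDMA bookkeeping from a fixed two-element array (adev->sdma[i] plus separate adev->sdma_trap_irq / adev->sdma_illegal_inst_irq fields) to a single adev->sdma container with a per-instance array and an instance count, as introduced in the amdgpu.h hunks below. The fragment that follows is an illustrative sketch only, not code from the patch set; example_print_sdma_fw_versions() is a made-up helper showing the new access pattern:

/* Hypothetical helper (not part of this patch set): walk the new
 * adev->sdma container instead of a fixed-size adev->sdma[] array.
 */
static void example_print_sdma_fw_versions(struct amdgpu_device *adev)
{
    int i;

    for (i = 0; i < adev->sdma.num_instances; i++)
        DRM_INFO("SDMA instance %d firmware version %u\n",
                 i, adev->sdma.instance[i].fw_version);
}
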
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6647fb26ef25..5a5f04d0902d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -79,6 +79,8 @@ extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
 extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fault_stop;
+extern int amdgpu_vm_debug;
 extern int amdgpu_enable_scheduler;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
@@ -343,7 +345,6 @@ struct amdgpu_ring_funcs {
     /* testing functions */
     int (*test_ring)(struct amdgpu_ring *ring);
     int (*test_ib)(struct amdgpu_ring *ring);
-    bool (*is_lockup)(struct amdgpu_ring *ring);
     /* insert NOP packets */
     void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 };
@@ -388,7 +389,6 @@ struct amdgpu_clock {
  * Fences.
  */
 struct amdgpu_fence_driver {
-    struct amdgpu_ring *ring;
     uint64_t gpu_addr;
     volatile uint32_t *cpu_addr;
     /* sync_seq is protected by ring emission lock */
@@ -397,14 +397,13 @@ struct amdgpu_fence_driver {
     bool initialized;
     struct amdgpu_irq_src *irq_src;
     unsigned irq_type;
-    struct delayed_work lockup_work;
+    struct timer_list fallback_timer;
     wait_queue_head_t fence_queue;
 };
 
 /* some special values for the owner field */
 #define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
 #define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
-#define AMDGPU_FENCE_OWNER_MOVE ((void*)2ul)
 
 #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT (1 << 1)
@@ -446,58 +445,11 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
-signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
-                                  struct fence **array,
-                                  uint32_t count,
-                                  bool intr,
-                                  signed long t);
-struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
-void amdgpu_fence_unref(struct amdgpu_fence **fence);
-
 bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
                             struct amdgpu_ring *ring);
 void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
                             struct amdgpu_ring *ring);
 
-static inline struct amdgpu_fence *amdgpu_fence_later(struct amdgpu_fence *a,
-                                                      struct amdgpu_fence *b)
-{
-    if (!a) {
-        return b;
-    }
-
-    if (!b) {
-        return a;
-    }
-
-    BUG_ON(a->ring != b->ring);
-
-    if (a->seq > b->seq) {
-        return a;
-    } else {
-        return b;
-    }
-}
-
-static inline bool amdgpu_fence_is_earlier(struct amdgpu_fence *a,
-                                           struct amdgpu_fence *b)
-{
-    if (!a) {
-        return false;
-    }
-
-    if (!b) {
-        return true;
-    }
-
-    BUG_ON(a->ring != b->ring);
-
-    return a->seq < b->seq;
-}
-
-int amdgpu_user_fence_emit(struct amdgpu_ring *ring, struct amdgpu_user_fence *user,
-                           void *owner, struct amdgpu_fence **fence);
-
 /*
  * TTM.
  */
@@ -544,6 +496,7 @@ struct amdgpu_bo_va_mapping {
 
 /* bo virtual addresses in a specific vm */
 struct amdgpu_bo_va {
+    struct mutex mutex;
     /* protected by bo being reserved */
     struct list_head bo_list;
     struct fence *last_pt_update;
@@ -586,6 +539,7 @@ struct amdgpu_bo {
     /* Constant after initialization */
     struct amdgpu_device *adev;
     struct drm_gem_object gem_base;
+    struct amdgpu_bo *parent;
 
     struct ttm_bo_kmap_obj dma_buf_vmap;
     pid_t pid;
@@ -708,7 +662,7 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
  */
 struct amdgpu_sync {
     struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
-    struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS];
+    struct fence *sync_to[AMDGPU_MAX_RINGS];
     DECLARE_HASHTABLE(fences, 4);
     struct fence *last_vm_update;
 };
@@ -905,8 +859,6 @@ struct amdgpu_ring {
     unsigned ring_size;
     unsigned ring_free_dw;
     int count_dw;
-    atomic_t last_rptr;
-    atomic64_t last_activity;
    uint64_t gpu_addr;
     uint32_t align_mask;
     uint32_t ptr_mask;
@@ -960,9 +912,14 @@ struct amdgpu_ring {
 #define AMDGPU_PTE_FRAG_64KB (4 << 7)
 #define AMDGPU_LOG2_PAGES_PER_FRAG 4
 
+/* How to programm VM fault handling */
+#define AMDGPU_VM_FAULT_STOP_NEVER 0
+#define AMDGPU_VM_FAULT_STOP_FIRST 1
+#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
+
 struct amdgpu_vm_pt {
     struct amdgpu_bo *bo;
     uint64_t addr;
 };
 
 struct amdgpu_vm_id {
@@ -970,13 +927,9 @@ struct amdgpu_vm_id {
     uint64_t pd_gpu_addr;
     /* last flushed PD/PT update */
     struct fence *flushed_updates;
-    /* last use of vmid */
-    struct amdgpu_fence *last_id_use;
 };
 
 struct amdgpu_vm {
-    struct mutex mutex;
-
     struct rb_root va;
 
     /* protecting invalidated */
@@ -1001,24 +954,72 @@ struct amdgpu_vm {
 
     /* for id and flush management per ring */
     struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS];
+    /* for interval tree */
+    spinlock_t it_lock;
+    /* protecting freed */
+    spinlock_t freed_lock;
 };
 
 struct amdgpu_vm_manager {
-    struct amdgpu_fence *active[AMDGPU_NUM_VM];
-    uint32_t max_pfn;
+    struct {
+        struct fence *active;
+        atomic_long_t owner;
+    } ids[AMDGPU_NUM_VM];
+
+    uint32_t max_pfn;
     /* number of VMIDs */
     unsigned nvm;
     /* vram base address for page table entry */
     u64 vram_base_offset;
     /* is vm enabled? */
     bool enabled;
-    /* for hw to save the PD addr on suspend/resume */
-    uint32_t saved_table_addr[AMDGPU_NUM_VM];
     /* vm pte handling */
    const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
     struct amdgpu_ring *vm_pte_funcs_ring;
 };
 
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
+                                               struct amdgpu_vm *vm,
+                                               struct list_head *head);
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+                      struct amdgpu_sync *sync);
+void amdgpu_vm_flush(struct amdgpu_ring *ring,
+                     struct amdgpu_vm *vm,
+                     struct fence *updates);
+void amdgpu_vm_fence(struct amdgpu_device *adev,
+                     struct amdgpu_vm *vm,
+                     struct fence *fence);
+uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+                                    struct amdgpu_vm *vm);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+                          struct amdgpu_vm *vm);
+int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                             struct amdgpu_sync *sync);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+                        struct amdgpu_bo_va *bo_va,
+                        struct ttm_mem_reg *mem);
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+                             struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+                                       struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+                                      struct amdgpu_vm *vm,
+                                      struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+                     struct amdgpu_bo_va *bo_va,
+                     uint64_t addr, uint64_t offset,
+                     uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+                       struct amdgpu_bo_va *bo_va,
+                       uint64_t addr);
+void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+                      struct amdgpu_bo_va *bo_va);
+int amdgpu_vm_free_job(struct amdgpu_job *job);
+
 /*
  * context related structures
  */
@@ -1223,8 +1224,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
 void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring);
-void amdgpu_ring_lockup_update(struct amdgpu_ring *ring);
-bool amdgpu_ring_test_lockup(struct amdgpu_ring *ring);
 unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
                             uint32_t **data);
 int amdgpu_ring_restore(struct amdgpu_ring *ring,
@@ -1234,6 +1233,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
                      struct amdgpu_irq_src *irq_src, unsigned irq_type,
                      enum amdgpu_ring_type ring_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
 
 /*
  * CS.
@@ -1256,6 +1256,7 @@ struct amdgpu_cs_parser {
     /* relocations */
     struct amdgpu_bo_list_entry *vm_bos;
     struct list_head validated;
+    struct fence *fence;
 
     struct amdgpu_ib *ibs;
     uint32_t num_ibs;
@@ -1271,7 +1272,7 @@ struct amdgpu_job {
     struct amdgpu_device *adev;
     struct amdgpu_ib *ibs;
     uint32_t num_ibs;
-    struct mutex job_lock;
+    void *owner;
     struct amdgpu_user_fence uf;
     int (*free_job)(struct amdgpu_job *job);
 };
@@ -1654,6 +1655,7 @@ struct amdgpu_pm {
     u8 fan_max_rpm;
     /* dpm */
     bool dpm_enabled;
+    bool sysfs_initialized;
     struct amdgpu_dpm dpm;
     const struct firmware *fw; /* SMC firmware */
     uint32_t fw_version;
@@ -1708,7 +1710,7 @@ struct amdgpu_vce {
 /*
  * SDMA
  */
-struct amdgpu_sdma {
+struct amdgpu_sdma_instance {
     /* SDMA firmware */
     const struct firmware *fw;
     uint32_t fw_version;
@@ -1718,6 +1720,13 @@ struct amdgpu_sdma {
     bool burst_nop;
 };
 
+struct amdgpu_sdma {
+    struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+    struct amdgpu_irq_src trap_irq;
+    struct amdgpu_irq_src illegal_inst_irq;
+    int num_instances;
+};
+
 /*
  * Firmware
  */
@@ -1750,11 +1759,11 @@ void amdgpu_test_syncing(struct amdgpu_device *adev);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
 #else
-static int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
+static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
     return -ENODEV;
 }
-static void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
+static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
 #endif
 
 /*
@@ -1946,7 +1955,6 @@ struct amdgpu_device {
     struct device *dev;
     struct drm_device *ddev;
     struct pci_dev *pdev;
-    struct rw_semaphore exclusive_lock;
 
     /* ASIC */
     enum amd_asic_type asic_type;
@@ -1960,7 +1968,6 @@ struct amdgpu_device {
     bool suspend;
     bool need_dma32;
     bool accel_working;
-    bool needs_reset;
     struct work_struct reset_work;
     struct notifier_block acpi_nb;
     struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
@@ -2064,9 +2071,7 @@ struct amdgpu_device {
     struct amdgpu_gfx gfx;
 
     /* sdma */
-    struct amdgpu_sdma sdma[AMDGPU_MAX_SDMA_INSTANCES];
-    struct amdgpu_irq_src sdma_trap_irq;
-    struct amdgpu_irq_src sdma_illegal_inst_irq;
+    struct amdgpu_sdma sdma;
 
     /* uvd */
     bool has_uvd;
@@ -2203,17 +2208,18 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
     ring->ring_free_dw--;
 }
 
-static inline struct amdgpu_sdma * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+static inline struct amdgpu_sdma_instance *
+amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 {
     struct amdgpu_device *adev = ring->adev;
     int i;
 
-    for (i = 0; i < AMDGPU_MAX_SDMA_INSTANCES; i++)
-        if (&adev->sdma[i].ring == ring)
+    for (i = 0; i < adev->sdma.num_instances; i++)
+        if (&adev->sdma.instance[i].ring == ring)
             break;
 
     if (i < AMDGPU_MAX_SDMA_INSTANCES)
-        return &adev->sdma[i];
+        return &adev->sdma.instance[i];
     else
         return NULL;
 }
@@ -2240,7 +2246,6 @@ static inline struct amdgpu_sdma * amdgpu_get_sdma_instance(struct amdgpu_ring *
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r))
-#define amdgpu_ring_is_lockup(r) (r)->funcs->is_lockup((r))
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
@@ -2298,11 +2303,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_card_posted(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
-                                                 struct drm_file *filp,
-                                                 struct amdgpu_ctx *ctx,
-                                                 struct amdgpu_ib *ibs,
-                                                 uint32_t num_ibs);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2349,10 +2349,10 @@ void amdgpu_driver_preclose_kms(struct drm_device *dev,
                                 struct drm_file *file_priv);
 int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon);
 int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
-u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, int crtc);
-int amdgpu_enable_vblank_kms(struct drm_device *dev, int crtc);
-void amdgpu_disable_vblank_kms(struct drm_device *dev, int crtc);
-int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, int crtc,
+u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
+int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
+void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
+int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
                                     int *max_error,
                                     struct timeval *vblank_time,
                                     unsigned flags);
@@ -2360,49 +2360,6 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
                              unsigned long arg);
 
 /*
- * vm
- */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
-                                               struct amdgpu_vm *vm,
-                                               struct list_head *head);
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-                      struct amdgpu_sync *sync);
-void amdgpu_vm_flush(struct amdgpu_ring *ring,
-                     struct amdgpu_vm *vm,
-                     struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
-                     struct amdgpu_vm *vm,
-                     struct amdgpu_fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
-                                    struct amdgpu_vm *vm);
-int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
-                          struct amdgpu_vm *vm);
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
-                             struct amdgpu_vm *vm, struct amdgpu_sync *sync);
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
-                        struct amdgpu_bo_va *bo_va,
-                        struct ttm_mem_reg *mem);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-                             struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
-                                       struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
-                                      struct amdgpu_vm *vm,
-                                      struct amdgpu_bo *bo);
-int amdgpu_vm_bo_map(struct amdgpu_device *adev,
-                     struct amdgpu_bo_va *bo_va,
-                     uint64_t addr, uint64_t offset,
-                     uint64_t size, uint32_t flags);
-int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
-                       struct amdgpu_bo_va *bo_va,
-                       uint64_t addr);
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
-                      struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
-/*
  * functions used by amdgpu_encoder.c
  */
 struct amdgpu_afmt_acr {
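
The amdgpu_vm_manager hunk above replaces the per-VMID struct amdgpu_fence pointer with a small anonymous struct holding a generic struct fence plus an owner tag. As a rough illustration of what such a table enables, the sketch below scans for a VMID whose last use has already signaled; this is a hypothetical fragment for orientation only, not the driver's actual amdgpu_vm_grab_id() logic, and it assumes the generic fence_is_signaled() helper from this kernel generation:

/* Hypothetical VMID scan over the new vm_manager.ids[] table. */
static int example_find_idle_vmid(struct amdgpu_device *adev)
{
    struct amdgpu_vm_manager *mgr = &adev->vm_manager;
    unsigned i;

    for (i = 1; i < mgr->nvm; i++) {    /* id 0 is left to the kernel context */
        struct fence *active = mgr->ids[i].active;

        if (!active || fence_is_signaled(active))
            return i;                   /* safe to reuse this VMID */
    }
    return -ENOSPC;                     /* caller has to wait on one of the fences */
}
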
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index aef4a7aac0f7..a142d5ae148d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -25,7 +25,6 @@
 #include <linux/acpi.h>
 #include <linux/slab.h>
 #include <linux/power_supply.h>
-#include <linux/vga_switcheroo.h>
 #include <acpi/video.h>
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index dd2037bc0b4a..0e1376317683 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -649,12 +649,12 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 
     case KGD_ENGINE_SDMA1:
         hdr = (const union amdgpu_firmware_header *)
-                adev->sdma[0].fw->data;
+                adev->sdma.instance[0].fw->data;
         break;
 
     case KGD_ENGINE_SDMA2:
         hdr = (const union amdgpu_firmware_header *)
-                adev->sdma[1].fw->data;
+                adev->sdma.instance[1].fw->data;
         break;
 
     default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index dfd1d503bccf..79fa5c7de856 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -523,12 +523,12 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 
     case KGD_ENGINE_SDMA1:
         hdr = (const union amdgpu_firmware_header *)
-                adev->sdma[0].fw->data;
+                adev->sdma.instance[0].fw->data;
         break;
 
     case KGD_ENGINE_SDMA2:
         hdr = (const union amdgpu_firmware_header *)
-                adev->sdma[1].fw->data;
+                adev->sdma.instance[1].fw->data;
         break;
 
     default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 77f1d7c6ea3a..9416e0f5c1db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -672,8 +672,12 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
     /* disp clock */
     adev->clock.default_dispclk =
         le32_to_cpu(firmware_info->info_21.ulDefaultDispEngineClkFreq);
-    if (adev->clock.default_dispclk == 0)
-        adev->clock.default_dispclk = 54000; /* 540 Mhz */
+    /* set a reasonable default for DP */
+    if (adev->clock.default_dispclk < 53900) {
+        DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
+                 adev->clock.default_dispclk / 100);
+        adev->clock.default_dispclk = 60000;
+    }
     adev->clock.dp_extclk =
         le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
     adev->clock.current_dispclk = adev->clock.default_dispclk;
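
Note that the dispclk values handled in this hunk are in units of 10 kHz, which is why the DRM_INFO message above divides by 100 to print megahertz. A minimal conversion sketch (illustrative only, not part of the patch):

/* Illustrative conversion: ATOM dispclk values are stored in 10 kHz units. */
static unsigned int example_dispclk_to_mhz(unsigned int dispclk_10khz)
{
    return dispclk_10khz / 100;    /* e.g. 60000 -> 600 MHz */
}
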
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 3f7aaa45bf8e..5a8fbadbd27b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -501,7 +501,7 @@ static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
     return VGA_SWITCHEROO_DIS;
 }
 
-static struct vga_switcheroo_handler amdgpu_atpx_handler = {
+static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
     .switchto = amdgpu_atpx_switchto,
     .power_state = amdgpu_atpx_power_state,
     .init = amdgpu_atpx_init,
@@ -536,7 +536,7 @@ static bool amdgpu_atpx_detect(void)
 
     if (has_atpx && vga_count == 2) {
         acpi_get_name(amdgpu_atpx_priv.atpx.handle, ACPI_FULL_PATHNAME, &buffer);
-        printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n",
+        printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n",
                acpi_method_name);
         amdgpu_atpx_priv.atpx_detected = true;
         return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 02add0a508cb..c44c0c6afd1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -29,7 +29,6 @@
 #include "amdgpu.h"
 #include "atom.h"
 
-#include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index cb3c274edb0a..4f352ec9dec4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,10 +104,11 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
         }
         break;
     case AMDGPU_HW_IP_DMA:
-        if (ring < 2) {
-            *out_ring = &adev->sdma[ring].ring;
+        if (ring < adev->sdma.num_instances) {
+            *out_ring = &adev->sdma.instance[ring].ring;
         } else {
-            DRM_ERROR("only two SDMA rings are supported\n");
+            DRM_ERROR("only %d SDMA rings are supported\n",
+                      adev->sdma.num_instances);
             return -EINVAL;
         }
         break;
@@ -126,30 +127,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
     return 0;
 }
 
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
-                                                 struct drm_file *filp,
-                                                 struct amdgpu_ctx *ctx,
-                                                 struct amdgpu_ib *ibs,
-                                                 uint32_t num_ibs)
-{
-    struct amdgpu_cs_parser *parser;
-    int i;
-
-    parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
-    if (!parser)
-        return NULL;
-
-    parser->adev = adev;
-    parser->filp = filp;
-    parser->ctx = ctx;
-    parser->ibs = ibs;
-    parser->num_ibs = num_ibs;
-    for (i = 0; i < num_ibs; i++)
-        ibs[i].ctx = ctx;
-
-    return parser;
-}
-
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
     union drm_amdgpu_cs *cs = data;
@@ -177,7 +154,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
     /* get chunks */
     INIT_LIST_HEAD(&p->validated);
-    chunk_array_user = (uint64_t __user *)(cs->in.chunks);
+    chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
     if (copy_from_user(chunk_array, chunk_array_user,
                        sizeof(uint64_t)*cs->in.num_chunks)) {
         ret = -EFAULT;
@@ -197,7 +174,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
         struct drm_amdgpu_cs_chunk user_chunk;
         uint32_t __user *cdata;
 
-        chunk_ptr = (void __user *)chunk_array[i];
+        chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
         if (copy_from_user(&user_chunk, chunk_ptr,
                            sizeof(struct drm_amdgpu_cs_chunk))) {
             ret = -EFAULT;
@@ -208,7 +185,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
         p->chunks[i].length_dw = user_chunk.length_dw;
 
         size = p->chunks[i].length_dw;
-        cdata = (void __user *)user_chunk.chunk_data;
+        cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
         p->chunks[i].user_ptr = cdata;
 
         p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
@@ -245,6 +222,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
             }
 
             p->uf.bo = gem_to_amdgpu_bo(gobj);
+            amdgpu_bo_ref(p->uf.bo);
+            drm_gem_object_unreference_unlocked(gobj);
             p->uf.offset = fence_data->offset;
         } else {
             ret = -EINVAL;
@@ -462,8 +441,18 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
     return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
 }
 
-static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser:	parser structure holding parsing context.
+ * @error:	error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
 {
+    unsigned i;
+
     if (!error) {
         /* Sort the buffer list from the smallest to largest buffer,
          * which affects the order of buffers in the LRU list.
@@ -478,17 +467,14 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err
         list_sort(NULL, &parser->validated, cmp_size_smaller_first);
 
         ttm_eu_fence_buffer_objects(&parser->ticket,
                                     &parser->validated,
-                                    &parser->ibs[parser->num_ibs-1].fence->base);
+                                    parser->fence);
     } else if (backoff) {
         ttm_eu_backoff_reservation(&parser->ticket,
                                    &parser->validated);
     }
-}
+    fence_put(parser->fence);
 
-static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
-{
-    unsigned i;
     if (parser->ctx)
         amdgpu_ctx_put(parser->ctx);
     if (parser->bo_list)
@@ -498,31 +484,12 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
     for (i = 0; i < parser->nchunks; i++)
         drm_free_large(parser->chunks[i].kdata);
     kfree(parser->chunks);
-    if (!amdgpu_enable_scheduler)
-    {
-        if (parser->ibs)
-            for (i = 0; i < parser->num_ibs; i++)
-                amdgpu_ib_free(parser->adev, &parser->ibs[i]);
-        kfree(parser->ibs);
-        if (parser->uf.bo)
-            drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
-    }
-
-    kfree(parser);
-}
-
-/**
- * cs_parser_fini() - clean parser states
- * @parser:	parser structure holding parsing context.
- * @error:	error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
-{
-    amdgpu_cs_parser_fini_early(parser, error, backoff);
-    amdgpu_cs_parser_fini_late(parser);
+    if (parser->ibs)
+        for (i = 0; i < parser->num_ibs; i++)
+            amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+    kfree(parser->ibs);
+    if (parser->uf.bo)
+        amdgpu_bo_unref(&parser->uf.bo);
 }
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -567,9 +534,24 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
         if (r)
             return r;
         }
+
     }
 
-    return amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync);
+    r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync);
+
+    if (amdgpu_vm_debug && p->bo_list) {
+        /* Invalidate all BOs to test for userspace bugs */
+        for (i = 0; i < p->bo_list->num_entries; i++) {
+            /* ignore duplicates */
+            bo = p->bo_list->array[i].robj;
+            if (!bo)
+                continue;
+
+            amdgpu_vm_bo_invalidate(adev, bo);
+        }
+    }
+
+    return r;
 }
 
 static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
@@ -593,18 +575,10 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
         }
     }
 
-    mutex_lock(&vm->mutex);
     r = amdgpu_bo_vm_update_pte(parser, vm);
-    if (r) {
-        goto out;
-    }
-    amdgpu_cs_sync_rings(parser);
-    if (!amdgpu_enable_scheduler)
-        r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
-                               parser->filp);
+    if (!r)
+        amdgpu_cs_sync_rings(parser);
 
-out:
-    mutex_unlock(&vm->mutex);
     return r;
 }
 
@@ -804,7 +778,7 @@ static int amdgpu_cs_free_job(struct amdgpu_job *job)
         amdgpu_ib_free(job->adev, &job->ibs[i]);
     kfree(job->ibs);
     if (job->uf.bo)
-        drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
+        amdgpu_bo_unref(&job->uf.bo);
     return 0;
 }
 
@@ -812,40 +786,35 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
     struct amdgpu_device *adev = dev->dev_private;
     union drm_amdgpu_cs *cs = data;
-    struct amdgpu_cs_parser *parser;
+    struct amdgpu_cs_parser parser = {};
     bool reserved_buffers = false;
     int i, r;
 
-    down_read(&adev->exclusive_lock);
-    if (!adev->accel_working) {
-        up_read(&adev->exclusive_lock);
+    if (!adev->accel_working)
         return -EBUSY;
-    }
 
-    parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
-    if (!parser)
-        return -ENOMEM;
-    r = amdgpu_cs_parser_init(parser, data);
+    parser.adev = adev;
+    parser.filp = filp;
+
+    r = amdgpu_cs_parser_init(&parser, data);
     if (r) {
         DRM_ERROR("Failed to initialize parser !\n");
-        kfree(parser);
-        up_read(&adev->exclusive_lock);
+        amdgpu_cs_parser_fini(&parser, r, false);
         r = amdgpu_cs_handle_lockup(adev, r);
         return r;
     }
-
-    r = amdgpu_cs_parser_relocs(parser);
+    r = amdgpu_cs_parser_relocs(&parser);
     if (r == -ENOMEM)
         DRM_ERROR("Not enough memory for command submission!\n");
     else if (r && r != -ERESTARTSYS)
         DRM_ERROR("Failed to process the buffer list %d!\n", r);
     else if (!r) {
         reserved_buffers = true;
-        r = amdgpu_cs_ib_fill(adev, parser);
+        r = amdgpu_cs_ib_fill(adev, &parser);
     }
 
     if (!r) {
-        r = amdgpu_cs_dependencies(adev, parser);
+        r = amdgpu_cs_dependencies(adev, &parser);
         if (r)
             DRM_ERROR("Failed in the dependencies handling %d!\n", r);
     }
@@ -853,61 +822,71 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
     if (r)
         goto out;
 
-    for (i = 0; i < parser->num_ibs; i++)
-        trace_amdgpu_cs(parser, i);
+    for (i = 0; i < parser.num_ibs; i++)
+        trace_amdgpu_cs(&parser, i);
 
-    r = amdgpu_cs_ib_vm_chunk(adev, parser);
+    r = amdgpu_cs_ib_vm_chunk(adev, &parser);
     if (r)
         goto out;
 
-    if (amdgpu_enable_scheduler && parser->num_ibs) {
+    if (amdgpu_enable_scheduler && parser.num_ibs) {
+        struct amdgpu_ring * ring = parser.ibs->ring;
+        struct amd_sched_fence *fence;
         struct amdgpu_job *job;
-        struct amdgpu_ring * ring = parser->ibs->ring;
+
         job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
-        if (!job)
-            return -ENOMEM;
+        if (!job) {
+            r = -ENOMEM;
+            goto out;
+        }
+
         job->base.sched = &ring->sched;
-        job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
-        job->adev = parser->adev;
-        job->ibs = parser->ibs;
-        job->num_ibs = parser->num_ibs;
-        job->base.owner = parser->filp;
-        mutex_init(&job->job_lock);
+        job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
+        job->adev = parser.adev;
+        job->owner = parser.filp;
+        job->free_job = amdgpu_cs_free_job;
+
+        job->ibs = parser.ibs;
+        job->num_ibs = parser.num_ibs;
+        parser.ibs = NULL;
+        parser.num_ibs = 0;
+
         if (job->ibs[job->num_ibs - 1].user) {
-            memcpy(&job->uf, &parser->uf,
-                   sizeof(struct amdgpu_user_fence));
+            job->uf = parser.uf;
             job->ibs[job->num_ibs - 1].user = &job->uf;
+            parser.uf.bo = NULL;
         }
 
-        job->free_job = amdgpu_cs_free_job;
-        mutex_lock(&job->job_lock);
-        r = amd_sched_entity_push_job(&job->base);
-        if (r) {
-            mutex_unlock(&job->job_lock);
+        fence = amd_sched_fence_create(job->base.s_entity,
+                                       parser.filp);
+        if (!fence) {
+            r = -ENOMEM;
             amdgpu_cs_free_job(job);
             kfree(job);
             goto out;
         }
-        cs->out.handle =
-            amdgpu_ctx_add_fence(parser->ctx, ring,
-                                 &job->base.s_fence->base);
-        parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+        job->base.s_fence = fence;
+        parser.fence = fence_get(&fence->base);
 
-        list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-        ttm_eu_fence_buffer_objects(&parser->ticket,
-                                    &parser->validated,
-                                    &job->base.s_fence->base);
+        cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
                                               &fence->base);
+        job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
 
-        mutex_unlock(&job->job_lock);
-        amdgpu_cs_parser_fini_late(parser);
-        up_read(&adev->exclusive_lock);
-        return 0;
+        trace_amdgpu_cs_ioctl(job);
+        amd_sched_entity_push_job(&job->base);
+
+    } else {
+        struct amdgpu_fence *fence;
+
+        r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
+                               parser.filp);
+        fence = parser.ibs[parser.num_ibs - 1].fence;
+        parser.fence = fence_get(&fence->base);
+        cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
     }
 
-    cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
 out:
-    amdgpu_cs_parser_fini(parser, r, reserved_buffers);
-    up_read(&adev->exclusive_lock);
+    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
     r = amdgpu_cs_handle_lockup(adev, r);
     return r;
 }
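
Several hunks above add an (unsigned long) step when converting 64-bit values from the CS ioctl structures into __user pointers. The fragment below is a small illustration of that idiom with made-up names; casting a u64 directly to a pointer provokes "cast to pointer from integer of different size" warnings on 32-bit kernels, so the value is first narrowed through unsigned long:

/* Illustrative only: mirrors the cast pattern used in amdgpu_cs_parser_init(). */
static int example_copy_chunk_ids(uint64_t chunks_uptr, unsigned int num_chunks,
                                  uint64_t *chunk_array)
{
    /* u64 -> unsigned long -> __user pointer; a no-op on 64-bit builds,
     * avoids a size-mismatch warning on 32-bit ones.
     */
    uint64_t __user *chunk_array_user =
        (uint64_t __user *)(unsigned long)chunks_uptr;

    if (copy_from_user(chunk_array, chunk_array_user,
                       sizeof(uint64_t) * num_chunks))
        return -EFAULT;

    return 0;
}
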
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index e0b80ccdfe8a..fec65f01c031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -69,6 +69,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
     struct amdgpu_device *adev = ctx->adev;
     unsigned i, j;
 
+    if (!adev)
+        return;
+
     for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
         for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
             fence_put(ctx->rings[i].fences[j]);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6068d8207d10..d5b421330145 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -57,6 +57,7 @@ static const char *amdgpu_asic_name[] = {
57 "TONGA", 57 "TONGA",
58 "FIJI", 58 "FIJI",
59 "CARRIZO", 59 "CARRIZO",
60 "STONEY",
60 "LAST", 61 "LAST",
61}; 62};
62 63
@@ -1022,7 +1023,7 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
1022 * amdgpu_switcheroo_set_state - set switcheroo state 1023 * amdgpu_switcheroo_set_state - set switcheroo state
1023 * 1024 *
1024 * @pdev: pci dev pointer 1025 * @pdev: pci dev pointer
1025 * @state: vga switcheroo state 1026 * @state: vga_switcheroo state
1026 * 1027 *
1027 * Callback for the switcheroo driver. Suspends or resumes the 1028 * Callback for the switcheroo driver. Suspends or resumes the
1028 * the asics before or after it is powered up using ACPI methods. 1029 * the asics before or after it is powered up using ACPI methods.
@@ -1165,7 +1166,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
1165 case CHIP_TONGA: 1166 case CHIP_TONGA:
1166 case CHIP_FIJI: 1167 case CHIP_FIJI:
1167 case CHIP_CARRIZO: 1168 case CHIP_CARRIZO:
1168 if (adev->asic_type == CHIP_CARRIZO) 1169 case CHIP_STONEY:
1170 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
1169 adev->family = AMDGPU_FAMILY_CZ; 1171 adev->family = AMDGPU_FAMILY_CZ;
1170 else 1172 else
1171 adev->family = AMDGPU_FAMILY_VI; 1173 adev->family = AMDGPU_FAMILY_VI;
@@ -1418,7 +1420,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
1418 mutex_init(&adev->gfx.gpu_clock_mutex); 1420 mutex_init(&adev->gfx.gpu_clock_mutex);
1419 mutex_init(&adev->srbm_mutex); 1421 mutex_init(&adev->srbm_mutex);
1420 mutex_init(&adev->grbm_idx_mutex); 1422 mutex_init(&adev->grbm_idx_mutex);
1421 init_rwsem(&adev->exclusive_lock);
1422 mutex_init(&adev->mn_lock); 1423 mutex_init(&adev->mn_lock);
1423 hash_init(adev->mn_hash); 1424 hash_init(adev->mn_hash);
1424 1425
@@ -1657,11 +1658,21 @@ int amdgpu_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
1657 } 1658 }
1658 drm_modeset_unlock_all(dev); 1659 drm_modeset_unlock_all(dev);
1659 1660
1660 /* unpin the front buffers */ 1661 /* unpin the front buffers and cursors */
1661 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 1662 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
1663 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
1662 struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb); 1664 struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb);
1663 struct amdgpu_bo *robj; 1665 struct amdgpu_bo *robj;
1664 1666
1667 if (amdgpu_crtc->cursor_bo) {
1668 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
1669 r = amdgpu_bo_reserve(aobj, false);
1670 if (r == 0) {
1671 amdgpu_bo_unpin(aobj);
1672 amdgpu_bo_unreserve(aobj);
1673 }
1674 }
1675
1665 if (rfb == NULL || rfb->obj == NULL) { 1676 if (rfb == NULL || rfb->obj == NULL) {
1666 continue; 1677 continue;
1667 } 1678 }
@@ -1713,6 +1724,7 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
1713{ 1724{
1714 struct drm_connector *connector; 1725 struct drm_connector *connector;
1715 struct amdgpu_device *adev = dev->dev_private; 1726 struct amdgpu_device *adev = dev->dev_private;
1727 struct drm_crtc *crtc;
1716 int r; 1728 int r;
1717 1729
1718 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 1730 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
@@ -1746,6 +1758,24 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
1746 if (r) 1758 if (r)
1747 return r; 1759 return r;
1748 1760
1761 /* pin cursors */
1762 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
1763 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
1764
1765 if (amdgpu_crtc->cursor_bo) {
1766 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
1767 r = amdgpu_bo_reserve(aobj, false);
1768 if (r == 0) {
1769 r = amdgpu_bo_pin(aobj,
1770 AMDGPU_GEM_DOMAIN_VRAM,
1771 &amdgpu_crtc->cursor_addr);
1772 if (r != 0)
1773 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
1774 amdgpu_bo_unreserve(aobj);
1775 }
1776 }
1777 }
1778
1749 /* blat the mode back in */ 1779 /* blat the mode back in */
1750 if (fbcon) { 1780 if (fbcon) {
1751 drm_helper_resume_force_mode(dev); 1781 drm_helper_resume_force_mode(dev);
@@ -1785,14 +1815,6 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
1785 int i, r; 1815 int i, r;
1786 int resched; 1816 int resched;
1787 1817
1788 down_write(&adev->exclusive_lock);
1789
1790 if (!adev->needs_reset) {
1791 up_write(&adev->exclusive_lock);
1792 return 0;
1793 }
1794
1795 adev->needs_reset = false;
1796 atomic_inc(&adev->gpu_reset_counter); 1818 atomic_inc(&adev->gpu_reset_counter);
1797 1819
1798 /* block TTM */ 1820 /* block TTM */
@@ -1856,7 +1878,6 @@ retry:
1856 dev_info(adev->dev, "GPU reset failed\n"); 1878 dev_info(adev->dev, "GPU reset failed\n");
1857 } 1879 }
1858 1880
1859 up_write(&adev->exclusive_lock);
1860 return r; 1881 return r;
1861} 1882}
1862 1883
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index e3d70772b531..5580d3420c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -47,11 +47,8 @@ static void amdgpu_flip_wait_fence(struct amdgpu_device *adev,
47 fence = to_amdgpu_fence(*f); 47 fence = to_amdgpu_fence(*f);
48 if (fence) { 48 if (fence) {
49 r = fence_wait(&fence->base, false); 49 r = fence_wait(&fence->base, false);
50 if (r == -EDEADLK) { 50 if (r == -EDEADLK)
51 up_read(&adev->exclusive_lock);
52 r = amdgpu_gpu_reset(adev); 51 r = amdgpu_gpu_reset(adev);
53 down_read(&adev->exclusive_lock);
54 }
55 } else 52 } else
56 r = fence_wait(*f, false); 53 r = fence_wait(*f, false);
57 54
@@ -76,8 +73,9 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
76 struct drm_crtc *crtc = &amdgpuCrtc->base; 73 struct drm_crtc *crtc = &amdgpuCrtc->base;
77 unsigned long flags; 74 unsigned long flags;
78 unsigned i; 75 unsigned i;
76 int vpos, hpos, stat, min_udelay;
77 struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
79 78
80 down_read(&adev->exclusive_lock);
81 amdgpu_flip_wait_fence(adev, &work->excl); 79 amdgpu_flip_wait_fence(adev, &work->excl);
82 for (i = 0; i < work->shared_count; ++i) 80 for (i = 0; i < work->shared_count; ++i)
83 amdgpu_flip_wait_fence(adev, &work->shared[i]); 81 amdgpu_flip_wait_fence(adev, &work->shared[i]);
@@ -85,15 +83,47 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
85 /* We borrow the event spin lock for protecting flip_status */ 83 /* We borrow the event spin lock for protecting flip_status */
86 spin_lock_irqsave(&crtc->dev->event_lock, flags); 84 spin_lock_irqsave(&crtc->dev->event_lock, flags);
87 85
88 /* set the proper interrupt */ 86 /* If this happens to execute within the "virtually extended" vblank
89 amdgpu_irq_get(adev, &adev->pageflip_irq, work->crtc_id); 87 * interval before the start of the real vblank interval then it needs
88 * to delay programming the mmio flip until the real vblank is entered.
89 * This prevents completing a flip too early due to the way we fudge
90 * our vblank counter and vblank timestamps in order to work around the
91 * problem that the hw fires vblank interrupts before actual start of
92 * vblank (when line buffer refilling is done for a frame). It
93 * complements the fudging logic in amdgpu_get_crtc_scanoutpos() for
94 * timestamping and amdgpu_get_vblank_counter_kms() for vblank counts.
95 *
96 * In practice this won't execute very often unless on very fast
97 * machines because the time window for this to happen is very small.
98 */
99 for (;;) {
100 /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
101 * start in hpos, and to the "fudged earlier" vblank start in
102 * vpos.
103 */
104 stat = amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id,
105 GET_DISTANCE_TO_VBLANKSTART,
106 &vpos, &hpos, NULL, NULL,
107 &crtc->hwmode);
108
109 if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
110 (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) ||
111 !(vpos >= 0 && hpos <= 0))
112 break;
113
114 /* Sleep at least until estimated real start of hw vblank */
115 spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
116 min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
117 usleep_range(min_udelay, 2 * min_udelay);
118 spin_lock_irqsave(&crtc->dev->event_lock, flags);
119 };
120
90 /* do the flip (mmio) */ 121 /* do the flip (mmio) */
91 adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); 122 adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
92 /* set the flip status */ 123 /* set the flip status */
93 amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED; 124 amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
94 125
95 spin_unlock_irqrestore(&crtc->dev->event_lock, flags); 126 spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
96 up_read(&adev->exclusive_lock);
97} 127}
98 128
99/* 129/*
@@ -116,7 +146,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
116 } else 146 } else
117 DRM_ERROR("failed to reserve buffer after flip\n"); 147 DRM_ERROR("failed to reserve buffer after flip\n");
118 148
119 drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); 149 amdgpu_bo_unref(&work->old_rbo);
120 kfree(work->shared); 150 kfree(work->shared);
121 kfree(work); 151 kfree(work);
122} 152}
@@ -155,8 +185,8 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
155 obj = old_amdgpu_fb->obj; 185 obj = old_amdgpu_fb->obj;
156 186
157 /* take a reference to the old object */ 187 /* take a reference to the old object */
158 drm_gem_object_reference(obj);
159 work->old_rbo = gem_to_amdgpu_bo(obj); 188 work->old_rbo = gem_to_amdgpu_bo(obj);
189 amdgpu_bo_ref(work->old_rbo);
160 190
161 new_amdgpu_fb = to_amdgpu_framebuffer(fb); 191 new_amdgpu_fb = to_amdgpu_framebuffer(fb);
162 obj = new_amdgpu_fb->obj; 192 obj = new_amdgpu_fb->obj;
@@ -186,10 +216,6 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
186 goto cleanup; 216 goto cleanup;
187 } 217 }
188 218
189 fence_get(work->excl);
190 for (i = 0; i < work->shared_count; ++i)
191 fence_get(work->shared[i]);
192
193 amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags); 219 amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags);
194 amdgpu_bo_unreserve(new_rbo); 220 amdgpu_bo_unreserve(new_rbo);
195 221
@@ -233,7 +259,7 @@ pflip_cleanup:
233 amdgpu_bo_unreserve(new_rbo); 259 amdgpu_bo_unreserve(new_rbo);
234 260
235cleanup: 261cleanup:
236 drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); 262 amdgpu_bo_unref(&work->old_rbo);
237 fence_put(work->excl); 263 fence_put(work->excl);
238 for (i = 0; i < work->shared_count; ++i) 264 for (i = 0; i < work->shared_count; ++i)
239 fence_put(work->shared[i]); 265 fence_put(work->shared[i]);
@@ -721,8 +747,17 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
721 * an optional accurate timestamp of when query happened. 747 * an optional accurate timestamp of when query happened.
722 * 748 *
723 * \param dev Device to query. 749 * \param dev Device to query.
724 * \param crtc Crtc to query. 750 * \param pipe Crtc to query.
725 * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0). 751 * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
 752 * For driver internal use only, it also supports these flags:
753 *
754 * USE_REAL_VBLANKSTART to use the real start of vblank instead
755 * of a fudged earlier start of vblank.
756 *
757 * GET_DISTANCE_TO_VBLANKSTART to return distance to the
758 * fudged earlier start of vblank in *vpos and the distance
759 * to true start of vblank in *hpos.
760 *
726 * \param *vpos Location where vertical scanout position should be stored. 761 * \param *vpos Location where vertical scanout position should be stored.
727 * \param *hpos Location where horizontal scanout position should go. 762 * \param *hpos Location where horizontal scanout position should go.
728 * \param *stime Target location for timestamp taken immediately before 763 * \param *stime Target location for timestamp taken immediately before
@@ -744,8 +779,10 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
744 * unknown small number of scanlines wrt. real scanout position. 779 * unknown small number of scanlines wrt. real scanout position.
745 * 780 *
746 */ 781 */
747int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int flags, 782int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
748 int *vpos, int *hpos, ktime_t *stime, ktime_t *etime) 783 unsigned int flags, int *vpos, int *hpos,
784 ktime_t *stime, ktime_t *etime,
785 const struct drm_display_mode *mode)
749{ 786{
750 u32 vbl = 0, position = 0; 787 u32 vbl = 0, position = 0;
751 int vbl_start, vbl_end, vtotal, ret = 0; 788 int vbl_start, vbl_end, vtotal, ret = 0;
@@ -759,7 +796,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
759 if (stime) 796 if (stime)
760 *stime = ktime_get(); 797 *stime = ktime_get();
761 798
762 if (amdgpu_display_page_flip_get_scanoutpos(adev, crtc, &vbl, &position) == 0) 799 if (amdgpu_display_page_flip_get_scanoutpos(adev, pipe, &vbl, &position) == 0)
763 ret |= DRM_SCANOUTPOS_VALID; 800 ret |= DRM_SCANOUTPOS_VALID;
764 801
765 /* Get optional system timestamp after query. */ 802 /* Get optional system timestamp after query. */
@@ -781,14 +818,44 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
781 } 818 }
782 else { 819 else {
783 /* No: Fake something reasonable which gives at least ok results. */ 820 /* No: Fake something reasonable which gives at least ok results. */
784 vbl_start = adev->mode_info.crtcs[crtc]->base.hwmode.crtc_vdisplay; 821 vbl_start = mode->crtc_vdisplay;
785 vbl_end = 0; 822 vbl_end = 0;
786 } 823 }
787 824
825 /* Called from driver internal vblank counter query code? */
826 if (flags & GET_DISTANCE_TO_VBLANKSTART) {
827 /* Caller wants distance from real vbl_start in *hpos */
828 *hpos = *vpos - vbl_start;
829 }
830
831 /* Fudge vblank to start a few scanlines earlier to handle the
832 * problem that vblank irqs fire a few scanlines before start
833 * of vblank. Some driver internal callers need the true vblank
834 * start to be used and signal this via the USE_REAL_VBLANKSTART flag.
835 *
836 * The cause of the "early" vblank irq is that the irq is triggered
837 * by the line buffer logic when the line buffer read position enters
838 * the vblank, whereas our crtc scanout position naturally lags the
839 * line buffer read position.
840 */
841 if (!(flags & USE_REAL_VBLANKSTART))
842 vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines;
843
788 /* Test scanout position against vblank region. */ 844 /* Test scanout position against vblank region. */
789 if ((*vpos < vbl_start) && (*vpos >= vbl_end)) 845 if ((*vpos < vbl_start) && (*vpos >= vbl_end))
790 in_vbl = false; 846 in_vbl = false;
791 847
848 /* In vblank? */
849 if (in_vbl)
850 ret |= DRM_SCANOUTPOS_IN_VBLANK;
851
852 /* Called from driver internal vblank counter query code? */
853 if (flags & GET_DISTANCE_TO_VBLANKSTART) {
854 /* Caller wants distance from fudged earlier vbl_start */
855 *vpos -= vbl_start;
856 return ret;
857 }
858
792 /* Check if inside vblank area and apply corrective offsets: 859 /* Check if inside vblank area and apply corrective offsets:
793 * vpos will then be >=0 in video scanout area, but negative 860 * vpos will then be >=0 in video scanout area, but negative
794 * within vblank area, counting down the number of lines until 861 * within vblank area, counting down the number of lines until
@@ -797,39 +864,13 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
797 864
798 /* Inside "upper part" of vblank area? Apply corrective offset if so: */ 865 /* Inside "upper part" of vblank area? Apply corrective offset if so: */
799 if (in_vbl && (*vpos >= vbl_start)) { 866 if (in_vbl && (*vpos >= vbl_start)) {
800 vtotal = adev->mode_info.crtcs[crtc]->base.hwmode.crtc_vtotal; 867 vtotal = mode->crtc_vtotal;
801 *vpos = *vpos - vtotal; 868 *vpos = *vpos - vtotal;
802 } 869 }
803 870
804 /* Correct for shifted end of vbl at vbl_end. */ 871 /* Correct for shifted end of vbl at vbl_end. */
805 *vpos = *vpos - vbl_end; 872 *vpos = *vpos - vbl_end;
806 873
807 /* In vblank? */
808 if (in_vbl)
809 ret |= DRM_SCANOUTPOS_IN_VBLANK;
810
811 /* Is vpos outside nominal vblank area, but less than
812 * 1/100 of a frame height away from start of vblank?
813 * If so, assume this isn't a massively delayed vblank
814 * interrupt, but a vblank interrupt that fired a few
815 * microseconds before true start of vblank. Compensate
816 * by adding a full frame duration to the final timestamp.
817 * Happens, e.g., on ATI R500, R600.
818 *
819 * We only do this if DRM_CALLED_FROM_VBLIRQ.
820 */
821 if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) {
822 vbl_start = adev->mode_info.crtcs[crtc]->base.hwmode.crtc_vdisplay;
823 vtotal = adev->mode_info.crtcs[crtc]->base.hwmode.crtc_vtotal;
824
825 if (vbl_start - *vpos < vtotal / 100) {
826 *vpos -= vtotal;
827
828 /* Signal this correction as "applied". */
829 ret |= 0x8;
830 }
831 }
832
833 return ret; 874 return ret;
834} 875}
835 876
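A standalone illustration of how GET_DISTANCE_TO_VBLANKSTART reports the two distances consumed by the flip work loop; the mode numbers are invented for the example.

#include <stdio.h>

int main(void)
{
	int vbl_start = 1080;   /* crtc_vdisplay: real vblank starts here */
	int lead_lines = 2;     /* lb_vblank_lead_lines: early-irq fudge */
	int vpos = 1079;        /* current scanout line */
	int hpos;

	hpos = vpos - vbl_start;  /* distance to the real vblank start: -1 */
	vbl_start -= lead_lines;  /* fudged earlier vblank start: 1078 */
	vpos = vpos - vbl_start;  /* distance to the fudged start: +1 */

	/* The flip loop keeps waiting while vpos >= 0 && hpos <= 0, i.e.
	 * inside the fudged lead-in but before the real vblank start. */
	printf("hpos=%d vpos=%d -> %s\n", hpos, vpos,
	       (vpos >= 0 && hpos <= 0) ? "wait" : "flip");
	return 0;
}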
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index adb48353f2e1..0508c5cd103a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -73,13 +73,15 @@ int amdgpu_hard_reset = 0;
73unsigned amdgpu_ip_block_mask = 0xffffffff; 73unsigned amdgpu_ip_block_mask = 0xffffffff;
74int amdgpu_bapm = -1; 74int amdgpu_bapm = -1;
75int amdgpu_deep_color = 0; 75int amdgpu_deep_color = 0;
76int amdgpu_vm_size = 8; 76int amdgpu_vm_size = 64;
77int amdgpu_vm_block_size = -1; 77int amdgpu_vm_block_size = -1;
78int amdgpu_vm_fault_stop = 0;
79int amdgpu_vm_debug = 0;
78int amdgpu_exp_hw_support = 0; 80int amdgpu_exp_hw_support = 0;
79int amdgpu_enable_scheduler = 0; 81int amdgpu_enable_scheduler = 1;
80int amdgpu_sched_jobs = 16; 82int amdgpu_sched_jobs = 16;
81int amdgpu_sched_hw_submission = 2; 83int amdgpu_sched_hw_submission = 2;
82int amdgpu_enable_semaphores = 1; 84int amdgpu_enable_semaphores = 0;
83 85
84MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); 86MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
85module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); 87module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -135,16 +137,22 @@ module_param_named(bapm, amdgpu_bapm, int, 0444);
135MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))"); 137MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))");
136module_param_named(deep_color, amdgpu_deep_color, int, 0444); 138module_param_named(deep_color, amdgpu_deep_color, int, 0444);
137 139
138MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 8GB)"); 140MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
139module_param_named(vm_size, amdgpu_vm_size, int, 0444); 141module_param_named(vm_size, amdgpu_vm_size, int, 0444);
140 142
141MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)"); 143MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
142module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); 144module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
143 145
146MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
147module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
148
149MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
150module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
151
144MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); 152MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
145module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); 153module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
146 154
147MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable, 0 = disable ((default))"); 155MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)");
148module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444); 156module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
149 157
150MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 16)"); 158MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 16)");
@@ -153,7 +161,7 @@ module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
153MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); 161MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
154module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); 162module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
155 163
156MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable (default), 0 = disable)"); 164MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))");
157module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644); 165module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
158 166
159static struct pci_device_id pciidlist[] = { 167static struct pci_device_id pciidlist[] = {
@@ -242,11 +250,11 @@ static struct pci_device_id pciidlist[] = {
242 {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, 250 {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
243#endif 251#endif
244 /* topaz */ 252 /* topaz */
245 {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, 253 {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
246 {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, 254 {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
247 {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, 255 {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
248 {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, 256 {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
249 {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, 257 {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT},
250 /* tonga */ 258 /* tonga */
251 {0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA}, 259 {0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
252 {0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA}, 260 {0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
@@ -265,6 +273,8 @@ static struct pci_device_id pciidlist[] = {
265 {0x1002, 0x9875, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU}, 273 {0x1002, 0x9875, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
266 {0x1002, 0x9876, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU}, 274 {0x1002, 0x9876, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
267 {0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU}, 275 {0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
276 /* stoney */
277 {0x1002, 0x98E4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_STONEY|AMD_IS_APU},
268 278
269 {0, 0, 0} 279 {0, 0, 0}
270}; 280};
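A hypothetical sketch (not the driver's actual fault path) of how the three vm_fault_stop values described above could be interpreted by a handler; the enum and function names are local to this example.

#include <stdio.h>
#include <stdbool.h>

enum { STOP_NEVER = 0, STOP_FIRST = 1, STOP_ALWAYS = 2 };

static int vm_fault_stop = STOP_FIRST; /* as if loaded with vm_fault_stop=1 */

static bool halt_on_fault(bool first_fault)
{
	switch (vm_fault_stop) {
	case STOP_FIRST:
		return first_fault; /* halt only on the first reported fault */
	case STOP_ALWAYS:
		return true;        /* halt on every fault */
	default:
		return false;       /* 0: never halt, only log */
	}
}

int main(void)
{
	printf("first fault halts: %d\n", halt_on_fault(true));
	printf("later fault halts: %d\n", halt_on_fault(false));
	return 0;
}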
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 8a122b1b7786..093a8c618931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -207,6 +207,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
207 } 207 }
208 208
209 info->par = rfbdev; 209 info->par = rfbdev;
210 info->skip_vt_switch = true;
210 211
211 ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj); 212 ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
212 if (ret) { 213 if (ret) {
@@ -402,3 +403,19 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
402 return true; 403 return true;
403 return false; 404 return false;
404} 405}
406
407void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev)
408{
409 struct amdgpu_fbdev *afbdev = adev->mode_info.rfbdev;
410 struct drm_fb_helper *fb_helper;
411 int ret;
412
413 if (!afbdev)
414 return;
415
416 fb_helper = &afbdev->helper;
417
418 ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper);
419 if (ret)
420 DRM_DEBUG("failed to restore crtc mode\n");
421}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index b3fc26c59787..3671f9f220bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,6 +47,9 @@
 47 * that the relevant GPU caches have been flushed. 47 */
48 */ 48 */
49 49
50static struct kmem_cache *amdgpu_fence_slab;
51static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
52
50/** 53/**
51 * amdgpu_fence_write - write a fence value 54 * amdgpu_fence_write - write a fence value
52 * 55 *
@@ -85,24 +88,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
85} 88}
86 89
87/** 90/**
88 * amdgpu_fence_schedule_check - schedule lockup check
89 *
90 * @ring: pointer to struct amdgpu_ring
91 *
92 * Queues a delayed work item to check for lockups.
93 */
94static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
95{
96 /*
97 * Do not reset the timer here with mod_delayed_work,
98 * this can livelock in an interaction with TTM delayed destroy.
99 */
100 queue_delayed_work(system_power_efficient_wq,
101 &ring->fence_drv.lockup_work,
102 AMDGPU_FENCE_JIFFIES_TIMEOUT);
103}
104
105/**
106 * amdgpu_fence_emit - emit a fence on the requested ring 91 * amdgpu_fence_emit - emit a fence on the requested ring
107 * 92 *
108 * @ring: ring the fence is associated with 93 * @ring: ring the fence is associated with
@@ -118,7 +103,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
118 struct amdgpu_device *adev = ring->adev; 103 struct amdgpu_device *adev = ring->adev;
119 104
120 /* we are protected by the ring emission mutex */ 105 /* we are protected by the ring emission mutex */
121 *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); 106 *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
122 if ((*fence) == NULL) { 107 if ((*fence) == NULL) {
123 return -ENOMEM; 108 return -ENOMEM;
124 } 109 }
@@ -132,44 +117,20 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
132 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 117 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
133 (*fence)->seq, 118 (*fence)->seq,
134 AMDGPU_FENCE_FLAG_INT); 119 AMDGPU_FENCE_FLAG_INT);
135 trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
136 return 0; 120 return 0;
137} 121}
138 122
139/** 123/**
140 * amdgpu_fence_check_signaled - callback from fence_queue 124 * amdgpu_fence_schedule_fallback - schedule fallback check
141 * 125 *
142 * this function is called with fence_queue lock held, which is also used 126 * @ring: pointer to struct amdgpu_ring
143 * for the fence locking itself, so unlocked variants are used for 127 *
144 * fence_signal, and remove_wait_queue. 128 * Start a timer as fallback to our interrupts.
145 */ 129 */
146static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key) 130static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
147{ 131{
148 struct amdgpu_fence *fence; 132 mod_timer(&ring->fence_drv.fallback_timer,
149 struct amdgpu_device *adev; 133 jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
150 u64 seq;
151 int ret;
152
153 fence = container_of(wait, struct amdgpu_fence, fence_wake);
154 adev = fence->ring->adev;
155
156 /*
157 * We cannot use amdgpu_fence_process here because we're already
158 * in the waitqueue, in a call from wake_up_all.
159 */
160 seq = atomic64_read(&fence->ring->fence_drv.last_seq);
161 if (seq >= fence->seq) {
162 ret = fence_signal_locked(&fence->base);
163 if (!ret)
164 FENCE_TRACE(&fence->base, "signaled from irq context\n");
165 else
166 FENCE_TRACE(&fence->base, "was already signaled\n");
167
168 __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
169 fence_put(&fence->base);
170 } else
171 FENCE_TRACE(&fence->base, "pending\n");
172 return 0;
173} 134}
174 135
175/** 136/**
@@ -238,52 +199,12 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
238 } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq); 199 } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
239 200
240 if (seq < last_emitted) 201 if (seq < last_emitted)
241 amdgpu_fence_schedule_check(ring); 202 amdgpu_fence_schedule_fallback(ring);
242 203
243 return wake; 204 return wake;
244} 205}
245 206
246/** 207/**
247 * amdgpu_fence_check_lockup - check for hardware lockup
248 *
249 * @work: delayed work item
250 *
251 * Checks for fence activity and if there is none probe
252 * the hardware if a lockup occured.
253 */
254static void amdgpu_fence_check_lockup(struct work_struct *work)
255{
256 struct amdgpu_fence_driver *fence_drv;
257 struct amdgpu_ring *ring;
258
259 fence_drv = container_of(work, struct amdgpu_fence_driver,
260 lockup_work.work);
261 ring = fence_drv->ring;
262
263 if (!down_read_trylock(&ring->adev->exclusive_lock)) {
264 /* just reschedule the check if a reset is going on */
265 amdgpu_fence_schedule_check(ring);
266 return;
267 }
268
269 if (amdgpu_fence_activity(ring)) {
270 wake_up_all(&ring->fence_drv.fence_queue);
271 }
272 else if (amdgpu_ring_is_lockup(ring)) {
273 /* good news we believe it's a lockup */
274 dev_warn(ring->adev->dev, "GPU lockup (current fence id "
275 "0x%016llx last fence id 0x%016llx on ring %d)\n",
276 (uint64_t)atomic64_read(&fence_drv->last_seq),
277 fence_drv->sync_seq[ring->idx], ring->idx);
278
279 /* remember that we need an reset */
280 ring->adev->needs_reset = true;
281 wake_up_all(&ring->fence_drv.fence_queue);
282 }
283 up_read(&ring->adev->exclusive_lock);
284}
285
286/**
287 * amdgpu_fence_process - process a fence 208 * amdgpu_fence_process - process a fence
288 * 209 *
289 * @adev: amdgpu_device pointer 210 * @adev: amdgpu_device pointer
@@ -299,6 +220,20 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
299} 220}
300 221
301/** 222/**
223 * amdgpu_fence_fallback - fallback for hardware interrupts
224 *
 225 * @arg: the ring to check, passed as an unsigned long
226 *
227 * Checks for fence activity.
228 */
229static void amdgpu_fence_fallback(unsigned long arg)
230{
231 struct amdgpu_ring *ring = (void *)arg;
232
233 amdgpu_fence_process(ring);
234}
235
236/**
302 * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled 237 * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
303 * 238 *
304 * @ring: ring the fence is associated with 239 * @ring: ring the fence is associated with
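A minimal sketch of the fallback-timer pattern this file moves to, written as a hypothetical module against the pre-4.15 timer API (setup_timer plus an unsigned long argument); the real driver re-arms the timer from its fence paths and calls amdgpu_fence_process() in the callback.

#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list fallback_timer;

static void fallback_fn(unsigned long arg)
{
	/* The driver polls for completed fences here. */
	pr_info("fallback timer fired\n");
}

static int __init demo_init(void)
{
	setup_timer(&fallback_timer, fallback_fn, 0);
	/* Re-armed whenever work is outstanding; HZ/2 is an arbitrary demo value. */
	mod_timer(&fallback_timer, jiffies + HZ / 2);
	return 0;
}

static void __exit demo_exit(void)
{
	del_timer_sync(&fallback_timer); /* matches the teardown in driver_fini */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");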
@@ -324,50 +259,6 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
324 return false; 259 return false;
325} 260}
326 261
327static bool amdgpu_fence_is_signaled(struct fence *f)
328{
329 struct amdgpu_fence *fence = to_amdgpu_fence(f);
330 struct amdgpu_ring *ring = fence->ring;
331 struct amdgpu_device *adev = ring->adev;
332
333 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
334 return true;
335
336 if (down_read_trylock(&adev->exclusive_lock)) {
337 amdgpu_fence_process(ring);
338 up_read(&adev->exclusive_lock);
339
340 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
341 return true;
342 }
343 return false;
344}
345
346/**
347 * amdgpu_fence_enable_signaling - enable signalling on fence
348 * @fence: fence
349 *
350 * This function is called with fence_queue lock held, and adds a callback
351 * to fence_queue that checks if this fence is signaled, and if so it
352 * signals the fence and removes itself.
353 */
354static bool amdgpu_fence_enable_signaling(struct fence *f)
355{
356 struct amdgpu_fence *fence = to_amdgpu_fence(f);
357 struct amdgpu_ring *ring = fence->ring;
358
359 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
360 return false;
361
362 fence->fence_wake.flags = 0;
363 fence->fence_wake.private = NULL;
364 fence->fence_wake.func = amdgpu_fence_check_signaled;
365 __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
366 fence_get(f);
367 FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
368 return true;
369}
370
371/* 262/*
372 * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal 263 * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
373 * @ring: ring to wait on for the seq number 264 * @ring: ring to wait on for the seq number
@@ -380,7 +271,6 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
380 */ 271 */
381static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq) 272static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
382{ 273{
383 struct amdgpu_device *adev = ring->adev;
384 bool signaled = false; 274 bool signaled = false;
385 275
386 BUG_ON(!ring); 276 BUG_ON(!ring);
@@ -390,9 +280,9 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
390 if (atomic64_read(&ring->fence_drv.last_seq) >= seq) 280 if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
391 return 0; 281 return 0;
392 282
283 amdgpu_fence_schedule_fallback(ring);
393 wait_event(ring->fence_drv.fence_queue, ( 284 wait_event(ring->fence_drv.fence_queue, (
394 (signaled = amdgpu_fence_seq_signaled(ring, seq)) 285 (signaled = amdgpu_fence_seq_signaled(ring, seq))));
395 || adev->needs_reset));
396 286
397 if (signaled) 287 if (signaled)
398 return 0; 288 return 0;
@@ -441,36 +331,6 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
441} 331}
442 332
443/** 333/**
444 * amdgpu_fence_ref - take a ref on a fence
445 *
446 * @fence: amdgpu fence object
447 *
448 * Take a reference on a fence (all asics).
449 * Returns the fence.
450 */
451struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence)
452{
453 fence_get(&fence->base);
454 return fence;
455}
456
457/**
458 * amdgpu_fence_unref - remove a ref on a fence
459 *
460 * @fence: amdgpu fence object
461 *
462 * Remove a reference on a fence (all asics).
463 */
464void amdgpu_fence_unref(struct amdgpu_fence **fence)
465{
466 struct amdgpu_fence *tmp = *fence;
467
468 *fence = NULL;
469 if (tmp)
470 fence_put(&tmp->base);
471}
472
473/**
474 * amdgpu_fence_count_emitted - get the count of emitted fences 334 * amdgpu_fence_count_emitted - get the count of emitted fences
475 * 335 *
476 * @ring: ring the fence is associated with 336 * @ring: ring the fence is associated with
@@ -621,15 +481,26 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
621 atomic64_set(&ring->fence_drv.last_seq, 0); 481 atomic64_set(&ring->fence_drv.last_seq, 0);
622 ring->fence_drv.initialized = false; 482 ring->fence_drv.initialized = false;
623 483
624 INIT_DELAYED_WORK(&ring->fence_drv.lockup_work, 484 setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
625 amdgpu_fence_check_lockup); 485 (unsigned long)ring);
626 ring->fence_drv.ring = ring;
627 486
628 init_waitqueue_head(&ring->fence_drv.fence_queue); 487 init_waitqueue_head(&ring->fence_drv.fence_queue);
629 488
630 if (amdgpu_enable_scheduler) { 489 if (amdgpu_enable_scheduler) {
490 long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
491 if (timeout == 0) {
492 /*
493 * FIXME:
 494 * A delayed workqueue cannot take MAX_SCHEDULE_TIMEOUT directly,
 495 * so the scheduler does not use a delayed workqueue when the
 496 * timeout is MAX_SCHEDULE_TIMEOUT.
497 * Currently keep it simple and silly.
498 */
499 timeout = MAX_SCHEDULE_TIMEOUT;
500 }
631 r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, 501 r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
632 amdgpu_sched_hw_submission, ring->name); 502 amdgpu_sched_hw_submission,
503 timeout, ring->name);
633 if (r) { 504 if (r) {
634 DRM_ERROR("Failed to create scheduler on ring %s.\n", 505 DRM_ERROR("Failed to create scheduler on ring %s.\n",
635 ring->name); 506 ring->name);
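A small standalone illustration of the timeout conversion in the scheduler-init hunk above: a zero-millisecond lockup timeout is treated as "wait forever". The helper here only approximates msecs_to_jiffies.

#include <stdio.h>
#include <limits.h>

#define MAX_SCHEDULE_TIMEOUT LONG_MAX /* same value the kernel uses */

static long msecs_to_jiffies_approx(unsigned int ms, unsigned int hz)
{
	return (long)ms * hz / 1000; /* simplified; the kernel rounds up */
}

int main(void)
{
	long timeout = msecs_to_jiffies_approx(0, 250); /* lockup timeout of 0 ms */

	if (timeout == 0)
		timeout = MAX_SCHEDULE_TIMEOUT; /* 0 means no timeout at all */

	printf("scheduler timeout in jiffies: %ld\n", timeout);
	return 0;
}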
@@ -654,6 +525,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
654 */ 525 */
655int amdgpu_fence_driver_init(struct amdgpu_device *adev) 526int amdgpu_fence_driver_init(struct amdgpu_device *adev)
656{ 527{
528 if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) {
529 amdgpu_fence_slab = kmem_cache_create(
530 "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
531 SLAB_HWCACHE_ALIGN, NULL);
532 if (!amdgpu_fence_slab)
533 return -ENOMEM;
534 }
657 if (amdgpu_debugfs_fence_init(adev)) 535 if (amdgpu_debugfs_fence_init(adev))
658 dev_err(adev->dev, "fence debugfs file creation failed\n"); 536 dev_err(adev->dev, "fence debugfs file creation failed\n");
659 537
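A hypothetical module sketch of the shared-slab pattern added above: the first device to initialize creates the kmem_cache, the last one to tear down destroys it.

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/types.h>

struct demo_fence { u64 seq; };

static struct kmem_cache *demo_fence_slab;
static atomic_t demo_fence_slab_ref = ATOMIC_INIT(0);

static int __init demo_init(void)
{
	struct demo_fence *f;

	if (atomic_inc_return(&demo_fence_slab_ref) == 1) {
		demo_fence_slab = kmem_cache_create("demo_fence",
				sizeof(struct demo_fence), 0,
				SLAB_HWCACHE_ALIGN, NULL);
		if (!demo_fence_slab)
			return -ENOMEM;
	}

	/* Allocations now come from the cache instead of kmalloc(). */
	f = kmem_cache_alloc(demo_fence_slab, GFP_KERNEL);
	if (f)
		kmem_cache_free(demo_fence_slab, f);
	return 0;
}

static void __exit demo_exit(void)
{
	if (atomic_dec_and_test(&demo_fence_slab_ref))
		kmem_cache_destroy(demo_fence_slab);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");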
@@ -672,9 +550,12 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
672{ 550{
673 int i, r; 551 int i, r;
674 552
553 if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
554 kmem_cache_destroy(amdgpu_fence_slab);
675 mutex_lock(&adev->ring_lock); 555 mutex_lock(&adev->ring_lock);
676 for (i = 0; i < AMDGPU_MAX_RINGS; i++) { 556 for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
677 struct amdgpu_ring *ring = adev->rings[i]; 557 struct amdgpu_ring *ring = adev->rings[i];
558
678 if (!ring || !ring->fence_drv.initialized) 559 if (!ring || !ring->fence_drv.initialized)
679 continue; 560 continue;
680 r = amdgpu_fence_wait_empty(ring); 561 r = amdgpu_fence_wait_empty(ring);
@@ -686,6 +567,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
686 amdgpu_irq_put(adev, ring->fence_drv.irq_src, 567 amdgpu_irq_put(adev, ring->fence_drv.irq_src,
687 ring->fence_drv.irq_type); 568 ring->fence_drv.irq_type);
688 amd_sched_fini(&ring->sched); 569 amd_sched_fini(&ring->sched);
570 del_timer_sync(&ring->fence_drv.fallback_timer);
689 ring->fence_drv.initialized = false; 571 ring->fence_drv.initialized = false;
690 } 572 }
691 mutex_unlock(&adev->ring_lock); 573 mutex_unlock(&adev->ring_lock);
@@ -773,6 +655,122 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
773 } 655 }
774} 656}
775 657
658/*
659 * Common fence implementation
660 */
661
662static const char *amdgpu_fence_get_driver_name(struct fence *fence)
663{
664 return "amdgpu";
665}
666
667static const char *amdgpu_fence_get_timeline_name(struct fence *f)
668{
669 struct amdgpu_fence *fence = to_amdgpu_fence(f);
670 return (const char *)fence->ring->name;
671}
672
673/**
674 * amdgpu_fence_is_signaled - test if fence is signaled
675 *
676 * @f: fence to test
677 *
678 * Test the fence sequence number if it is already signaled. If it isn't
679 * signaled start fence processing. Returns True if the fence is signaled.
680 */
681static bool amdgpu_fence_is_signaled(struct fence *f)
682{
683 struct amdgpu_fence *fence = to_amdgpu_fence(f);
684 struct amdgpu_ring *ring = fence->ring;
685
686 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
687 return true;
688
689 amdgpu_fence_process(ring);
690
691 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
692 return true;
693
694 return false;
695}
696
697/**
698 * amdgpu_fence_check_signaled - callback from fence_queue
699 *
700 * this function is called with fence_queue lock held, which is also used
701 * for the fence locking itself, so unlocked variants are used for
702 * fence_signal, and remove_wait_queue.
703 */
704static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
705{
706 struct amdgpu_fence *fence;
707 struct amdgpu_device *adev;
708 u64 seq;
709 int ret;
710
711 fence = container_of(wait, struct amdgpu_fence, fence_wake);
712 adev = fence->ring->adev;
713
714 /*
715 * We cannot use amdgpu_fence_process here because we're already
716 * in the waitqueue, in a call from wake_up_all.
717 */
718 seq = atomic64_read(&fence->ring->fence_drv.last_seq);
719 if (seq >= fence->seq) {
720 ret = fence_signal_locked(&fence->base);
721 if (!ret)
722 FENCE_TRACE(&fence->base, "signaled from irq context\n");
723 else
724 FENCE_TRACE(&fence->base, "was already signaled\n");
725
726 __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
727 fence_put(&fence->base);
728 } else
729 FENCE_TRACE(&fence->base, "pending\n");
730 return 0;
731}
732
733/**
734 * amdgpu_fence_enable_signaling - enable signalling on fence
735 * @fence: fence
736 *
737 * This function is called with fence_queue lock held, and adds a callback
738 * to fence_queue that checks if this fence is signaled, and if so it
739 * signals the fence and removes itself.
740 */
741static bool amdgpu_fence_enable_signaling(struct fence *f)
742{
743 struct amdgpu_fence *fence = to_amdgpu_fence(f);
744 struct amdgpu_ring *ring = fence->ring;
745
746 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
747 return false;
748
749 fence->fence_wake.flags = 0;
750 fence->fence_wake.private = NULL;
751 fence->fence_wake.func = amdgpu_fence_check_signaled;
752 __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
753 fence_get(f);
754 if (!timer_pending(&ring->fence_drv.fallback_timer))
755 amdgpu_fence_schedule_fallback(ring);
756 FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
757 return true;
758}
759
760static void amdgpu_fence_release(struct fence *f)
761{
762 struct amdgpu_fence *fence = to_amdgpu_fence(f);
763 kmem_cache_free(amdgpu_fence_slab, fence);
764}
765
766const struct fence_ops amdgpu_fence_ops = {
767 .get_driver_name = amdgpu_fence_get_driver_name,
768 .get_timeline_name = amdgpu_fence_get_timeline_name,
769 .enable_signaling = amdgpu_fence_enable_signaling,
770 .signaled = amdgpu_fence_is_signaled,
771 .wait = fence_default_wait,
772 .release = amdgpu_fence_release,
773};
776 774
777/* 775/*
778 * Fence debugfs 776 * Fence debugfs
@@ -823,141 +821,3 @@ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
823#endif 821#endif
824} 822}
825 823
826static const char *amdgpu_fence_get_driver_name(struct fence *fence)
827{
828 return "amdgpu";
829}
830
831static const char *amdgpu_fence_get_timeline_name(struct fence *f)
832{
833 struct amdgpu_fence *fence = to_amdgpu_fence(f);
834 return (const char *)fence->ring->name;
835}
836
837static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
838{
839 return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
840}
841
842static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
843{
844 int idx;
845 struct fence *fence;
846
847 for (idx = 0; idx < count; ++idx) {
848 fence = fences[idx];
849 if (fence) {
850 if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
851 return true;
852 }
853 }
854 return false;
855}
856
857struct amdgpu_wait_cb {
858 struct fence_cb base;
859 struct task_struct *task;
860};
861
862static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
863{
864 struct amdgpu_wait_cb *wait =
865 container_of(cb, struct amdgpu_wait_cb, base);
866 wake_up_process(wait->task);
867}
868
869static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
870 signed long t)
871{
872 struct amdgpu_fence *fence = to_amdgpu_fence(f);
873 struct amdgpu_device *adev = fence->ring->adev;
874
875 return amdgpu_fence_wait_any(adev, &f, 1, intr, t);
876}
877
878/**
879 * Wait the fence array with timeout
880 *
881 * @adev: amdgpu device
882 * @array: the fence array with amdgpu fence pointer
883 * @count: the number of the fence array
884 * @intr: when sleep, set the current task interruptable or not
885 * @t: timeout to wait
886 *
887 * It will return when any fence is signaled or timeout.
888 */
889signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
890 struct fence **array, uint32_t count,
891 bool intr, signed long t)
892{
893 struct amdgpu_wait_cb *cb;
894 struct fence *fence;
895 unsigned idx;
896
897 BUG_ON(!array);
898
899 cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL);
900 if (cb == NULL) {
901 t = -ENOMEM;
902 goto err_free_cb;
903 }
904
905 for (idx = 0; idx < count; ++idx) {
906 fence = array[idx];
907 if (fence) {
908 cb[idx].task = current;
909 if (fence_add_callback(fence,
910 &cb[idx].base, amdgpu_fence_wait_cb)) {
911 /* The fence is already signaled */
912 goto fence_rm_cb;
913 }
914 }
915 }
916
917 while (t > 0) {
918 if (intr)
919 set_current_state(TASK_INTERRUPTIBLE);
920 else
921 set_current_state(TASK_UNINTERRUPTIBLE);
922
923 /*
924 * amdgpu_test_signaled_any must be called after
925 * set_current_state to prevent a race with wake_up_process
926 */
927 if (amdgpu_test_signaled_any(array, count))
928 break;
929
930 if (adev->needs_reset) {
931 t = -EDEADLK;
932 break;
933 }
934
935 t = schedule_timeout(t);
936
937 if (t > 0 && intr && signal_pending(current))
938 t = -ERESTARTSYS;
939 }
940
941 __set_current_state(TASK_RUNNING);
942
943fence_rm_cb:
944 for (idx = 0; idx < count; ++idx) {
945 fence = array[idx];
946 if (fence && cb[idx].base.func)
947 fence_remove_callback(fence, &cb[idx].base);
948 }
949
950err_free_cb:
951 kfree(cb);
952
953 return t;
954}
955
956const struct fence_ops amdgpu_fence_ops = {
957 .get_driver_name = amdgpu_fence_get_driver_name,
958 .get_timeline_name = amdgpu_fence_get_timeline_name,
959 .enable_signaling = amdgpu_fence_enable_signaling,
960 .signaled = amdgpu_fence_is_signaled,
961 .wait = amdgpu_fence_default_wait,
962 .release = NULL,
963};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7297ca3a0ba7..9c253c535d26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -115,11 +115,9 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
115 struct amdgpu_vm *vm = &fpriv->vm; 115 struct amdgpu_vm *vm = &fpriv->vm;
116 struct amdgpu_bo_va *bo_va; 116 struct amdgpu_bo_va *bo_va;
117 int r; 117 int r;
118
119 r = amdgpu_bo_reserve(rbo, false); 118 r = amdgpu_bo_reserve(rbo, false);
120 if (r) { 119 if (r)
121 return r; 120 return r;
122 }
123 121
124 bo_va = amdgpu_vm_bo_find(vm, rbo); 122 bo_va = amdgpu_vm_bo_find(vm, rbo);
125 if (!bo_va) { 123 if (!bo_va) {
@@ -128,7 +126,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
128 ++bo_va->ref_count; 126 ++bo_va->ref_count;
129 } 127 }
130 amdgpu_bo_unreserve(rbo); 128 amdgpu_bo_unreserve(rbo);
131
132 return 0; 129 return 0;
133} 130}
134 131
@@ -141,7 +138,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
141 struct amdgpu_vm *vm = &fpriv->vm; 138 struct amdgpu_vm *vm = &fpriv->vm;
142 struct amdgpu_bo_va *bo_va; 139 struct amdgpu_bo_va *bo_va;
143 int r; 140 int r;
144
145 r = amdgpu_bo_reserve(rbo, true); 141 r = amdgpu_bo_reserve(rbo, true);
146 if (r) { 142 if (r) {
147 dev_err(adev->dev, "leaking bo va because " 143 dev_err(adev->dev, "leaking bo va because "
@@ -181,7 +177,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
181 bool kernel = false; 177 bool kernel = false;
182 int r; 178 int r;
183 179
184 down_read(&adev->exclusive_lock);
185 /* create a gem object to contain this object in */ 180 /* create a gem object to contain this object in */
186 if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | 181 if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
187 AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { 182 AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
@@ -214,11 +209,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
214 209
215 memset(args, 0, sizeof(*args)); 210 memset(args, 0, sizeof(*args));
216 args->out.handle = handle; 211 args->out.handle = handle;
217 up_read(&adev->exclusive_lock);
218 return 0; 212 return 0;
219 213
220error_unlock: 214error_unlock:
221 up_read(&adev->exclusive_lock);
222 r = amdgpu_gem_handle_lockup(adev, r); 215 r = amdgpu_gem_handle_lockup(adev, r);
223 return r; 216 return r;
224} 217}
@@ -242,16 +235,15 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
242 AMDGPU_GEM_USERPTR_REGISTER)) 235 AMDGPU_GEM_USERPTR_REGISTER))
243 return -EINVAL; 236 return -EINVAL;
244 237
245 if (!(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) || 238 if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && (
246 !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) { 239 !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
240 !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
247 241
248 /* if we want to write to it we must require anonymous 242 /* if we want to write to it we must require anonymous
249 memory and install a MMU notifier */ 243 memory and install a MMU notifier */
250 return -EACCES; 244 return -EACCES;
251 } 245 }
252 246
253 down_read(&adev->exclusive_lock);
254
255 /* create a gem object to contain this object in */ 247 /* create a gem object to contain this object in */
256 r = amdgpu_gem_object_create(adev, args->size, 0, 248 r = amdgpu_gem_object_create(adev, args->size, 0,
257 AMDGPU_GEM_DOMAIN_CPU, 0, 249 AMDGPU_GEM_DOMAIN_CPU, 0,
@@ -293,14 +285,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
293 goto handle_lockup; 285 goto handle_lockup;
294 286
295 args->handle = handle; 287 args->handle = handle;
296 up_read(&adev->exclusive_lock);
297 return 0; 288 return 0;
298 289
299release_object: 290release_object:
300 drm_gem_object_unreference_unlocked(gobj); 291 drm_gem_object_unreference_unlocked(gobj);
301 292
302handle_lockup: 293handle_lockup:
303 up_read(&adev->exclusive_lock);
304 r = amdgpu_gem_handle_lockup(adev, r); 294 r = amdgpu_gem_handle_lockup(adev, r);
305 295
306 return r; 296 return r;
@@ -487,19 +477,25 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
487 if (domain == AMDGPU_GEM_DOMAIN_CPU) 477 if (domain == AMDGPU_GEM_DOMAIN_CPU)
488 goto error_unreserve; 478 goto error_unreserve;
489 } 479 }
480 list_for_each_entry(entry, &duplicates, head) {
481 domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
482 /* if anything is swapped out don't swap it in here,
483 just abort and wait for the next CS */
484 if (domain == AMDGPU_GEM_DOMAIN_CPU)
485 goto error_unreserve;
486 }
490 487
491 mutex_lock(&bo_va->vm->mutex); 488 r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
492 r = amdgpu_vm_clear_freed(adev, bo_va->vm);
493 if (r) 489 if (r)
494 goto error_unlock; 490 goto error_unreserve;
495 491
492 r = amdgpu_vm_clear_freed(adev, bo_va->vm);
493 if (r)
494 goto error_unreserve;
496 495
497 if (operation == AMDGPU_VA_OP_MAP) 496 if (operation == AMDGPU_VA_OP_MAP)
498 r = amdgpu_vm_bo_update(adev, bo_va, &bo_va->bo->tbo.mem); 497 r = amdgpu_vm_bo_update(adev, bo_va, &bo_va->bo->tbo.mem);
499 498
500error_unlock:
501 mutex_unlock(&bo_va->vm->mutex);
502
503error_unreserve: 499error_unreserve:
504 ttm_eu_backoff_reservation(&ticket, &list); 500 ttm_eu_backoff_reservation(&ticket, &list);
505 501
@@ -521,6 +517,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
521 struct amdgpu_fpriv *fpriv = filp->driver_priv; 517 struct amdgpu_fpriv *fpriv = filp->driver_priv;
522 struct amdgpu_bo *rbo; 518 struct amdgpu_bo *rbo;
523 struct amdgpu_bo_va *bo_va; 519 struct amdgpu_bo_va *bo_va;
520 struct ttm_validate_buffer tv, tv_pd;
521 struct ww_acquire_ctx ticket;
522 struct list_head list, duplicates;
524 uint32_t invalid_flags, va_flags = 0; 523 uint32_t invalid_flags, va_flags = 0;
525 int r = 0; 524 int r = 0;
526 525
@@ -556,9 +555,19 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
556 gobj = drm_gem_object_lookup(dev, filp, args->handle); 555 gobj = drm_gem_object_lookup(dev, filp, args->handle);
557 if (gobj == NULL) 556 if (gobj == NULL)
558 return -ENOENT; 557 return -ENOENT;
559
560 rbo = gem_to_amdgpu_bo(gobj); 558 rbo = gem_to_amdgpu_bo(gobj);
561 r = amdgpu_bo_reserve(rbo, false); 559 INIT_LIST_HEAD(&list);
560 INIT_LIST_HEAD(&duplicates);
561 tv.bo = &rbo->tbo;
562 tv.shared = true;
563 list_add(&tv.head, &list);
564
565 if (args->operation == AMDGPU_VA_OP_MAP) {
566 tv_pd.bo = &fpriv->vm.page_directory->tbo;
567 tv_pd.shared = true;
568 list_add(&tv_pd.head, &list);
569 }
570 r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
562 if (r) { 571 if (r) {
563 drm_gem_object_unreference_unlocked(gobj); 572 drm_gem_object_unreference_unlocked(gobj);
564 return r; 573 return r;
@@ -566,7 +575,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
566 575
567 bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo); 576 bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo);
568 if (!bo_va) { 577 if (!bo_va) {
569 amdgpu_bo_unreserve(rbo); 578 ttm_eu_backoff_reservation(&ticket, &list);
579 drm_gem_object_unreference_unlocked(gobj);
570 return -ENOENT; 580 return -ENOENT;
571 } 581 }
572 582
@@ -588,7 +598,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
588 default: 598 default:
589 break; 599 break;
590 } 600 }
591 601 ttm_eu_backoff_reservation(&ticket, &list);
592 if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) 602 if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
593 amdgpu_gem_va_update_vm(adev, bo_va, args->operation); 603 amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
594 604
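A sketch (assuming the caller already has the BO and the VM page directory in hand) of the reserve/backoff pattern the VA ioctl switches to above, so that both objects are locked under one ww_acquire ticket while the page tables are updated.

#include <drm/ttm/ttm_execbuf_util.h>
#include <linux/list.h>

static int demo_reserve_bo_and_pd(struct ttm_buffer_object *bo,
				  struct ttm_buffer_object *page_directory)
{
	struct ttm_validate_buffer tv, tv_pd;
	struct ww_acquire_ctx ticket;
	struct list_head list, duplicates;
	int r;

	INIT_LIST_HEAD(&list);
	INIT_LIST_HEAD(&duplicates);

	tv.bo = bo;
	tv.shared = true;
	list_add(&tv.head, &list);

	tv_pd.bo = page_directory;
	tv_pd.shared = true;
	list_add(&tv_pd.head, &list);

	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
	if (r)
		return r;

	/* ... update mappings / page tables while everything is reserved ... */

	ttm_eu_backoff_reservation(&ticket, &list);
	return 0;
}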
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index c439735ee670..9e25edafa721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -62,7 +62,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
62 int r; 62 int r;
63 63
64 if (size) { 64 if (size) {
65 r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo, 65 r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
66 &ib->sa_bo, size, 256); 66 &ib->sa_bo, size, 256);
67 if (r) { 67 if (r) {
68 dev_err(adev->dev, "failed to get a new IB (%d)\n", r); 68 dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
@@ -95,7 +95,8 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
95{ 95{
96 amdgpu_sync_free(adev, &ib->sync, &ib->fence->base); 96 amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
97 amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); 97 amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
98 amdgpu_fence_unref(&ib->fence); 98 if (ib->fence)
99 fence_put(&ib->fence->base);
99} 100}
100 101
101/** 102/**
@@ -215,7 +216,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
215 } 216 }
216 217
217 if (ib->vm) 218 if (ib->vm)
218 amdgpu_vm_fence(adev, ib->vm, ib->fence); 219 amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
219 220
220 amdgpu_ring_unlock_commit(ring); 221 amdgpu_ring_unlock_commit(ring);
221 return 0; 222 return 0;
@@ -298,7 +299,6 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
298 r = amdgpu_ring_test_ib(ring); 299 r = amdgpu_ring_test_ib(ring);
299 if (r) { 300 if (r) {
300 ring->ready = false; 301 ring->ready = false;
301 adev->needs_reset = false;
302 302
303 if (ring == &adev->gfx.gfx_ring[0]) { 303 if (ring == &adev->gfx.gfx_ring[0]) {
304 /* oh, oh, that's really bad */ 304 /* oh, oh, that's really bad */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 8c735f544b66..e23843f4d877 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -218,8 +218,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
218 break; 218 break;
219 case AMDGPU_HW_IP_DMA: 219 case AMDGPU_HW_IP_DMA:
220 type = AMD_IP_BLOCK_TYPE_SDMA; 220 type = AMD_IP_BLOCK_TYPE_SDMA;
221 ring_mask = adev->sdma[0].ring.ready ? 1 : 0; 221 for (i = 0; i < adev->sdma.num_instances; i++)
222 ring_mask |= ((adev->sdma[1].ring.ready ? 1 : 0) << 1); 222 ring_mask |= ((adev->sdma.instance[i].ring.ready ? 1 : 0) << i);
223 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; 223 ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
224 ib_size_alignment = 1; 224 ib_size_alignment = 1;
225 break; 225 break;
@@ -341,10 +341,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
341 fw_info.feature = 0; 341 fw_info.feature = 0;
342 break; 342 break;
343 case AMDGPU_INFO_FW_SDMA: 343 case AMDGPU_INFO_FW_SDMA:
344 if (info->query_fw.index >= 2) 344 if (info->query_fw.index >= adev->sdma.num_instances)
345 return -EINVAL; 345 return -EINVAL;
346 fw_info.ver = adev->sdma[info->query_fw.index].fw_version; 346 fw_info.ver = adev->sdma.instance[info->query_fw.index].fw_version;
347 fw_info.feature = adev->sdma[info->query_fw.index].feature_version; 347 fw_info.feature = adev->sdma.instance[info->query_fw.index].feature_version;
348 break; 348 break;
349 default: 349 default:
350 return -EINVAL; 350 return -EINVAL;
@@ -485,14 +485,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
485 * Outdated mess for old drm with Xorg being in charge (void function now). 485 * Outdated mess for old drm with Xorg being in charge (void function now).
486 */ 486 */
487/** 487/**
488 * amdgpu_driver_firstopen_kms - drm callback for last close 488 * amdgpu_driver_lastclose_kms - drm callback for last close
489 * 489 *
490 * @dev: drm dev pointer 490 * @dev: drm dev pointer
491 * 491 *
492 * Switch vga switcheroo state after last close (all asics). 492 * Switch vga_switcheroo state after last close (all asics).
493 */ 493 */
494void amdgpu_driver_lastclose_kms(struct drm_device *dev) 494void amdgpu_driver_lastclose_kms(struct drm_device *dev)
495{ 495{
496 struct amdgpu_device *adev = dev->dev_private;
497
498 amdgpu_fbdev_restore_mode(adev);
496 vga_switcheroo_process_delayed_switch(); 499 vga_switcheroo_process_delayed_switch();
497} 500}
498 501
@@ -600,36 +603,82 @@ void amdgpu_driver_preclose_kms(struct drm_device *dev,
600 * amdgpu_get_vblank_counter_kms - get frame count 603 * amdgpu_get_vblank_counter_kms - get frame count
601 * 604 *
602 * @dev: drm dev pointer 605 * @dev: drm dev pointer
603 * @crtc: crtc to get the frame count from 606 * @pipe: crtc to get the frame count from
604 * 607 *
605 * Gets the frame count on the requested crtc (all asics). 608 * Gets the frame count on the requested crtc (all asics).
606 * Returns frame count on success, -EINVAL on failure. 609 * Returns frame count on success, -EINVAL on failure.
607 */ 610 */
608u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, int crtc) 611u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
609{ 612{
610 struct amdgpu_device *adev = dev->dev_private; 613 struct amdgpu_device *adev = dev->dev_private;
614 int vpos, hpos, stat;
615 u32 count;
611 616
612 if (crtc < 0 || crtc >= adev->mode_info.num_crtc) { 617 if (pipe >= adev->mode_info.num_crtc) {
613 DRM_ERROR("Invalid crtc %d\n", crtc); 618 DRM_ERROR("Invalid crtc %u\n", pipe);
614 return -EINVAL; 619 return -EINVAL;
615 } 620 }
616 621
617 return amdgpu_display_vblank_get_counter(adev, crtc); 622 /* The hw increments its frame counter at start of vsync, not at start
623 * of vblank, as is required by DRM core vblank counter handling.
624 * Cook the hw count here to make it appear to the caller as if it
625 * incremented at start of vblank. We measure distance to start of
626 * vblank in vpos. vpos therefore will be >= 0 between start of vblank
627 * and start of vsync, so vpos >= 0 means to bump the hw frame counter
628 * result by 1 to give the proper appearance to caller.
629 */
630 if (adev->mode_info.crtcs[pipe]) {
631 /* Repeat readout if needed to provide stable result if
632 * we cross start of vsync during the queries.
633 */
634 do {
635 count = amdgpu_display_vblank_get_counter(adev, pipe);
636 /* Ask amdgpu_get_crtc_scanoutpos to return vpos as
637 * distance to start of vblank, instead of regular
638 * vertical scanout pos.
639 */
640 stat = amdgpu_get_crtc_scanoutpos(
641 dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
642 &vpos, &hpos, NULL, NULL,
643 &adev->mode_info.crtcs[pipe]->base.hwmode);
644 } while (count != amdgpu_display_vblank_get_counter(adev, pipe));
645
646 if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
647 (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) {
648 DRM_DEBUG_VBL("Query failed! stat %d\n", stat);
649 } else {
650 DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n",
651 pipe, vpos);
652
653 /* Bump counter if we are at >= leading edge of vblank,
654 * but before vsync where vpos would turn negative and
655 * the hw counter really increments.
656 */
657 if (vpos >= 0)
658 count++;
659 }
660 } else {
661 /* Fallback to use value as is. */
662 count = amdgpu_display_vblank_get_counter(adev, pipe);
663 DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n");
664 }
665
666 return count;
618} 667}
619 668
620/** 669/**
621 * amdgpu_enable_vblank_kms - enable vblank interrupt 670 * amdgpu_enable_vblank_kms - enable vblank interrupt
622 * 671 *
623 * @dev: drm dev pointer 672 * @dev: drm dev pointer
624 * @crtc: crtc to enable vblank interrupt for 673 * @pipe: crtc to enable vblank interrupt for
625 * 674 *
626 * Enable the interrupt on the requested crtc (all asics). 675 * Enable the interrupt on the requested crtc (all asics).
627 * Returns 0 on success, -EINVAL on failure. 676 * Returns 0 on success, -EINVAL on failure.
628 */ 677 */
629int amdgpu_enable_vblank_kms(struct drm_device *dev, int crtc) 678int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
630{ 679{
631 struct amdgpu_device *adev = dev->dev_private; 680 struct amdgpu_device *adev = dev->dev_private;
632 int idx = amdgpu_crtc_idx_to_irq_type(adev, crtc); 681 int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe);
633 682
634 return amdgpu_irq_get(adev, &adev->crtc_irq, idx); 683 return amdgpu_irq_get(adev, &adev->crtc_irq, idx);
635} 684}
@@ -638,14 +687,14 @@ int amdgpu_enable_vblank_kms(struct drm_device *dev, int crtc)
638 * amdgpu_disable_vblank_kms - disable vblank interrupt 687 * amdgpu_disable_vblank_kms - disable vblank interrupt
639 * 688 *
640 * @dev: drm dev pointer 689 * @dev: drm dev pointer
641 * @crtc: crtc to disable vblank interrupt for 690 * @pipe: crtc to disable vblank interrupt for
642 * 691 *
643 * Disable the interrupt on the requested crtc (all asics). 692 * Disable the interrupt on the requested crtc (all asics).
644 */ 693 */
645void amdgpu_disable_vblank_kms(struct drm_device *dev, int crtc) 694void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
646{ 695{
647 struct amdgpu_device *adev = dev->dev_private; 696 struct amdgpu_device *adev = dev->dev_private;
648 int idx = amdgpu_crtc_idx_to_irq_type(adev, crtc); 697 int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe);
649 698
650 amdgpu_irq_put(adev, &adev->crtc_irq, idx); 699 amdgpu_irq_put(adev, &adev->crtc_irq, idx);
651} 700}
@@ -663,41 +712,41 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, int crtc)
663 * scanout position. (all asics). 712 * scanout position. (all asics).
 664 * Returns positive status flags on success, negative error on failure. 713
665 */ 714 */
666int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, 715int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
667 int *max_error, 716 int *max_error,
668 struct timeval *vblank_time, 717 struct timeval *vblank_time,
669 unsigned flags) 718 unsigned flags)
670{ 719{
671 struct drm_crtc *drmcrtc; 720 struct drm_crtc *crtc;
672 struct amdgpu_device *adev = dev->dev_private; 721 struct amdgpu_device *adev = dev->dev_private;
673 722
674 if (crtc < 0 || crtc >= dev->num_crtcs) { 723 if (pipe >= dev->num_crtcs) {
675 DRM_ERROR("Invalid crtc %d\n", crtc); 724 DRM_ERROR("Invalid crtc %u\n", pipe);
676 return -EINVAL; 725 return -EINVAL;
677 } 726 }
678 727
679 /* Get associated drm_crtc: */ 728 /* Get associated drm_crtc: */
680 drmcrtc = &adev->mode_info.crtcs[crtc]->base; 729 crtc = &adev->mode_info.crtcs[pipe]->base;
681 730
682 /* Helper routine in DRM core does all the work: */ 731 /* Helper routine in DRM core does all the work: */
683 return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc, max_error, 732 return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
684 vblank_time, flags, 733 vblank_time, flags,
685 drmcrtc, &drmcrtc->hwmode); 734 &crtc->hwmode);
686} 735}
687 736
688const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { 737const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
689 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 738 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
690 DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 739 DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
691 DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 740 DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
692 /* KMS */ 741 /* KMS */
693 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 742 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
694 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 743 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
695 DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 744 DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
696 DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 745 DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
697 DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 746 DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
698 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 747 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
699 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 748 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
700 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 749 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
701 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), 750 DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
702}; 751};
703int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); 752int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
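An illustrative standalone example of the counter "cooking" done in amdgpu_get_vblank_counter_kms above: between start of vblank and start of vsync the hardware count has not ticked yet, so it is bumped by one. The numbers are invented.

#include <stdio.h>

int main(void)
{
	unsigned int hw_count = 1000; /* raw counter, increments at vsync start */
	int vpos = 3;                 /* lines past the start of vblank */
	unsigned int count = hw_count;

	/* vpos >= 0 means we are at or past the leading edge of vblank but
	 * before vsync, so report one frame more than the hardware does. */
	if (vpos >= 0)
		count++;

	printf("hw=%u cooked=%u\n", hw_count, count);
	return 0;
}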
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 64efe5b52e65..064ebb347074 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -373,6 +373,10 @@ struct amdgpu_crtc {
373 uint32_t crtc_offset; 373 uint32_t crtc_offset;
374 struct drm_gem_object *cursor_bo; 374 struct drm_gem_object *cursor_bo;
375 uint64_t cursor_addr; 375 uint64_t cursor_addr;
376 int cursor_x;
377 int cursor_y;
378 int cursor_hot_x;
379 int cursor_hot_y;
376 int cursor_width; 380 int cursor_width;
377 int cursor_height; 381 int cursor_height;
378 int max_cursor_width; 382 int max_cursor_width;
@@ -403,6 +407,7 @@ struct amdgpu_crtc {
403 u32 line_time; 407 u32 line_time;
404 u32 wm_low; 408 u32 wm_low;
405 u32 wm_high; 409 u32 wm_high;
410 u32 lb_vblank_lead_lines;
406 struct drm_display_mode hw_mode; 411 struct drm_display_mode hw_mode;
407}; 412};
408 413
@@ -524,6 +529,10 @@ struct amdgpu_framebuffer {
524#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ 529#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
525 ((em) == ATOM_ENCODER_MODE_DP_MST)) 530 ((em) == ATOM_ENCODER_MODE_DP_MST))
526 531
532/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
533#define USE_REAL_VBLANKSTART (1 << 30)
534#define GET_DISTANCE_TO_VBLANKSTART (1 << 31)
535
527void amdgpu_link_encoder_connector(struct drm_device *dev); 536void amdgpu_link_encoder_connector(struct drm_device *dev);
528 537
529struct drm_connector * 538struct drm_connector *
@@ -540,10 +549,10 @@ bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, bool use_aux);
540 549
541void amdgpu_encoder_set_active_device(struct drm_encoder *encoder); 550void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
542 551
543int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, int crtc, 552int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
544 unsigned int flags, 553 unsigned int flags, int *vpos, int *hpos,
545 int *vpos, int *hpos, ktime_t *stime, 554 ktime_t *stime, ktime_t *etime,
546 ktime_t *etime); 555 const struct drm_display_mode *mode);
547 556
548int amdgpu_framebuffer_init(struct drm_device *dev, 557int amdgpu_framebuffer_init(struct drm_device *dev,
549 struct amdgpu_framebuffer *rfb, 558 struct amdgpu_framebuffer *rfb,
@@ -567,6 +576,7 @@ void amdgpu_fbdev_fini(struct amdgpu_device *adev);
567void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state); 576void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state);
568int amdgpu_fbdev_total_size(struct amdgpu_device *adev); 577int amdgpu_fbdev_total_size(struct amdgpu_device *adev);
569bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj); 578bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj);
579void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev);
570 580
571void amdgpu_fb_output_poll_changed(struct amdgpu_device *adev); 581void amdgpu_fb_output_poll_changed(struct amdgpu_device *adev);
572 582
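
A hedged usage sketch for the updated scanout-position helper declared above (the wrapper name is illustrative and not part of this change; the signature and the GET_DISTANCE_TO_VBLANKSTART flag come from the header hunks above):

/* Minimal sketch: query the distance to real vblank start for one pipe.
 * Assumes dev->dev_private is the amdgpu_device and that the crtc's
 * hwmode has been programmed; returns DRM_SCANOUTPOS_* status flags. */
static int example_distance_to_vblank(struct drm_device *dev,
                                      unsigned int pipe,
                                      int *vpos, int *hpos)
{
        struct amdgpu_device *adev = dev->dev_private;
        const struct drm_display_mode *mode =
                &adev->mode_info.crtcs[pipe]->base.hwmode;

        return amdgpu_get_crtc_scanoutpos(dev, pipe,
                                          GET_DISTANCE_TO_VBLANKSTART,
                                          vpos, hpos, NULL, NULL, mode);
}
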
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 1a7708f365f3..c3ce103b6a33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -100,6 +100,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
100 list_del_init(&bo->list); 100 list_del_init(&bo->list);
101 mutex_unlock(&bo->adev->gem.mutex); 101 mutex_unlock(&bo->adev->gem.mutex);
102 drm_gem_object_release(&bo->gem_base); 102 drm_gem_object_release(&bo->gem_base);
103 amdgpu_bo_unref(&bo->parent);
103 kfree(bo->metadata); 104 kfree(bo->metadata);
104 kfree(bo); 105 kfree(bo);
105} 106}
@@ -132,6 +133,8 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
132 placements[c].fpfn = 0; 133 placements[c].fpfn = 0;
133 placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | 134 placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
134 TTM_PL_FLAG_VRAM; 135 TTM_PL_FLAG_VRAM;
136 if (!(flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED))
137 placements[c - 1].flags |= TTM_PL_FLAG_TOPDOWN;
135 } 138 }
136 139
137 if (domain & AMDGPU_GEM_DOMAIN_GTT) { 140 if (domain & AMDGPU_GEM_DOMAIN_GTT) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 3c2ff4567798..ea756e77b023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -189,10 +189,9 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
189 struct amdgpu_sa_manager *sa_manager); 189 struct amdgpu_sa_manager *sa_manager);
190int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, 190int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
191 struct amdgpu_sa_manager *sa_manager); 191 struct amdgpu_sa_manager *sa_manager);
192int amdgpu_sa_bo_new(struct amdgpu_device *adev, 192int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
193 struct amdgpu_sa_manager *sa_manager, 193 struct amdgpu_sa_bo **sa_bo,
194 struct amdgpu_sa_bo **sa_bo, 194 unsigned size, unsigned align);
195 unsigned size, unsigned align);
196void amdgpu_sa_bo_free(struct amdgpu_device *adev, 195void amdgpu_sa_bo_free(struct amdgpu_device *adev,
197 struct amdgpu_sa_bo **sa_bo, 196 struct amdgpu_sa_bo **sa_bo,
198 struct fence *fence); 197 struct fence *fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index efed11509f4a..22a8c7d3a3ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -294,10 +294,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
294 struct amdgpu_device *adev = dev_get_drvdata(dev); 294 struct amdgpu_device *adev = dev_get_drvdata(dev);
295 umode_t effective_mode = attr->mode; 295 umode_t effective_mode = attr->mode;
296 296
297 /* Skip limit attributes if DPM is not enabled */ 297 /* Skip attributes if DPM is not enabled */
298 if (!adev->pm.dpm_enabled && 298 if (!adev->pm.dpm_enabled &&
299 (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || 299 (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr ||
300 attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) 300 attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr ||
301 attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
302 attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
303 attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
304 attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
301 return 0; 305 return 0;
302 306
303 /* Skip fan attributes if fan is not present */ 307 /* Skip fan attributes if fan is not present */
@@ -691,6 +695,9 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
691{ 695{
692 int ret; 696 int ret;
693 697
698 if (adev->pm.sysfs_initialized)
699 return 0;
700
694 if (adev->pm.funcs->get_temperature == NULL) 701 if (adev->pm.funcs->get_temperature == NULL)
695 return 0; 702 return 0;
696 adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, 703 adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
@@ -719,6 +726,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
719 return ret; 726 return ret;
720 } 727 }
721 728
729 adev->pm.sysfs_initialized = true;
730
722 return 0; 731 return 0;
723} 732}
724 733
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 30dce235ddeb..78e9b0f14661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -67,8 +67,6 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring)
67 if (!ring->ring_free_dw) { 67 if (!ring->ring_free_dw) {
68 /* this is an empty ring */ 68 /* this is an empty ring */
69 ring->ring_free_dw = ring->ring_size / 4; 69 ring->ring_free_dw = ring->ring_size / 4;
70 /* update lockup info to avoid false positive */
71 amdgpu_ring_lockup_update(ring);
72 } 70 }
73} 71}
74 72
@@ -209,46 +207,6 @@ void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
209} 207}
210 208
211/** 209/**
212 * amdgpu_ring_lockup_update - update lockup variables
213 *
214 * @ring: amdgpu_ring structure holding ring information
215 *
216 * Update the last rptr value and timestamp (all asics).
217 */
218void amdgpu_ring_lockup_update(struct amdgpu_ring *ring)
219{
220 atomic_set(&ring->last_rptr, amdgpu_ring_get_rptr(ring));
221 atomic64_set(&ring->last_activity, jiffies_64);
222}
223
224/**
225 * amdgpu_ring_test_lockup() - check if ring is lockedup by recording information
226 * @ring: amdgpu_ring structure holding ring information
227 *
228 */
229bool amdgpu_ring_test_lockup(struct amdgpu_ring *ring)
230{
231 uint32_t rptr = amdgpu_ring_get_rptr(ring);
232 uint64_t last = atomic64_read(&ring->last_activity);
233 uint64_t elapsed;
234
235 if (rptr != atomic_read(&ring->last_rptr)) {
236 /* ring is still working, no lockup */
237 amdgpu_ring_lockup_update(ring);
238 return false;
239 }
240
241 elapsed = jiffies_to_msecs(jiffies_64 - last);
242 if (amdgpu_lockup_timeout && elapsed >= amdgpu_lockup_timeout) {
243 dev_err(ring->adev->dev, "ring %d stalled for more than %llumsec\n",
244 ring->idx, elapsed);
245 return true;
246 }
247 /* give a chance to the GPU ... */
248 return false;
249}
250
251/**
252 * amdgpu_ring_backup - Back up the content of a ring 210 * amdgpu_ring_backup - Back up the content of a ring
253 * 211 *
254 * @ring: the ring we want to back up 212 * @ring: the ring we want to back up
@@ -436,7 +394,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
436 if (amdgpu_debugfs_ring_init(adev, ring)) { 394 if (amdgpu_debugfs_ring_init(adev, ring)) {
437 DRM_ERROR("Failed to register debugfs file for rings !\n"); 395 DRM_ERROR("Failed to register debugfs file for rings !\n");
438 } 396 }
439 amdgpu_ring_lockup_update(ring);
440 return 0; 397 return 0;
441} 398}
442 399
@@ -479,6 +436,30 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
479 } 436 }
480} 437}
481 438
439/**
440 * amdgpu_ring_from_fence - get ring from fence
441 *
442 * @f: fence structure
443 *
444 * Extract the ring a fence belongs to. Handles both scheduler as
445 * well as hardware fences.
446 */
447struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
448{
449 struct amdgpu_fence *a_fence;
450 struct amd_sched_fence *s_fence;
451
452 s_fence = to_amd_sched_fence(f);
453 if (s_fence)
454 return container_of(s_fence->sched, struct amdgpu_ring, sched);
455
456 a_fence = to_amdgpu_fence(f);
457 if (a_fence)
458 return a_fence->ring;
459
460 return NULL;
461}
462
482/* 463/*
483 * Debugfs info 464 * Debugfs info
484 */ 465 */
@@ -540,8 +521,8 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
540static int amdgpu_gfx_index = offsetof(struct amdgpu_device, gfx.gfx_ring[0]); 521static int amdgpu_gfx_index = offsetof(struct amdgpu_device, gfx.gfx_ring[0]);
541static int cayman_cp1_index = offsetof(struct amdgpu_device, gfx.compute_ring[0]); 522static int cayman_cp1_index = offsetof(struct amdgpu_device, gfx.compute_ring[0]);
542static int cayman_cp2_index = offsetof(struct amdgpu_device, gfx.compute_ring[1]); 523static int cayman_cp2_index = offsetof(struct amdgpu_device, gfx.compute_ring[1]);
543static int amdgpu_dma1_index = offsetof(struct amdgpu_device, sdma[0].ring); 524static int amdgpu_dma1_index = offsetof(struct amdgpu_device, sdma.instance[0].ring);
544static int amdgpu_dma2_index = offsetof(struct amdgpu_device, sdma[1].ring); 525static int amdgpu_dma2_index = offsetof(struct amdgpu_device, sdma.instance[1].ring);
545static int r600_uvd_index = offsetof(struct amdgpu_device, uvd.ring); 526static int r600_uvd_index = offsetof(struct amdgpu_device, uvd.ring);
546static int si_vce1_index = offsetof(struct amdgpu_device, vce.ring[0]); 527static int si_vce1_index = offsetof(struct amdgpu_device, vce.ring[0]);
547static int si_vce2_index = offsetof(struct amdgpu_device, vce.ring[1]); 528static int si_vce2_index = offsetof(struct amdgpu_device, vce.ring[1]);
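
The new amdgpu_ring_from_fence() centralizes the fence-to-ring lookup that amdgpu_sa.c used to open-code; a hedged sketch of a caller (the helper name below is illustrative, not part of this change):

/* Map an arbitrary fence -- scheduler fence or hardware fence -- back
 * to its ring index, as the suballocator now does.  A NULL return from
 * amdgpu_ring_from_fence() means a foreign fence we do not recognize. */
static unsigned example_fence_ring_idx(struct fence *f)
{
        struct amdgpu_ring *ring = amdgpu_ring_from_fence(f);

        return ring ? ring->idx : 0;
}
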
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index e90712443fe9..8b88edb0434b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -139,25 +139,6 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
139 return r; 139 return r;
140} 140}
141 141
142static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
143{
144 struct amdgpu_fence *a_fence;
145 struct amd_sched_fence *s_fence;
146
147 s_fence = to_amd_sched_fence(f);
148 if (s_fence) {
149 struct amdgpu_ring *ring;
150
151 ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
152 return ring->idx;
153 }
154
155 a_fence = to_amdgpu_fence(f);
156 if (a_fence)
157 return a_fence->ring->idx;
158 return 0;
159}
160
161static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) 142static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
162{ 143{
163 struct amdgpu_sa_manager *sa_manager = sa_bo->manager; 144 struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
@@ -318,7 +299,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
318 } 299 }
319 300
320 if (best_bo) { 301 if (best_bo) {
321 uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence); 302 uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx;
322 ++tries[idx]; 303 ++tries[idx];
323 sa_manager->hole = best_bo->olist.prev; 304 sa_manager->hole = best_bo->olist.prev;
324 305
@@ -330,13 +311,13 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
330 return false; 311 return false;
331} 312}
332 313
333int amdgpu_sa_bo_new(struct amdgpu_device *adev, 314int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
334 struct amdgpu_sa_manager *sa_manager,
335 struct amdgpu_sa_bo **sa_bo, 315 struct amdgpu_sa_bo **sa_bo,
336 unsigned size, unsigned align) 316 unsigned size, unsigned align)
337{ 317{
338 struct fence *fences[AMDGPU_MAX_RINGS]; 318 struct fence *fences[AMDGPU_MAX_RINGS];
339 unsigned tries[AMDGPU_MAX_RINGS]; 319 unsigned tries[AMDGPU_MAX_RINGS];
320 unsigned count;
340 int i, r; 321 int i, r;
341 signed long t; 322 signed long t;
342 323
@@ -371,13 +352,18 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
371 /* see if we can skip over some allocations */ 352 /* see if we can skip over some allocations */
372 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries)); 353 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
373 354
374 spin_unlock(&sa_manager->wq.lock); 355 for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
375 t = amdgpu_fence_wait_any(adev, fences, AMDGPU_MAX_RINGS, 356 if (fences[i])
376 false, MAX_SCHEDULE_TIMEOUT); 357 fences[count++] = fences[i];
377 r = (t > 0) ? 0 : t; 358
378 spin_lock(&sa_manager->wq.lock); 359 if (count) {
379 /* if we have nothing to wait for block */ 360 spin_unlock(&sa_manager->wq.lock);
380 if (r == -ENOENT) { 361 t = fence_wait_any_timeout(fences, count, false,
362 MAX_SCHEDULE_TIMEOUT);
363 r = (t > 0) ? 0 : t;
364 spin_lock(&sa_manager->wq.lock);
365 } else {
366 /* if we have nothing to wait for block */
381 r = wait_event_interruptible_locked( 367 r = wait_event_interruptible_locked(
382 sa_manager->wq, 368 sa_manager->wq,
383 amdgpu_sa_event(sa_manager, size, align) 369 amdgpu_sa_event(sa_manager, size, align)
@@ -406,7 +392,7 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
406 if (fence && !fence_is_signaled(fence)) { 392 if (fence && !fence_is_signaled(fence)) {
407 uint32_t idx; 393 uint32_t idx;
408 (*sa_bo)->fence = fence_get(fence); 394 (*sa_bo)->fence = fence_get(fence);
409 idx = amdgpu_sa_get_ring_from_fence(fence); 395 idx = amdgpu_ring_from_fence(fence)->idx;
410 list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]); 396 list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
411 } else { 397 } else {
412 amdgpu_sa_bo_remove_locked(*sa_bo); 398 amdgpu_sa_bo_remove_locked(*sa_bo);
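
amdgpu_sa_bo_new() now compacts its per-ring fence array before calling fence_wait_any_timeout(), which expects a dense array with no NULL entries; a minimal sketch of that idiom under the same assumptions (a fences[] array sized AMDGPU_MAX_RINGS that may be sparse):

/* Drop the NULL slots, then wait for the first remaining fence to
 * signal.  Returns the remaining timeout (>0) on success, a negative
 * error otherwise; zero live fences means there is nothing to wait for. */
static long example_wait_for_any(struct fence **fences, unsigned num)
{
        unsigned i, count = 0;

        for (i = 0; i < num; ++i)
                if (fences[i])
                        fences[count++] = fences[i];

        if (!count)
                return 0;

        return fence_wait_any_timeout(fences, count, false,
                                      MAX_SCHEDULE_TIMEOUT);
}
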
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 2e946b2cad88..438c05254695 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -26,6 +26,7 @@
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <drm/drmP.h> 27#include <drm/drmP.h>
28#include "amdgpu.h" 28#include "amdgpu.h"
29#include "amdgpu_trace.h"
29 30
30static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) 31static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
31{ 32{
@@ -44,24 +45,20 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
44 return NULL; 45 return NULL;
45 } 46 }
46 job = to_amdgpu_job(sched_job); 47 job = to_amdgpu_job(sched_job);
47 mutex_lock(&job->job_lock); 48 trace_amdgpu_sched_run_job(job);
48 r = amdgpu_ib_schedule(job->adev, 49 r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
49 job->num_ibs,
50 job->ibs,
51 job->base.owner);
52 if (r) { 50 if (r) {
53 DRM_ERROR("Error scheduling IBs (%d)\n", r); 51 DRM_ERROR("Error scheduling IBs (%d)\n", r);
54 goto err; 52 goto err;
55 } 53 }
56 54
57 fence = amdgpu_fence_ref(job->ibs[job->num_ibs - 1].fence); 55 fence = job->ibs[job->num_ibs - 1].fence;
56 fence_get(&fence->base);
58 57
59err: 58err:
60 if (job->free_job) 59 if (job->free_job)
61 job->free_job(job); 60 job->free_job(job);
62 61
63 mutex_unlock(&job->job_lock);
64 fence_put(&job->base.s_fence->base);
65 kfree(job); 62 kfree(job);
66 return fence ? &fence->base : NULL; 63 return fence ? &fence->base : NULL;
67} 64}
@@ -87,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
87 return -ENOMEM; 84 return -ENOMEM;
88 job->base.sched = &ring->sched; 85 job->base.sched = &ring->sched;
89 job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; 86 job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
87 job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
88 if (!job->base.s_fence) {
89 kfree(job);
90 return -ENOMEM;
91 }
92 *f = fence_get(&job->base.s_fence->base);
93
90 job->adev = adev; 94 job->adev = adev;
91 job->ibs = ibs; 95 job->ibs = ibs;
92 job->num_ibs = num_ibs; 96 job->num_ibs = num_ibs;
93 job->base.owner = owner; 97 job->owner = owner;
94 mutex_init(&job->job_lock);
95 job->free_job = free_job; 98 job->free_job = free_job;
96 mutex_lock(&job->job_lock); 99 amd_sched_entity_push_job(&job->base);
97 r = amd_sched_entity_push_job(&job->base);
98 if (r) {
99 mutex_unlock(&job->job_lock);
100 kfree(job);
101 return r;
102 }
103 *f = fence_get(&job->base.s_fence->base);
104 mutex_unlock(&job->job_lock);
105 } else { 100 } else {
106 r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); 101 r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
107 if (r) 102 if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index ff3ca52ec6fe..1caaf201b708 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -40,7 +40,7 @@ int amdgpu_semaphore_create(struct amdgpu_device *adev,
40 if (*semaphore == NULL) { 40 if (*semaphore == NULL) {
41 return -ENOMEM; 41 return -ENOMEM;
42 } 42 }
43 r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo, 43 r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
44 &(*semaphore)->sa_bo, 8, 8); 44 &(*semaphore)->sa_bo, 8, 8);
45 if (r) { 45 if (r) {
46 kfree(*semaphore); 46 kfree(*semaphore);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 4921de15b451..dd005c336c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -87,6 +87,15 @@ static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
87 return false; 87 return false;
88} 88}
89 89
90static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
91{
92 if (*keep && fence_is_later(*keep, fence))
93 return;
94
95 fence_put(*keep);
96 *keep = fence_get(fence);
97}
98
90/** 99/**
91 * amdgpu_sync_fence - remember to sync to this fence 100 * amdgpu_sync_fence - remember to sync to this fence
92 * 101 *
@@ -99,35 +108,21 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
99{ 108{
100 struct amdgpu_sync_entry *e; 109 struct amdgpu_sync_entry *e;
101 struct amdgpu_fence *fence; 110 struct amdgpu_fence *fence;
102 struct amdgpu_fence *other;
103 struct fence *tmp, *later;
104 111
105 if (!f) 112 if (!f)
106 return 0; 113 return 0;
107 114
108 if (amdgpu_sync_same_dev(adev, f) && 115 if (amdgpu_sync_same_dev(adev, f) &&
109 amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) { 116 amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
110 if (sync->last_vm_update) { 117 amdgpu_sync_keep_later(&sync->last_vm_update, f);
111 tmp = sync->last_vm_update;
112 BUG_ON(f->context != tmp->context);
113 later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp;
114 sync->last_vm_update = fence_get(later);
115 fence_put(tmp);
116 } else
117 sync->last_vm_update = fence_get(f);
118 }
119 118
120 fence = to_amdgpu_fence(f); 119 fence = to_amdgpu_fence(f);
121 if (!fence || fence->ring->adev != adev) { 120 if (!fence || fence->ring->adev != adev) {
122 hash_for_each_possible(sync->fences, e, node, f->context) { 121 hash_for_each_possible(sync->fences, e, node, f->context) {
123 struct fence *new;
124 if (unlikely(e->fence->context != f->context)) 122 if (unlikely(e->fence->context != f->context))
125 continue; 123 continue;
126 new = fence_get(fence_later(e->fence, f)); 124
127 if (new) { 125 amdgpu_sync_keep_later(&e->fence, f);
128 fence_put(e->fence);
129 e->fence = new;
130 }
131 return 0; 126 return 0;
132 } 127 }
133 128
@@ -140,10 +135,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
140 return 0; 135 return 0;
141 } 136 }
142 137
143 other = sync->sync_to[fence->ring->idx]; 138 amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
144 sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
145 amdgpu_fence_later(fence, other));
146 amdgpu_fence_unref(&other);
147 139
148 return 0; 140 return 0;
149} 141}
@@ -199,8 +191,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
199 * for other VM updates and moves. 191 * for other VM updates and moves.
200 */ 192 */
201 fence_owner = amdgpu_sync_get_owner(f); 193 fence_owner = amdgpu_sync_get_owner(f);
202 if ((owner != AMDGPU_FENCE_OWNER_MOVE) && 194 if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
203 (fence_owner != AMDGPU_FENCE_OWNER_MOVE) && 195 (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
204 ((owner == AMDGPU_FENCE_OWNER_VM) != 196 ((owner == AMDGPU_FENCE_OWNER_VM) !=
205 (fence_owner == AMDGPU_FENCE_OWNER_VM))) 197 (fence_owner == AMDGPU_FENCE_OWNER_VM)))
206 continue; 198 continue;
@@ -262,11 +254,11 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
262 return 0; 254 return 0;
263 255
264 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 256 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
265 struct amdgpu_fence *fence = sync->sync_to[i]; 257 struct fence *fence = sync->sync_to[i];
266 if (!fence) 258 if (!fence)
267 continue; 259 continue;
268 260
269 r = fence_wait(&fence->base, false); 261 r = fence_wait(fence, false);
270 if (r) 262 if (r)
271 return r; 263 return r;
272 } 264 }
@@ -291,9 +283,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
291 int i, r; 283 int i, r;
292 284
293 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 285 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
294 struct amdgpu_fence *fence = sync->sync_to[i];
295 struct amdgpu_semaphore *semaphore;
296 struct amdgpu_ring *other = adev->rings[i]; 286 struct amdgpu_ring *other = adev->rings[i];
287 struct amdgpu_semaphore *semaphore;
288 struct amdgpu_fence *fence;
289
290 if (!sync->sync_to[i])
291 continue;
292
293 fence = to_amdgpu_fence(sync->sync_to[i]);
297 294
298 /* check if we really need to sync */ 295 /* check if we really need to sync */
299 if (!amdgpu_fence_need_sync(fence, ring)) 296 if (!amdgpu_fence_need_sync(fence, ring))
@@ -305,8 +302,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
305 return -EINVAL; 302 return -EINVAL;
306 } 303 }
307 304
308 if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores || 305 if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
309 (count >= AMDGPU_NUM_SYNCS)) { 306 r = fence_wait(&fence->base, true);
307 if (r)
308 return r;
309 continue;
310 }
311
312 if (count >= AMDGPU_NUM_SYNCS) {
310 /* not enough room, wait manually */ 313 /* not enough room, wait manually */
311 r = fence_wait(&fence->base, false); 314 r = fence_wait(&fence->base, false);
312 if (r) 315 if (r)
@@ -378,7 +381,7 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
378 amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); 381 amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
379 382
380 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 383 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
381 amdgpu_fence_unref(&sync->sync_to[i]); 384 fence_put(sync->sync_to[i]);
382 385
383 fence_put(sync->last_vm_update); 386 fence_put(sync->last_vm_update);
384} 387}
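
The amdgpu_sync_keep_later() helper replaces the open-coded seqno arithmetic with fence_is_later() and is reused for last_vm_update, the per-context hash entries and the per-ring sync_to[] slots; a hedged sketch of the "remember only the newer fence" idiom it implements:

/* Keep whichever fence is later.  fence_is_later() compares seqnos,
 * which is only meaningful within one fence context; fence_put() and
 * fence_get() handle the reference counting and are NULL-safe. */
static void example_keep_later(struct fence **keep, struct fence *fence)
{
        if (*keep && fence_is_later(*keep, fence))
                return;

        fence_put(*keep);
        *keep = fence_get(fence);
}
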
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 961d7265c286..8f9834ab1bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs,
48 __entry->fences) 48 __entry->fences)
49); 49);
50 50
51TRACE_EVENT(amdgpu_cs_ioctl,
52 TP_PROTO(struct amdgpu_job *job),
53 TP_ARGS(job),
54 TP_STRUCT__entry(
55 __field(struct amdgpu_device *, adev)
56 __field(struct amd_sched_job *, sched_job)
57 __field(struct amdgpu_ib *, ib)
58 __field(struct fence *, fence)
59 __field(char *, ring_name)
60 __field(u32, num_ibs)
61 ),
62
63 TP_fast_assign(
64 __entry->adev = job->adev;
65 __entry->sched_job = &job->base;
66 __entry->ib = job->ibs;
67 __entry->fence = &job->base.s_fence->base;
68 __entry->ring_name = job->ibs[0].ring->name;
69 __entry->num_ibs = job->num_ibs;
70 ),
71 TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
72 __entry->adev, __entry->sched_job, __entry->ib,
73 __entry->fence, __entry->ring_name, __entry->num_ibs)
74);
75
76TRACE_EVENT(amdgpu_sched_run_job,
77 TP_PROTO(struct amdgpu_job *job),
78 TP_ARGS(job),
79 TP_STRUCT__entry(
80 __field(struct amdgpu_device *, adev)
81 __field(struct amd_sched_job *, sched_job)
82 __field(struct amdgpu_ib *, ib)
83 __field(struct fence *, fence)
84 __field(char *, ring_name)
85 __field(u32, num_ibs)
86 ),
87
88 TP_fast_assign(
89 __entry->adev = job->adev;
90 __entry->sched_job = &job->base;
91 __entry->ib = job->ibs;
92 __entry->fence = &job->base.s_fence->base;
93 __entry->ring_name = job->ibs[0].ring->name;
94 __entry->num_ibs = job->num_ibs;
95 ),
96 TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
97 __entry->adev, __entry->sched_job, __entry->ib,
98 __entry->fence, __entry->ring_name, __entry->num_ibs)
99);
100
101
51TRACE_EVENT(amdgpu_vm_grab_id, 102TRACE_EVENT(amdgpu_vm_grab_id,
52 TP_PROTO(unsigned vmid, int ring), 103 TP_PROTO(unsigned vmid, int ring),
53 TP_ARGS(vmid, ring), 104 TP_ARGS(vmid, ring),
@@ -111,7 +162,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
111 __entry->offset, __entry->flags) 162 __entry->offset, __entry->flags)
112); 163);
113 164
114TRACE_EVENT(amdgpu_vm_bo_update, 165DECLARE_EVENT_CLASS(amdgpu_vm_mapping,
115 TP_PROTO(struct amdgpu_bo_va_mapping *mapping), 166 TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
116 TP_ARGS(mapping), 167 TP_ARGS(mapping),
117 TP_STRUCT__entry( 168 TP_STRUCT__entry(
@@ -129,6 +180,16 @@ TRACE_EVENT(amdgpu_vm_bo_update,
129 __entry->soffset, __entry->eoffset, __entry->flags) 180 __entry->soffset, __entry->eoffset, __entry->flags)
130); 181);
131 182
183DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_update,
184 TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
185 TP_ARGS(mapping)
186);
187
188DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping,
189 TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
190 TP_ARGS(mapping)
191);
192
132TRACE_EVENT(amdgpu_vm_set_page, 193TRACE_EVENT(amdgpu_vm_set_page,
133 TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, 194 TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
134 uint32_t incr, uint32_t flags), 195 uint32_t incr, uint32_t flags),
@@ -186,49 +247,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
186 TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) 247 TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
187); 248);
188 249
189DECLARE_EVENT_CLASS(amdgpu_fence_request,
190
191 TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
192
193 TP_ARGS(dev, ring, seqno),
194
195 TP_STRUCT__entry(
196 __field(u32, dev)
197 __field(int, ring)
198 __field(u32, seqno)
199 ),
200
201 TP_fast_assign(
202 __entry->dev = dev->primary->index;
203 __entry->ring = ring;
204 __entry->seqno = seqno;
205 ),
206
207 TP_printk("dev=%u, ring=%d, seqno=%u",
208 __entry->dev, __entry->ring, __entry->seqno)
209);
210
211DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_emit,
212
213 TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
214
215 TP_ARGS(dev, ring, seqno)
216);
217
218DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_begin,
219
220 TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
221
222 TP_ARGS(dev, ring, seqno)
223);
224
225DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_end,
226
227 TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
228
229 TP_ARGS(dev, ring, seqno)
230);
231
232DECLARE_EVENT_CLASS(amdgpu_semaphore_request, 250DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
233 251
234 TP_PROTO(int ring, struct amdgpu_semaphore *sem), 252 TP_PROTO(int ring, struct amdgpu_semaphore *sem),
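
The two new job tracepoints and the amdgpu_vm_mapping class above follow the standard DECLARE_EVENT_CLASS / DEFINE_EVENT pattern: one class carries the field layout, assignment and print format, and each DEFINE_EVENT only adds a name (plus a trace_<name>_enabled() static-key helper, which amdgpu_vm_bo_update uses further down in amdgpu_vm.c to skip the mapping walk when tracing is off).  A hedged, generic sketch of the pattern with illustrative field and event names:

/* One shared layout ... */
DECLARE_EVENT_CLASS(example_mapping,
        TP_PROTO(u64 start, u64 last),
        TP_ARGS(start, last),
        TP_STRUCT__entry(
                __field(u64, start)
                __field(u64, last)
        ),
        TP_fast_assign(
                __entry->start = start;
                __entry->last = last;
        ),
        TP_printk("start=%llx, last=%llx",
                  (unsigned long long)__entry->start,
                  (unsigned long long)__entry->last)
);

/* ... and two named events sharing it. */
DEFINE_EVENT(example_mapping, example_mapping_update,
        TP_PROTO(u64 start, u64 last),
        TP_ARGS(start, last)
);

DEFINE_EVENT(example_mapping, example_mapping_used,
        TP_PROTO(u64 start, u64 last),
        TP_ARGS(start, last)
);
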
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 364cbe975332..8a1752ff3d8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -587,9 +587,13 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
587 uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); 587 uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
588 int r; 588 int r;
589 589
590 if (gtt->userptr) 590 if (gtt->userptr) {
591 amdgpu_ttm_tt_pin_userptr(ttm); 591 r = amdgpu_ttm_tt_pin_userptr(ttm);
592 592 if (r) {
593 DRM_ERROR("failed to pin userptr\n");
594 return r;
595 }
596 }
593 gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); 597 gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
594 if (!ttm->num_pages) { 598 if (!ttm->num_pages) {
595 WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", 599 WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
@@ -797,11 +801,12 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
797 if (mem && mem->mem_type != TTM_PL_SYSTEM) 801 if (mem && mem->mem_type != TTM_PL_SYSTEM)
798 flags |= AMDGPU_PTE_VALID; 802 flags |= AMDGPU_PTE_VALID;
799 803
800 if (mem && mem->mem_type == TTM_PL_TT) 804 if (mem && mem->mem_type == TTM_PL_TT) {
801 flags |= AMDGPU_PTE_SYSTEM; 805 flags |= AMDGPU_PTE_SYSTEM;
802 806
803 if (!ttm || ttm->caching_state == tt_cached) 807 if (ttm->caching_state == tt_cached)
804 flags |= AMDGPU_PTE_SNOOPED; 808 flags |= AMDGPU_PTE_SNOOPED;
809 }
805 810
806 if (adev->asic_type >= CHIP_TOPAZ) 811 if (adev->asic_type >= CHIP_TOPAZ)
807 flags |= AMDGPU_PTE_EXECUTABLE; 812 flags |= AMDGPU_PTE_EXECUTABLE;
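
The hunk above changes how GART PTE flags are derived: AMDGPU_PTE_SYSTEM and AMDGPU_PTE_SNOOPED are now only set for GTT placements, and ttm->caching_state is only consulted inside that branch (previously SNOOPED could also be set with a NULL ttm).  A hedged sketch of the resulting logic, condensed from the diff; the function name is illustrative:

/* Condensed view of the flag derivation after the change: VALID for
 * anything not in system memory, SYSTEM plus optional SNOOPED only
 * for GTT, EXECUTABLE on TOPAZ and newer ASICs. */
static uint32_t example_gtt_pte_flags(struct amdgpu_device *adev,
                                      struct ttm_tt *ttm,
                                      struct ttm_mem_reg *mem)
{
        uint32_t flags = 0;

        if (mem && mem->mem_type != TTM_PL_SYSTEM)
                flags |= AMDGPU_PTE_VALID;

        if (mem && mem->mem_type == TTM_PL_TT) {
                flags |= AMDGPU_PTE_SYSTEM;
                if (ttm->caching_state == tt_cached)
                        flags |= AMDGPU_PTE_SNOOPED;
        }

        if (adev->asic_type >= CHIP_TOPAZ)
                flags |= AMDGPU_PTE_EXECUTABLE;

        return flags;
}
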
@@ -1041,7 +1046,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
1041 WARN_ON(ib->length_dw > num_dw); 1046 WARN_ON(ib->length_dw > num_dw);
1042 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, 1047 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
1043 &amdgpu_vm_free_job, 1048 &amdgpu_vm_free_job,
1044 AMDGPU_FENCE_OWNER_MOVE, 1049 AMDGPU_FENCE_OWNER_UNDEFINED,
1045 fence); 1050 fence);
1046 if (r) 1051 if (r)
1047 goto error_free; 1052 goto error_free;
@@ -1072,6 +1077,11 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
1072 spin_lock(&glob->lru_lock); 1077 spin_lock(&glob->lru_lock);
1073 ret = drm_mm_dump_table(m, mm); 1078 ret = drm_mm_dump_table(m, mm);
1074 spin_unlock(&glob->lru_lock); 1079 spin_unlock(&glob->lru_lock);
1080 if (ttm_pl == TTM_PL_VRAM)
1081 seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
1082 adev->mman.bdev.man[ttm_pl].size,
1083 (u64)atomic64_read(&adev->vram_usage) >> 20,
1084 (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
1075 return ret; 1085 return ret;
1076} 1086}
1077 1087
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d0312364d950..53f987aeeacf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -53,6 +53,7 @@
53#define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin" 53#define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
54#define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin" 54#define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
55#define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin" 55#define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin"
56#define FIRMWARE_STONEY "amdgpu/stoney_uvd.bin"
56 57
57/** 58/**
58 * amdgpu_uvd_cs_ctx - Command submission parser context 59 * amdgpu_uvd_cs_ctx - Command submission parser context
@@ -83,6 +84,7 @@ MODULE_FIRMWARE(FIRMWARE_MULLINS);
83MODULE_FIRMWARE(FIRMWARE_TONGA); 84MODULE_FIRMWARE(FIRMWARE_TONGA);
84MODULE_FIRMWARE(FIRMWARE_CARRIZO); 85MODULE_FIRMWARE(FIRMWARE_CARRIZO);
85MODULE_FIRMWARE(FIRMWARE_FIJI); 86MODULE_FIRMWARE(FIRMWARE_FIJI);
87MODULE_FIRMWARE(FIRMWARE_STONEY);
86 88
87static void amdgpu_uvd_note_usage(struct amdgpu_device *adev); 89static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
88static void amdgpu_uvd_idle_work_handler(struct work_struct *work); 90static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
@@ -124,6 +126,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
124 case CHIP_CARRIZO: 126 case CHIP_CARRIZO:
125 fw_name = FIRMWARE_CARRIZO; 127 fw_name = FIRMWARE_CARRIZO;
126 break; 128 break;
129 case CHIP_STONEY:
130 fw_name = FIRMWARE_STONEY;
131 break;
127 default: 132 default:
128 return -EINVAL; 133 return -EINVAL;
129 } 134 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 74f2038ac747..a745eeeb5d82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -49,6 +49,7 @@
49#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin" 49#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
50#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin" 50#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
51#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" 51#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
52#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
52 53
53#ifdef CONFIG_DRM_AMDGPU_CIK 54#ifdef CONFIG_DRM_AMDGPU_CIK
54MODULE_FIRMWARE(FIRMWARE_BONAIRE); 55MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -60,6 +61,7 @@ MODULE_FIRMWARE(FIRMWARE_MULLINS);
60MODULE_FIRMWARE(FIRMWARE_TONGA); 61MODULE_FIRMWARE(FIRMWARE_TONGA);
61MODULE_FIRMWARE(FIRMWARE_CARRIZO); 62MODULE_FIRMWARE(FIRMWARE_CARRIZO);
62MODULE_FIRMWARE(FIRMWARE_FIJI); 63MODULE_FIRMWARE(FIRMWARE_FIJI);
64MODULE_FIRMWARE(FIRMWARE_STONEY);
63 65
64static void amdgpu_vce_idle_work_handler(struct work_struct *work); 66static void amdgpu_vce_idle_work_handler(struct work_struct *work);
65 67
@@ -106,6 +108,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
106 case CHIP_FIJI: 108 case CHIP_FIJI:
107 fw_name = FIRMWARE_FIJI; 109 fw_name = FIRMWARE_FIJI;
108 break; 110 break;
111 case CHIP_STONEY:
112 fw_name = FIRMWARE_STONEY;
113 break;
109 114
110 default: 115 default:
111 return -EINVAL; 116 return -EINVAL;
@@ -387,7 +392,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
387 ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ 392 ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
388 ib->ptr[ib->length_dw++] = handle; 393 ib->ptr[ib->length_dw++] = handle;
389 394
390 ib->ptr[ib->length_dw++] = 0x00000030; /* len */ 395 if ((ring->adev->vce.fw_version >> 24) >= 52)
396 ib->ptr[ib->length_dw++] = 0x00000040; /* len */
397 else
398 ib->ptr[ib->length_dw++] = 0x00000030; /* len */
391 ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */ 399 ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
392 ib->ptr[ib->length_dw++] = 0x00000000; 400 ib->ptr[ib->length_dw++] = 0x00000000;
393 ib->ptr[ib->length_dw++] = 0x00000042; 401 ib->ptr[ib->length_dw++] = 0x00000042;
@@ -399,6 +407,12 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
399 ib->ptr[ib->length_dw++] = 0x00000100; 407 ib->ptr[ib->length_dw++] = 0x00000100;
400 ib->ptr[ib->length_dw++] = 0x0000000c; 408 ib->ptr[ib->length_dw++] = 0x0000000c;
401 ib->ptr[ib->length_dw++] = 0x00000000; 409 ib->ptr[ib->length_dw++] = 0x00000000;
410 if ((ring->adev->vce.fw_version >> 24) >= 52) {
411 ib->ptr[ib->length_dw++] = 0x00000000;
412 ib->ptr[ib->length_dw++] = 0x00000000;
413 ib->ptr[ib->length_dw++] = 0x00000000;
414 ib->ptr[ib->length_dw++] = 0x00000000;
415 }
402 416
403 ib->ptr[ib->length_dw++] = 0x00000014; /* len */ 417 ib->ptr[ib->length_dw++] = 0x00000014; /* len */
404 ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ 418 ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1e14531353e0..b53d273eb7a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -90,11 +90,9 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
90 struct amdgpu_bo_list_entry *list; 90 struct amdgpu_bo_list_entry *list;
91 unsigned i, idx; 91 unsigned i, idx;
92 92
93 mutex_lock(&vm->mutex);
94 list = drm_malloc_ab(vm->max_pde_used + 2, 93 list = drm_malloc_ab(vm->max_pde_used + 2,
95 sizeof(struct amdgpu_bo_list_entry)); 94 sizeof(struct amdgpu_bo_list_entry));
96 if (!list) { 95 if (!list) {
97 mutex_unlock(&vm->mutex);
98 return NULL; 96 return NULL;
99 } 97 }
100 98
@@ -119,7 +117,6 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
119 list[idx].tv.shared = true; 117 list[idx].tv.shared = true;
120 list_add(&list[idx++].tv.head, head); 118 list_add(&list[idx++].tv.head, head);
121 } 119 }
122 mutex_unlock(&vm->mutex);
123 120
124 return list; 121 return list;
125} 122}
@@ -138,7 +135,7 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
138int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, 135int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
139 struct amdgpu_sync *sync) 136 struct amdgpu_sync *sync)
140{ 137{
141 struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {}; 138 struct fence *best[AMDGPU_MAX_RINGS] = {};
142 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; 139 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
143 struct amdgpu_device *adev = ring->adev; 140 struct amdgpu_device *adev = ring->adev;
144 141
@@ -146,16 +143,24 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
146 unsigned i; 143 unsigned i;
147 144
148 /* check if the id is still valid */ 145 /* check if the id is still valid */
149 if (vm_id->id && vm_id->last_id_use && 146 if (vm_id->id) {
150 vm_id->last_id_use == adev->vm_manager.active[vm_id->id]) 147 unsigned id = vm_id->id;
151 return 0; 148 long owner;
149
150 owner = atomic_long_read(&adev->vm_manager.ids[id].owner);
151 if (owner == (long)vm) {
152 trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
153 return 0;
154 }
155 }
152 156
153 /* we definately need to flush */ 157 /* we definately need to flush */
154 vm_id->pd_gpu_addr = ~0ll; 158 vm_id->pd_gpu_addr = ~0ll;
155 159
156 /* skip over VMID 0, since it is the system VM */ 160 /* skip over VMID 0, since it is the system VM */
157 for (i = 1; i < adev->vm_manager.nvm; ++i) { 161 for (i = 1; i < adev->vm_manager.nvm; ++i) {
158 struct amdgpu_fence *fence = adev->vm_manager.active[i]; 162 struct fence *fence = adev->vm_manager.ids[i].active;
163 struct amdgpu_ring *fring;
159 164
160 if (fence == NULL) { 165 if (fence == NULL) {
161 /* found a free one */ 166 /* found a free one */
@@ -164,21 +169,23 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
164 return 0; 169 return 0;
165 } 170 }
166 171
167 if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) { 172 fring = amdgpu_ring_from_fence(fence);
168 best[fence->ring->idx] = fence; 173 if (best[fring->idx] == NULL ||
169 choices[fence->ring == ring ? 0 : 1] = i; 174 fence_is_later(best[fring->idx], fence)) {
175 best[fring->idx] = fence;
176 choices[fring == ring ? 0 : 1] = i;
170 } 177 }
171 } 178 }
172 179
173 for (i = 0; i < 2; ++i) { 180 for (i = 0; i < 2; ++i) {
174 if (choices[i]) { 181 if (choices[i]) {
175 struct amdgpu_fence *fence; 182 struct fence *fence;
176 183
177 fence = adev->vm_manager.active[choices[i]]; 184 fence = adev->vm_manager.ids[choices[i]].active;
178 vm_id->id = choices[i]; 185 vm_id->id = choices[i];
179 186
180 trace_amdgpu_vm_grab_id(choices[i], ring->idx); 187 trace_amdgpu_vm_grab_id(choices[i], ring->idx);
181 return amdgpu_sync_fence(ring->adev, sync, &fence->base); 188 return amdgpu_sync_fence(ring->adev, sync, fence);
182 } 189 }
183 } 190 }
184 191
@@ -205,24 +212,21 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
205 uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); 212 uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
206 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; 213 struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
207 struct fence *flushed_updates = vm_id->flushed_updates; 214 struct fence *flushed_updates = vm_id->flushed_updates;
208 bool is_earlier = false; 215 bool is_later;
209
210 if (flushed_updates && updates) {
211 BUG_ON(flushed_updates->context != updates->context);
212 is_earlier = (updates->seqno - flushed_updates->seqno <=
213 INT_MAX) ? true : false;
214 }
215 216
216 if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates || 217 if (!flushed_updates)
217 is_earlier) { 218 is_later = true;
219 else if (!updates)
220 is_later = false;
221 else
222 is_later = fence_is_later(updates, flushed_updates);
218 223
224 if (pd_addr != vm_id->pd_gpu_addr || is_later) {
219 trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id); 225 trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
220 if (is_earlier) { 226 if (is_later) {
221 vm_id->flushed_updates = fence_get(updates); 227 vm_id->flushed_updates = fence_get(updates);
222 fence_put(flushed_updates); 228 fence_put(flushed_updates);
223 } 229 }
224 if (!flushed_updates)
225 vm_id->flushed_updates = fence_get(updates);
226 vm_id->pd_gpu_addr = pd_addr; 230 vm_id->pd_gpu_addr = pd_addr;
227 amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr); 231 amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
228 } 232 }
@@ -242,16 +246,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
242 */ 246 */
243void amdgpu_vm_fence(struct amdgpu_device *adev, 247void amdgpu_vm_fence(struct amdgpu_device *adev,
244 struct amdgpu_vm *vm, 248 struct amdgpu_vm *vm,
245 struct amdgpu_fence *fence) 249 struct fence *fence)
246{ 250{
247 unsigned ridx = fence->ring->idx; 251 struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
248 unsigned vm_id = vm->ids[ridx].id; 252 unsigned vm_id = vm->ids[ring->idx].id;
249
250 amdgpu_fence_unref(&adev->vm_manager.active[vm_id]);
251 adev->vm_manager.active[vm_id] = amdgpu_fence_ref(fence);
252 253
253 amdgpu_fence_unref(&vm->ids[ridx].last_id_use); 254 fence_put(adev->vm_manager.ids[vm_id].active);
254 vm->ids[ridx].last_id_use = amdgpu_fence_ref(fence); 255 adev->vm_manager.ids[vm_id].active = fence_get(fence);
256 atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
255} 257}
256 258
257/** 259/**
@@ -330,6 +332,8 @@ int amdgpu_vm_free_job(struct amdgpu_job *job)
330 * 332 *
331 * @adev: amdgpu_device pointer 333 * @adev: amdgpu_device pointer
332 * @bo: bo to clear 334 * @bo: bo to clear
335 *
336 * need to reserve bo first before calling it.
333 */ 337 */
334static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, 338static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
335 struct amdgpu_bo *bo) 339 struct amdgpu_bo *bo)
@@ -341,24 +345,20 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
341 uint64_t addr; 345 uint64_t addr;
342 int r; 346 int r;
343 347
344 r = amdgpu_bo_reserve(bo, false);
345 if (r)
346 return r;
347
348 r = reservation_object_reserve_shared(bo->tbo.resv); 348 r = reservation_object_reserve_shared(bo->tbo.resv);
349 if (r) 349 if (r)
350 return r; 350 return r;
351 351
352 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); 352 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
353 if (r) 353 if (r)
354 goto error_unreserve; 354 goto error;
355 355
356 addr = amdgpu_bo_gpu_offset(bo); 356 addr = amdgpu_bo_gpu_offset(bo);
357 entries = amdgpu_bo_size(bo) / 8; 357 entries = amdgpu_bo_size(bo) / 8;
358 358
359 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); 359 ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
360 if (!ib) 360 if (!ib)
361 goto error_unreserve; 361 goto error;
362 362
363 r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib); 363 r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
364 if (r) 364 if (r)
@@ -376,16 +376,14 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
376 if (!r) 376 if (!r)
377 amdgpu_bo_fence(bo, fence, true); 377 amdgpu_bo_fence(bo, fence, true);
378 fence_put(fence); 378 fence_put(fence);
379 if (amdgpu_enable_scheduler) { 379 if (amdgpu_enable_scheduler)
380 amdgpu_bo_unreserve(bo);
381 return 0; 380 return 0;
382 } 381
383error_free: 382error_free:
384 amdgpu_ib_free(adev, ib); 383 amdgpu_ib_free(adev, ib);
385 kfree(ib); 384 kfree(ib);
386 385
387error_unreserve: 386error:
388 amdgpu_bo_unreserve(bo);
389 return r; 387 return r;
390} 388}
391 389
@@ -455,8 +453,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
455 return -ENOMEM; 453 return -ENOMEM;
456 454
457 r = amdgpu_ib_get(ring, NULL, ndw * 4, ib); 455 r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
458 if (r) 456 if (r) {
457 kfree(ib);
459 return r; 458 return r;
459 }
460 ib->length_dw = 0; 460 ib->length_dw = 0;
461 461
462 /* walk over the address space and update the page directory */ 462 /* walk over the address space and update the page directory */
@@ -850,6 +850,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
850 return r; 850 return r;
851 } 851 }
852 852
853 if (trace_amdgpu_vm_bo_mapping_enabled()) {
854 list_for_each_entry(mapping, &bo_va->valids, list)
855 trace_amdgpu_vm_bo_mapping(mapping);
856
857 list_for_each_entry(mapping, &bo_va->invalids, list)
858 trace_amdgpu_vm_bo_mapping(mapping);
859 }
860
853 spin_lock(&vm->status_lock); 861 spin_lock(&vm->status_lock);
854 list_splice_init(&bo_va->invalids, &bo_va->valids); 862 list_splice_init(&bo_va->invalids, &bo_va->valids);
855 list_del_init(&bo_va->vm_status); 863 list_del_init(&bo_va->vm_status);
@@ -877,17 +885,21 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
877 struct amdgpu_bo_va_mapping *mapping; 885 struct amdgpu_bo_va_mapping *mapping;
878 int r; 886 int r;
879 887
888 spin_lock(&vm->freed_lock);
880 while (!list_empty(&vm->freed)) { 889 while (!list_empty(&vm->freed)) {
881 mapping = list_first_entry(&vm->freed, 890 mapping = list_first_entry(&vm->freed,
882 struct amdgpu_bo_va_mapping, list); 891 struct amdgpu_bo_va_mapping, list);
883 list_del(&mapping->list); 892 list_del(&mapping->list);
884 893 spin_unlock(&vm->freed_lock);
885 r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL); 894 r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL);
886 kfree(mapping); 895 kfree(mapping);
887 if (r) 896 if (r)
888 return r; 897 return r;
889 898
899 spin_lock(&vm->freed_lock);
890 } 900 }
901 spin_unlock(&vm->freed_lock);
902
891 return 0; 903 return 0;
892 904
893} 905}
@@ -914,8 +926,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
914 bo_va = list_first_entry(&vm->invalidated, 926 bo_va = list_first_entry(&vm->invalidated,
915 struct amdgpu_bo_va, vm_status); 927 struct amdgpu_bo_va, vm_status);
916 spin_unlock(&vm->status_lock); 928 spin_unlock(&vm->status_lock);
917 929 mutex_lock(&bo_va->mutex);
918 r = amdgpu_vm_bo_update(adev, bo_va, NULL); 930 r = amdgpu_vm_bo_update(adev, bo_va, NULL);
931 mutex_unlock(&bo_va->mutex);
919 if (r) 932 if (r)
920 return r; 933 return r;
921 934
@@ -959,10 +972,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
959 INIT_LIST_HEAD(&bo_va->valids); 972 INIT_LIST_HEAD(&bo_va->valids);
960 INIT_LIST_HEAD(&bo_va->invalids); 973 INIT_LIST_HEAD(&bo_va->invalids);
961 INIT_LIST_HEAD(&bo_va->vm_status); 974 INIT_LIST_HEAD(&bo_va->vm_status);
962 975 mutex_init(&bo_va->mutex);
963 mutex_lock(&vm->mutex);
964 list_add_tail(&bo_va->bo_list, &bo->va); 976 list_add_tail(&bo_va->bo_list, &bo->va);
965 mutex_unlock(&vm->mutex);
966 977
967 return bo_va; 978 return bo_va;
968} 979}
@@ -979,7 +990,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
979 * Add a mapping of the BO at the specefied addr into the VM. 990 * Add a mapping of the BO at the specefied addr into the VM.
980 * Returns 0 for success, error for failure. 991 * Returns 0 for success, error for failure.
981 * 992 *
982 * Object has to be reserved and gets unreserved by this function! 993 * Object has to be reserved and unreserved outside!
983 */ 994 */
984int amdgpu_vm_bo_map(struct amdgpu_device *adev, 995int amdgpu_vm_bo_map(struct amdgpu_device *adev,
985 struct amdgpu_bo_va *bo_va, 996 struct amdgpu_bo_va *bo_va,
@@ -995,32 +1006,27 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
995 1006
996 /* validate the parameters */ 1007 /* validate the parameters */
997 if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || 1008 if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
998 size == 0 || size & AMDGPU_GPU_PAGE_MASK) { 1009 size == 0 || size & AMDGPU_GPU_PAGE_MASK)
999 amdgpu_bo_unreserve(bo_va->bo);
1000 return -EINVAL; 1010 return -EINVAL;
1001 }
1002 1011
1003 /* make sure object fit at this offset */ 1012 /* make sure object fit at this offset */
1004 eaddr = saddr + size; 1013 eaddr = saddr + size;
1005 if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) { 1014 if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
1006 amdgpu_bo_unreserve(bo_va->bo);
1007 return -EINVAL; 1015 return -EINVAL;
1008 }
1009 1016
1010 last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; 1017 last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
1011 if (last_pfn > adev->vm_manager.max_pfn) { 1018 if (last_pfn > adev->vm_manager.max_pfn) {
1012 dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n", 1019 dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n",
1013 last_pfn, adev->vm_manager.max_pfn); 1020 last_pfn, adev->vm_manager.max_pfn);
1014 amdgpu_bo_unreserve(bo_va->bo);
1015 return -EINVAL; 1021 return -EINVAL;
1016 } 1022 }
1017 1023
1018 mutex_lock(&vm->mutex);
1019
1020 saddr /= AMDGPU_GPU_PAGE_SIZE; 1024 saddr /= AMDGPU_GPU_PAGE_SIZE;
1021 eaddr /= AMDGPU_GPU_PAGE_SIZE; 1025 eaddr /= AMDGPU_GPU_PAGE_SIZE;
1022 1026
1027 spin_lock(&vm->it_lock);
1023 it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1); 1028 it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1);
1029 spin_unlock(&vm->it_lock);
1024 if (it) { 1030 if (it) {
1025 struct amdgpu_bo_va_mapping *tmp; 1031 struct amdgpu_bo_va_mapping *tmp;
1026 tmp = container_of(it, struct amdgpu_bo_va_mapping, it); 1032 tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1028,16 +1034,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1028 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " 1034 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1029 "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr, 1035 "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
1030 tmp->it.start, tmp->it.last + 1); 1036 tmp->it.start, tmp->it.last + 1);
1031 amdgpu_bo_unreserve(bo_va->bo);
1032 r = -EINVAL; 1037 r = -EINVAL;
1033 goto error_unlock; 1038 goto error;
1034 } 1039 }
1035 1040
1036 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); 1041 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1037 if (!mapping) { 1042 if (!mapping) {
1038 amdgpu_bo_unreserve(bo_va->bo);
1039 r = -ENOMEM; 1043 r = -ENOMEM;
1040 goto error_unlock; 1044 goto error;
1041 } 1045 }
1042 1046
1043 INIT_LIST_HEAD(&mapping->list); 1047 INIT_LIST_HEAD(&mapping->list);
@@ -1046,8 +1050,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1046 mapping->offset = offset; 1050 mapping->offset = offset;
1047 mapping->flags = flags; 1051 mapping->flags = flags;
1048 1052
1053 mutex_lock(&bo_va->mutex);
1049 list_add(&mapping->list, &bo_va->invalids); 1054 list_add(&mapping->list, &bo_va->invalids);
1055 mutex_unlock(&bo_va->mutex);
1056 spin_lock(&vm->it_lock);
1050 interval_tree_insert(&mapping->it, &vm->va); 1057 interval_tree_insert(&mapping->it, &vm->va);
1058 spin_unlock(&vm->it_lock);
1051 trace_amdgpu_vm_bo_map(bo_va, mapping); 1059 trace_amdgpu_vm_bo_map(bo_va, mapping);
1052 1060
1053 /* Make sure the page tables are allocated */ 1061 /* Make sure the page tables are allocated */
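
Throughout amdgpu_vm.c the single vm->mutex is replaced by finer-grained locking: a vm->it_lock spinlock around interval-tree operations, a vm->freed_lock spinlock around the vm->freed list, and a per-bo_va mutex around its valids/invalids lists.  A hedged sketch of the lock pairing on the map path, condensed from the hunk above:

/* Condensed from amdgpu_vm_bo_map() above: the bo_va mapping list and
 * the VM interval tree are now protected by separate locks instead of
 * one vm->mutex held across the whole operation. */
mutex_lock(&bo_va->mutex);
list_add(&mapping->list, &bo_va->invalids);
mutex_unlock(&bo_va->mutex);

spin_lock(&vm->it_lock);
interval_tree_insert(&mapping->it, &vm->va);
spin_unlock(&vm->it_lock);
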
@@ -1059,8 +1067,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1059 if (eaddr > vm->max_pde_used) 1067 if (eaddr > vm->max_pde_used)
1060 vm->max_pde_used = eaddr; 1068 vm->max_pde_used = eaddr;
1061 1069
1062 amdgpu_bo_unreserve(bo_va->bo);
1063
1064 /* walk over the address space and allocate the page tables */ 1070 /* walk over the address space and allocate the page tables */
1065 for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { 1071 for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
1066 struct reservation_object *resv = vm->page_directory->tbo.resv; 1072 struct reservation_object *resv = vm->page_directory->tbo.resv;
@@ -1069,51 +1075,40 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1069 if (vm->page_tables[pt_idx].bo) 1075 if (vm->page_tables[pt_idx].bo)
1070 continue; 1076 continue;
1071 1077
1072 /* drop mutex to allocate and clear page table */
1073 mutex_unlock(&vm->mutex);
1074
1075 ww_mutex_lock(&resv->lock, NULL);
1076 r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, 1078 r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
1077 AMDGPU_GPU_PAGE_SIZE, true, 1079 AMDGPU_GPU_PAGE_SIZE, true,
1078 AMDGPU_GEM_DOMAIN_VRAM, 1080 AMDGPU_GEM_DOMAIN_VRAM,
1079 AMDGPU_GEM_CREATE_NO_CPU_ACCESS, 1081 AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
1080 NULL, resv, &pt); 1082 NULL, resv, &pt);
1081 ww_mutex_unlock(&resv->lock);
1082 if (r) 1083 if (r)
1083 goto error_free; 1084 goto error_free;
1084 1085
1086 /* Keep a reference to the page table to avoid freeing
1087 * them up in the wrong order.
1088 */
1089 pt->parent = amdgpu_bo_ref(vm->page_directory);
1090
1085 r = amdgpu_vm_clear_bo(adev, pt); 1091 r = amdgpu_vm_clear_bo(adev, pt);
1086 if (r) { 1092 if (r) {
1087 amdgpu_bo_unref(&pt); 1093 amdgpu_bo_unref(&pt);
1088 goto error_free; 1094 goto error_free;
1089 } 1095 }
1090 1096
1091 /* aquire mutex again */
1092 mutex_lock(&vm->mutex);
1093 if (vm->page_tables[pt_idx].bo) {
1094 /* someone else allocated the pt in the meantime */
1095 mutex_unlock(&vm->mutex);
1096 amdgpu_bo_unref(&pt);
1097 mutex_lock(&vm->mutex);
1098 continue;
1099 }
1100
1101 vm->page_tables[pt_idx].addr = 0; 1097 vm->page_tables[pt_idx].addr = 0;
1102 vm->page_tables[pt_idx].bo = pt; 1098 vm->page_tables[pt_idx].bo = pt;
1103 } 1099 }
1104 1100
1105 mutex_unlock(&vm->mutex);
1106 return 0; 1101 return 0;
1107 1102
1108error_free: 1103error_free:
1109 mutex_lock(&vm->mutex);
1110 list_del(&mapping->list); 1104 list_del(&mapping->list);
1105 spin_lock(&vm->it_lock);
1111 interval_tree_remove(&mapping->it, &vm->va); 1106 interval_tree_remove(&mapping->it, &vm->va);
1107 spin_unlock(&vm->it_lock);
1112 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 1108 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1113 kfree(mapping); 1109 kfree(mapping);
1114 1110
1115error_unlock: 1111error:
1116 mutex_unlock(&vm->mutex);
1117 return r; 1112 return r;
1118} 1113}
1119 1114
@@ -1127,7 +1122,7 @@ error_unlock:
1127 * Remove a mapping of the BO at the specefied addr from the VM. 1122 * Remove a mapping of the BO at the specefied addr from the VM.
1128 * Returns 0 for success, error for failure. 1123 * Returns 0 for success, error for failure.
1129 * 1124 *
1130 * Object has to be reserved and gets unreserved by this function! 1125 * Object has to be reserved and unreserved outside!
1131 */ 1126 */
1132int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, 1127int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1133 struct amdgpu_bo_va *bo_va, 1128 struct amdgpu_bo_va *bo_va,
@@ -1138,7 +1133,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1138 bool valid = true; 1133 bool valid = true;
1139 1134
1140 saddr /= AMDGPU_GPU_PAGE_SIZE; 1135 saddr /= AMDGPU_GPU_PAGE_SIZE;
1141 1136 mutex_lock(&bo_va->mutex);
1142 list_for_each_entry(mapping, &bo_va->valids, list) { 1137 list_for_each_entry(mapping, &bo_va->valids, list) {
1143 if (mapping->it.start == saddr) 1138 if (mapping->it.start == saddr)
1144 break; 1139 break;
@@ -1153,22 +1148,24 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1153 } 1148 }
1154 1149
1155 if (&mapping->list == &bo_va->invalids) { 1150 if (&mapping->list == &bo_va->invalids) {
1156 amdgpu_bo_unreserve(bo_va->bo); 1151 mutex_unlock(&bo_va->mutex);
1157 return -ENOENT; 1152 return -ENOENT;
1158 } 1153 }
1159 } 1154 }
1160 1155 mutex_unlock(&bo_va->mutex);
1161 mutex_lock(&vm->mutex);
1162 list_del(&mapping->list); 1156 list_del(&mapping->list);
1157 spin_lock(&vm->it_lock);
1163 interval_tree_remove(&mapping->it, &vm->va); 1158 interval_tree_remove(&mapping->it, &vm->va);
1159 spin_unlock(&vm->it_lock);
1164 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 1160 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1165 1161
1166 if (valid) 1162 if (valid) {
1163 spin_lock(&vm->freed_lock);
1167 list_add(&mapping->list, &vm->freed); 1164 list_add(&mapping->list, &vm->freed);
1168 else 1165 spin_unlock(&vm->freed_lock);
1166 } else {
1169 kfree(mapping); 1167 kfree(mapping);
1170 mutex_unlock(&vm->mutex); 1168 }
1171 amdgpu_bo_unreserve(bo_va->bo);
1172 1169
1173 return 0; 1170 return 0;
1174} 1171}
@@ -1191,28 +1188,30 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
1191 1188
1192 list_del(&bo_va->bo_list); 1189 list_del(&bo_va->bo_list);
1193 1190
1194 mutex_lock(&vm->mutex);
1195
1196 spin_lock(&vm->status_lock); 1191 spin_lock(&vm->status_lock);
1197 list_del(&bo_va->vm_status); 1192 list_del(&bo_va->vm_status);
1198 spin_unlock(&vm->status_lock); 1193 spin_unlock(&vm->status_lock);
1199 1194
1200 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 1195 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
1201 list_del(&mapping->list); 1196 list_del(&mapping->list);
1197 spin_lock(&vm->it_lock);
1202 interval_tree_remove(&mapping->it, &vm->va); 1198 interval_tree_remove(&mapping->it, &vm->va);
1199 spin_unlock(&vm->it_lock);
1203 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 1200 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1201 spin_lock(&vm->freed_lock);
1204 list_add(&mapping->list, &vm->freed); 1202 list_add(&mapping->list, &vm->freed);
1203 spin_unlock(&vm->freed_lock);
1205 } 1204 }
1206 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { 1205 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
1207 list_del(&mapping->list); 1206 list_del(&mapping->list);
1207 spin_lock(&vm->it_lock);
1208 interval_tree_remove(&mapping->it, &vm->va); 1208 interval_tree_remove(&mapping->it, &vm->va);
1209 spin_unlock(&vm->it_lock);
1209 kfree(mapping); 1210 kfree(mapping);
1210 } 1211 }
1211
1212 fence_put(bo_va->last_pt_update); 1212 fence_put(bo_va->last_pt_update);
1213 mutex_destroy(&bo_va->mutex);
1213 kfree(bo_va); 1214 kfree(bo_va);
1214
1215 mutex_unlock(&vm->mutex);
1216} 1215}
1217 1216
1218/** 1217/**
@@ -1255,15 +1254,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1255 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 1254 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
1256 vm->ids[i].id = 0; 1255 vm->ids[i].id = 0;
1257 vm->ids[i].flushed_updates = NULL; 1256 vm->ids[i].flushed_updates = NULL;
1258 vm->ids[i].last_id_use = NULL;
1259 } 1257 }
1260 mutex_init(&vm->mutex);
1261 vm->va = RB_ROOT; 1258 vm->va = RB_ROOT;
1262 spin_lock_init(&vm->status_lock); 1259 spin_lock_init(&vm->status_lock);
1263 INIT_LIST_HEAD(&vm->invalidated); 1260 INIT_LIST_HEAD(&vm->invalidated);
1264 INIT_LIST_HEAD(&vm->cleared); 1261 INIT_LIST_HEAD(&vm->cleared);
1265 INIT_LIST_HEAD(&vm->freed); 1262 INIT_LIST_HEAD(&vm->freed);
1266 1263 spin_lock_init(&vm->it_lock);
1264 spin_lock_init(&vm->freed_lock);
1267 pd_size = amdgpu_vm_directory_size(adev); 1265 pd_size = amdgpu_vm_directory_size(adev);
1268 pd_entries = amdgpu_vm_num_pdes(adev); 1266 pd_entries = amdgpu_vm_num_pdes(adev);
1269 1267
@@ -1283,8 +1281,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1283 NULL, NULL, &vm->page_directory); 1281 NULL, NULL, &vm->page_directory);
1284 if (r) 1282 if (r)
1285 return r; 1283 return r;
1286 1284 r = amdgpu_bo_reserve(vm->page_directory, false);
1285 if (r) {
1286 amdgpu_bo_unref(&vm->page_directory);
1287 vm->page_directory = NULL;
1288 return r;
1289 }
1287 r = amdgpu_vm_clear_bo(adev, vm->page_directory); 1290 r = amdgpu_vm_clear_bo(adev, vm->page_directory);
1291 amdgpu_bo_unreserve(vm->page_directory);
1288 if (r) { 1292 if (r) {
1289 amdgpu_bo_unref(&vm->page_directory); 1293 amdgpu_bo_unref(&vm->page_directory);
1290 vm->page_directory = NULL; 1294 vm->page_directory = NULL;
@@ -1327,11 +1331,27 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1327 1331
1328 amdgpu_bo_unref(&vm->page_directory); 1332 amdgpu_bo_unref(&vm->page_directory);
1329 fence_put(vm->page_directory_fence); 1333 fence_put(vm->page_directory_fence);
1330
1331 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 1334 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
1335 unsigned id = vm->ids[i].id;
1336
1337 atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
1338 (long)vm, 0);
1332 fence_put(vm->ids[i].flushed_updates); 1339 fence_put(vm->ids[i].flushed_updates);
1333 amdgpu_fence_unref(&vm->ids[i].last_id_use);
1334 } 1340 }
1335 1341
1336 mutex_destroy(&vm->mutex); 1342}
1343
1344/**
1345 * amdgpu_vm_manager_fini - cleanup VM manager
1346 *
1347 * @adev: amdgpu_device pointer
1348 *
1349 * Cleanup the VM manager and free resources.
1350 */
1351void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
1352{
1353 unsigned i;
1354
1355 for (i = 0; i < AMDGPU_NUM_VM; ++i)
1356 fence_put(adev->vm_manager.ids[i].active);
1337} 1357}
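
The amdgpu_vm.c hunks above drop the single per-VM mutex in favour of finer-grained locking: a per-bo_va mutex for the mapping lists, vm->it_lock around the interval tree, vm->freed_lock around the freed list, and an explicit reserve of the page directory around amdgpu_vm_clear_bo(). The fragment below is a minimal sketch of the unmap pattern only, using simplified stand-in types (demo_vm and demo_mapping are not the driver's structures).

#include <linux/interval_tree.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_mapping {
	struct list_head list;
	struct interval_tree_node it;
};

struct demo_vm {
	spinlock_t it_lock;	/* protects @va */
	spinlock_t freed_lock;	/* protects @freed */
	struct rb_root va;
	struct list_head freed;
};

/* Caller holds the per-bo_va mutex that guards @mapping->list. */
static void demo_unmap(struct demo_vm *vm, struct demo_mapping *mapping, bool valid)
{
	list_del(&mapping->list);

	spin_lock(&vm->it_lock);
	interval_tree_remove(&mapping->it, &vm->va);
	spin_unlock(&vm->it_lock);

	if (valid) {
		/* keep the mapping around until the next PT update clears the range */
		spin_lock(&vm->freed_lock);
		list_add(&mapping->list, &vm->freed);
		spin_unlock(&vm->freed_lock);
	} else {
		kfree(mapping);
	}
}
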
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index a0346a90d805..1b50e6c13fb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -685,6 +685,27 @@ static void atom_op_div(atom_exec_context *ctx, int *ptr, int arg)
685 } 685 }
686} 686}
687 687
688static void atom_op_div32(atom_exec_context *ctx, int *ptr, int arg)
689{
690 uint64_t val64;
691 uint8_t attr = U8((*ptr)++);
692 uint32_t dst, src;
693 SDEBUG(" src1: ");
694 dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
695 SDEBUG(" src2: ");
696 src = atom_get_src(ctx, attr, ptr);
697 if (src != 0) {
698 val64 = dst;
699 val64 |= ((uint64_t)ctx->ctx->divmul[1]) << 32;
700 do_div(val64, src);
701 ctx->ctx->divmul[0] = lower_32_bits(val64);
702 ctx->ctx->divmul[1] = upper_32_bits(val64);
703 } else {
704 ctx->ctx->divmul[0] = 0;
705 ctx->ctx->divmul[1] = 0;
706 }
707}
708
688static void atom_op_eot(atom_exec_context *ctx, int *ptr, int arg) 709static void atom_op_eot(atom_exec_context *ctx, int *ptr, int arg)
689{ 710{
690 /* functionally, a nop */ 711 /* functionally, a nop */
@@ -788,6 +809,20 @@ static void atom_op_mul(atom_exec_context *ctx, int *ptr, int arg)
788 ctx->ctx->divmul[0] = dst * src; 809 ctx->ctx->divmul[0] = dst * src;
789} 810}
790 811
812static void atom_op_mul32(atom_exec_context *ctx, int *ptr, int arg)
813{
814 uint64_t val64;
815 uint8_t attr = U8((*ptr)++);
816 uint32_t dst, src;
817 SDEBUG(" src1: ");
818 dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
819 SDEBUG(" src2: ");
820 src = atom_get_src(ctx, attr, ptr);
821 val64 = (uint64_t)dst * (uint64_t)src;
822 ctx->ctx->divmul[0] = lower_32_bits(val64);
823 ctx->ctx->divmul[1] = upper_32_bits(val64);
824}
825
791static void atom_op_nop(atom_exec_context *ctx, int *ptr, int arg) 826static void atom_op_nop(atom_exec_context *ctx, int *ptr, int arg)
792{ 827{
793 /* nothing */ 828 /* nothing */
@@ -1022,7 +1057,15 @@ static void atom_op_xor(atom_exec_context *ctx, int *ptr, int arg)
1022 1057
1023static void atom_op_debug(atom_exec_context *ctx, int *ptr, int arg) 1058static void atom_op_debug(atom_exec_context *ctx, int *ptr, int arg)
1024{ 1059{
1025 printk(KERN_INFO "unimplemented!\n"); 1060 uint8_t val = U8((*ptr)++);
1061 SDEBUG("DEBUG output: 0x%02X\n", val);
1062}
1063
1064static void atom_op_processds(atom_exec_context *ctx, int *ptr, int arg)
1065{
1066 uint16_t val = U16(*ptr);
1067 (*ptr) += val + 2;
1068 SDEBUG("PROCESSDS output: 0x%02X\n", val);
1026} 1069}
1027 1070
1028static struct { 1071static struct {
@@ -1151,7 +1194,13 @@ static struct {
1151 atom_op_shr, ATOM_ARG_FB}, { 1194 atom_op_shr, ATOM_ARG_FB}, {
1152 atom_op_shr, ATOM_ARG_PLL}, { 1195 atom_op_shr, ATOM_ARG_PLL}, {
1153 atom_op_shr, ATOM_ARG_MC}, { 1196 atom_op_shr, ATOM_ARG_MC}, {
1154atom_op_debug, 0},}; 1197 atom_op_debug, 0}, {
1198 atom_op_processds, 0}, {
1199 atom_op_mul32, ATOM_ARG_PS}, {
1200 atom_op_mul32, ATOM_ARG_WS}, {
1201 atom_op_div32, ATOM_ARG_PS}, {
1202 atom_op_div32, ATOM_ARG_WS},
1203};
1155 1204
1156static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t * params) 1205static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t * params)
1157{ 1206{
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
index 09d0f8230708..fece8f45dc7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -60,7 +60,7 @@
60#define ATOM_CT_PS_MASK 0x7F 60#define ATOM_CT_PS_MASK 0x7F
61#define ATOM_CT_CODE_PTR 6 61#define ATOM_CT_CODE_PTR 6
62 62
63#define ATOM_OP_CNT 123 63#define ATOM_OP_CNT 127
64#define ATOM_OP_EOT 91 64#define ATOM_OP_EOT 91
65 65
66#define ATOM_CASE_MAGIC 0x63 66#define ATOM_CASE_MAGIC 0x63
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 82e8d0730517..57a2e347f04d 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6185,6 +6185,11 @@ static int ci_dpm_late_init(void *handle)
6185 if (!amdgpu_dpm) 6185 if (!amdgpu_dpm)
6186 return 0; 6186 return 0;
6187 6187
6188 /* init the sysfs and debugfs files late */
6189 ret = amdgpu_pm_sysfs_init(adev);
6190 if (ret)
6191 return ret;
6192
6188 ret = ci_set_temperature_range(adev); 6193 ret = ci_set_temperature_range(adev);
6189 if (ret) 6194 if (ret)
6190 return ret; 6195 return ret;
@@ -6232,9 +6237,6 @@ static int ci_dpm_sw_init(void *handle)
6232 adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; 6237 adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
6233 if (amdgpu_dpm == 1) 6238 if (amdgpu_dpm == 1)
6234 amdgpu_pm_print_power_states(adev); 6239 amdgpu_pm_print_power_states(adev);
6235 ret = amdgpu_pm_sysfs_init(adev);
6236 if (ret)
6237 goto dpm_failed;
6238 mutex_unlock(&adev->pm.mutex); 6240 mutex_unlock(&adev->pm.mutex);
6239 DRM_INFO("amdgpu: dpm initialized\n"); 6241 DRM_INFO("amdgpu: dpm initialized\n");
6240 6242
@@ -6567,12 +6569,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
6567 switch (state) { 6569 switch (state) {
6568 case AMDGPU_IRQ_STATE_DISABLE: 6570 case AMDGPU_IRQ_STATE_DISABLE:
6569 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); 6571 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
6570 cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; 6572 cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
6571 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); 6573 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
6572 break; 6574 break;
6573 case AMDGPU_IRQ_STATE_ENABLE: 6575 case AMDGPU_IRQ_STATE_ENABLE:
6574 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); 6576 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
6575 cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; 6577 cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
6576 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); 6578 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
6577 break; 6579 break;
6578 default: 6580 default:
@@ -6584,12 +6586,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
6584 switch (state) { 6586 switch (state) {
6585 case AMDGPU_IRQ_STATE_DISABLE: 6587 case AMDGPU_IRQ_STATE_DISABLE:
6586 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); 6588 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
6587 cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; 6589 cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
6588 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); 6590 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
6589 break; 6591 break;
6590 case AMDGPU_IRQ_STATE_ENABLE: 6592 case AMDGPU_IRQ_STATE_ENABLE:
6591 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); 6593 cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
6592 cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; 6594 cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
6593 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); 6595 WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
6594 break; 6596 break;
6595 default: 6597 default:
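
The thermal-interrupt hunks above fix an inverted mask: in CG_THERMAL_INT the *_MASK bits suppress the interrupt, so the DISABLE case must set them and the ENABLE case must clear them. A tiny sketch of that convention follows, with an illustrative bit position rather than the real register layout.

#include <stdbool.h>
#include <stdint.h>

#define DEMO_THERM_INTH_MASK	(1u << 24)	/* illustrative bit, not the real field */

/* The MASK bit suppresses the interrupt: set it to disable, clear it to enable. */
static uint32_t demo_set_therm_int(uint32_t cg_thermal_int, bool enable)
{
	if (enable)
		cg_thermal_int &= ~DEMO_THERM_INTH_MASK;
	else
		cg_thermal_int |= DEMO_THERM_INTH_MASK;

	return cg_thermal_int;
}
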
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 4b6ce74753cd..484710cfdf82 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1567,6 +1567,9 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
1567 int ret, i; 1567 int ret, i;
1568 u16 tmp16; 1568 u16 tmp16;
1569 1569
1570 if (pci_is_root_bus(adev->pdev->bus))
1571 return;
1572
1570 if (amdgpu_pcie_gen2 == 0) 1573 if (amdgpu_pcie_gen2 == 0)
1571 return; 1574 return;
1572 1575
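
The cik.c hunk adds an early return when the device sits directly on the PCI root bus, as integrated parts do, since there is then no discrete PCIe link to retrain. A minimal sketch of the guard, assuming only a pci_dev pointer:

#include <linux/pci.h>

static void demo_pcie_gen3_enable(struct pci_dev *pdev)
{
	if (pci_is_root_bus(pdev->bus))
		return;		/* integrated GPU: no link of its own to retrain */

	/* ... link speed negotiation would follow here ... */
}
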
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 9ea9de457da3..5f712ceddf08 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -96,7 +96,7 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
96{ 96{
97 const char *chip_name; 97 const char *chip_name;
98 char fw_name[30]; 98 char fw_name[30];
99 int err, i; 99 int err = 0, i;
100 100
101 DRM_DEBUG("\n"); 101 DRM_DEBUG("\n");
102 102
@@ -119,24 +119,24 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
119 default: BUG(); 119 default: BUG();
120 } 120 }
121 121
122 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 122 for (i = 0; i < adev->sdma.num_instances; i++) {
123 if (i == 0) 123 if (i == 0)
124 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name); 124 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
125 else 125 else
126 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma1.bin", chip_name); 126 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma1.bin", chip_name);
127 err = request_firmware(&adev->sdma[i].fw, fw_name, adev->dev); 127 err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
128 if (err) 128 if (err)
129 goto out; 129 goto out;
130 err = amdgpu_ucode_validate(adev->sdma[i].fw); 130 err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
131 } 131 }
132out: 132out:
133 if (err) { 133 if (err) {
134 printk(KERN_ERR 134 printk(KERN_ERR
135 "cik_sdma: Failed to load firmware \"%s\"\n", 135 "cik_sdma: Failed to load firmware \"%s\"\n",
136 fw_name); 136 fw_name);
137 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 137 for (i = 0; i < adev->sdma.num_instances; i++) {
138 release_firmware(adev->sdma[i].fw); 138 release_firmware(adev->sdma.instance[i].fw);
139 adev->sdma[i].fw = NULL; 139 adev->sdma.instance[i].fw = NULL;
140 } 140 }
141 } 141 }
142 return err; 142 return err;
@@ -168,7 +168,7 @@ static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
168static uint32_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) 168static uint32_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
169{ 169{
170 struct amdgpu_device *adev = ring->adev; 170 struct amdgpu_device *adev = ring->adev;
171 u32 me = (ring == &adev->sdma[0].ring) ? 0 : 1; 171 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
172 172
173 return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; 173 return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
174} 174}
@@ -183,14 +183,14 @@ static uint32_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
183static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) 183static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
184{ 184{
185 struct amdgpu_device *adev = ring->adev; 185 struct amdgpu_device *adev = ring->adev;
186 u32 me = (ring == &adev->sdma[0].ring) ? 0 : 1; 186 u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
187 187
188 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); 188 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
189} 189}
190 190
191static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 191static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
192{ 192{
193 struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); 193 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
194 int i; 194 int i;
195 195
196 for (i = 0; i < count; i++) 196 for (i = 0; i < count; i++)
@@ -248,7 +248,7 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
248 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */ 248 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
249 u32 ref_and_mask; 249 u32 ref_and_mask;
250 250
251 if (ring == &ring->adev->sdma[0].ring) 251 if (ring == &ring->adev->sdma.instance[0].ring)
252 ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA0_MASK; 252 ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA0_MASK;
253 else 253 else
254 ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA1_MASK; 254 ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA1_MASK;
@@ -327,8 +327,8 @@ static bool cik_sdma_ring_emit_semaphore(struct amdgpu_ring *ring,
327 */ 327 */
328static void cik_sdma_gfx_stop(struct amdgpu_device *adev) 328static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
329{ 329{
330 struct amdgpu_ring *sdma0 = &adev->sdma[0].ring; 330 struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
331 struct amdgpu_ring *sdma1 = &adev->sdma[1].ring; 331 struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
332 u32 rb_cntl; 332 u32 rb_cntl;
333 int i; 333 int i;
334 334
@@ -336,7 +336,7 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
336 (adev->mman.buffer_funcs_ring == sdma1)) 336 (adev->mman.buffer_funcs_ring == sdma1))
337 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 337 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
338 338
339 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 339 for (i = 0; i < adev->sdma.num_instances; i++) {
340 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 340 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
341 rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK; 341 rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK;
342 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 342 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
@@ -376,7 +376,7 @@ static void cik_sdma_enable(struct amdgpu_device *adev, bool enable)
376 cik_sdma_rlc_stop(adev); 376 cik_sdma_rlc_stop(adev);
377 } 377 }
378 378
379 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 379 for (i = 0; i < adev->sdma.num_instances; i++) {
380 me_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]); 380 me_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
381 if (enable) 381 if (enable)
382 me_cntl &= ~SDMA0_F32_CNTL__HALT_MASK; 382 me_cntl &= ~SDMA0_F32_CNTL__HALT_MASK;
@@ -402,8 +402,8 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
402 u32 wb_offset; 402 u32 wb_offset;
403 int i, j, r; 403 int i, j, r;
404 404
405 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 405 for (i = 0; i < adev->sdma.num_instances; i++) {
406 ring = &adev->sdma[i].ring; 406 ring = &adev->sdma.instance[i].ring;
407 wb_offset = (ring->rptr_offs * 4); 407 wb_offset = (ring->rptr_offs * 4);
408 408
409 mutex_lock(&adev->srbm_mutex); 409 mutex_lock(&adev->srbm_mutex);
@@ -502,26 +502,25 @@ static int cik_sdma_load_microcode(struct amdgpu_device *adev)
502 u32 fw_size; 502 u32 fw_size;
503 int i, j; 503 int i, j;
504 504
505 if (!adev->sdma[0].fw || !adev->sdma[1].fw)
506 return -EINVAL;
507
508 /* halt the MEs */ 505 /* halt the MEs */
509 cik_sdma_enable(adev, false); 506 cik_sdma_enable(adev, false);
510 507
511 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 508 for (i = 0; i < adev->sdma.num_instances; i++) {
512 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; 509 if (!adev->sdma.instance[i].fw)
510 return -EINVAL;
511 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
513 amdgpu_ucode_print_sdma_hdr(&hdr->header); 512 amdgpu_ucode_print_sdma_hdr(&hdr->header);
514 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 513 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
515 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); 514 adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
516 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); 515 adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
517 if (adev->sdma[i].feature_version >= 20) 516 if (adev->sdma.instance[i].feature_version >= 20)
518 adev->sdma[i].burst_nop = true; 517 adev->sdma.instance[i].burst_nop = true;
519 fw_data = (const __le32 *) 518 fw_data = (const __le32 *)
520 (adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 519 (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
521 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0); 520 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
522 for (j = 0; j < fw_size; j++) 521 for (j = 0; j < fw_size; j++)
523 WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++)); 522 WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
524 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma[i].fw_version); 523 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
525 } 524 }
526 525
527 return 0; 526 return 0;
@@ -830,7 +829,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
830 */ 829 */
831static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib) 830static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
832{ 831{
833 struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); 832 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
834 u32 pad_count; 833 u32 pad_count;
835 int i; 834 int i;
836 835
@@ -934,6 +933,8 @@ static int cik_sdma_early_init(void *handle)
934{ 933{
935 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 934 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
936 935
936 adev->sdma.num_instances = SDMA_MAX_INSTANCE;
937
937 cik_sdma_set_ring_funcs(adev); 938 cik_sdma_set_ring_funcs(adev);
938 cik_sdma_set_irq_funcs(adev); 939 cik_sdma_set_irq_funcs(adev);
939 cik_sdma_set_buffer_funcs(adev); 940 cik_sdma_set_buffer_funcs(adev);
@@ -946,7 +947,7 @@ static int cik_sdma_sw_init(void *handle)
946{ 947{
947 struct amdgpu_ring *ring; 948 struct amdgpu_ring *ring;
948 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 949 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
949 int r; 950 int r, i;
950 951
951 r = cik_sdma_init_microcode(adev); 952 r = cik_sdma_init_microcode(adev);
952 if (r) { 953 if (r) {
@@ -955,43 +956,33 @@ static int cik_sdma_sw_init(void *handle)
955 } 956 }
956 957
957 /* SDMA trap event */ 958 /* SDMA trap event */
958 r = amdgpu_irq_add_id(adev, 224, &adev->sdma_trap_irq); 959 r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq);
959 if (r) 960 if (r)
960 return r; 961 return r;
961 962
962 /* SDMA Privileged inst */ 963 /* SDMA Privileged inst */
963 r = amdgpu_irq_add_id(adev, 241, &adev->sdma_illegal_inst_irq); 964 r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq);
964 if (r) 965 if (r)
965 return r; 966 return r;
966 967
967 /* SDMA Privileged inst */ 968 /* SDMA Privileged inst */
968 r = amdgpu_irq_add_id(adev, 247, &adev->sdma_illegal_inst_irq); 969 r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq);
969 if (r) 970 if (r)
970 return r; 971 return r;
971 972
972 ring = &adev->sdma[0].ring; 973 for (i = 0; i < adev->sdma.num_instances; i++) {
973 ring->ring_obj = NULL; 974 ring = &adev->sdma.instance[i].ring;
974 975 ring->ring_obj = NULL;
975 ring = &adev->sdma[1].ring; 976 sprintf(ring->name, "sdma%d", i);
976 ring->ring_obj = NULL; 977 r = amdgpu_ring_init(adev, ring, 256 * 1024,
977 978 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), 0xf,
978 ring = &adev->sdma[0].ring; 979 &adev->sdma.trap_irq,
979 sprintf(ring->name, "sdma0"); 980 (i == 0) ?
980 r = amdgpu_ring_init(adev, ring, 256 * 1024, 981 AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1,
981 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), 0xf, 982 AMDGPU_RING_TYPE_SDMA);
982 &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP0, 983 if (r)
983 AMDGPU_RING_TYPE_SDMA); 984 return r;
984 if (r) 985 }
985 return r;
986
987 ring = &adev->sdma[1].ring;
988 sprintf(ring->name, "sdma1");
989 r = amdgpu_ring_init(adev, ring, 256 * 1024,
990 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), 0xf,
991 &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP1,
992 AMDGPU_RING_TYPE_SDMA);
993 if (r)
994 return r;
995 986
996 return r; 987 return r;
997} 988}
@@ -999,9 +990,10 @@ static int cik_sdma_sw_init(void *handle)
999static int cik_sdma_sw_fini(void *handle) 990static int cik_sdma_sw_fini(void *handle)
1000{ 991{
1001 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 992 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
993 int i;
1002 994
1003 amdgpu_ring_fini(&adev->sdma[0].ring); 995 for (i = 0; i < adev->sdma.num_instances; i++)
1004 amdgpu_ring_fini(&adev->sdma[1].ring); 996 amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1005 997
1006 return 0; 998 return 0;
1007} 999}
@@ -1078,7 +1070,7 @@ static void cik_sdma_print_status(void *handle)
1078 dev_info(adev->dev, "CIK SDMA registers\n"); 1070 dev_info(adev->dev, "CIK SDMA registers\n");
1079 dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", 1071 dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
1080 RREG32(mmSRBM_STATUS2)); 1072 RREG32(mmSRBM_STATUS2));
1081 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 1073 for (i = 0; i < adev->sdma.num_instances; i++) {
1082 dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n", 1074 dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n",
1083 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i])); 1075 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i]));
1084 dev_info(adev->dev, " SDMA%d_ME_CNTL=0x%08X\n", 1076 dev_info(adev->dev, " SDMA%d_ME_CNTL=0x%08X\n",
@@ -1223,7 +1215,7 @@ static int cik_sdma_process_trap_irq(struct amdgpu_device *adev,
1223 case 0: 1215 case 0:
1224 switch (queue_id) { 1216 switch (queue_id) {
1225 case 0: 1217 case 0:
1226 amdgpu_fence_process(&adev->sdma[0].ring); 1218 amdgpu_fence_process(&adev->sdma.instance[0].ring);
1227 break; 1219 break;
1228 case 1: 1220 case 1:
1229 /* XXX compute */ 1221 /* XXX compute */
@@ -1236,7 +1228,7 @@ static int cik_sdma_process_trap_irq(struct amdgpu_device *adev,
1236 case 1: 1228 case 1:
1237 switch (queue_id) { 1229 switch (queue_id) {
1238 case 0: 1230 case 0:
1239 amdgpu_fence_process(&adev->sdma[1].ring); 1231 amdgpu_fence_process(&adev->sdma.instance[1].ring);
1240 break; 1232 break;
1241 case 1: 1233 case 1:
1242 /* XXX compute */ 1234 /* XXX compute */
@@ -1298,24 +1290,6 @@ const struct amd_ip_funcs cik_sdma_ip_funcs = {
1298 .set_powergating_state = cik_sdma_set_powergating_state, 1290 .set_powergating_state = cik_sdma_set_powergating_state,
1299}; 1291};
1300 1292
1301/**
1302 * cik_sdma_ring_is_lockup - Check if the DMA engine is locked up
1303 *
1304 * @ring: amdgpu_ring structure holding ring information
1305 *
1306 * Check if the async DMA engine is locked up (CIK).
1307 * Returns true if the engine appears to be locked up, false if not.
1308 */
1309static bool cik_sdma_ring_is_lockup(struct amdgpu_ring *ring)
1310{
1311
1312 if (cik_sdma_is_idle(ring->adev)) {
1313 amdgpu_ring_lockup_update(ring);
1314 return false;
1315 }
1316 return amdgpu_ring_test_lockup(ring);
1317}
1318
1319static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { 1293static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
1320 .get_rptr = cik_sdma_ring_get_rptr, 1294 .get_rptr = cik_sdma_ring_get_rptr,
1321 .get_wptr = cik_sdma_ring_get_wptr, 1295 .get_wptr = cik_sdma_ring_get_wptr,
@@ -1328,14 +1302,15 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
1328 .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush, 1302 .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
1329 .test_ring = cik_sdma_ring_test_ring, 1303 .test_ring = cik_sdma_ring_test_ring,
1330 .test_ib = cik_sdma_ring_test_ib, 1304 .test_ib = cik_sdma_ring_test_ib,
1331 .is_lockup = cik_sdma_ring_is_lockup,
1332 .insert_nop = cik_sdma_ring_insert_nop, 1305 .insert_nop = cik_sdma_ring_insert_nop,
1333}; 1306};
1334 1307
1335static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) 1308static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
1336{ 1309{
1337 adev->sdma[0].ring.funcs = &cik_sdma_ring_funcs; 1310 int i;
1338 adev->sdma[1].ring.funcs = &cik_sdma_ring_funcs; 1311
1312 for (i = 0; i < adev->sdma.num_instances; i++)
1313 adev->sdma.instance[i].ring.funcs = &cik_sdma_ring_funcs;
1339} 1314}
1340 1315
1341static const struct amdgpu_irq_src_funcs cik_sdma_trap_irq_funcs = { 1316static const struct amdgpu_irq_src_funcs cik_sdma_trap_irq_funcs = {
@@ -1349,9 +1324,9 @@ static const struct amdgpu_irq_src_funcs cik_sdma_illegal_inst_irq_funcs = {
1349 1324
1350static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) 1325static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
1351{ 1326{
1352 adev->sdma_trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 1327 adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
1353 adev->sdma_trap_irq.funcs = &cik_sdma_trap_irq_funcs; 1328 adev->sdma.trap_irq.funcs = &cik_sdma_trap_irq_funcs;
1354 adev->sdma_illegal_inst_irq.funcs = &cik_sdma_illegal_inst_irq_funcs; 1329 adev->sdma.illegal_inst_irq.funcs = &cik_sdma_illegal_inst_irq_funcs;
1355} 1330}
1356 1331
1357/** 1332/**
@@ -1416,7 +1391,7 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
1416{ 1391{
1417 if (adev->mman.buffer_funcs == NULL) { 1392 if (adev->mman.buffer_funcs == NULL) {
1418 adev->mman.buffer_funcs = &cik_sdma_buffer_funcs; 1393 adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
1419 adev->mman.buffer_funcs_ring = &adev->sdma[0].ring; 1394 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1420 } 1395 }
1421} 1396}
1422 1397
@@ -1431,7 +1406,7 @@ static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
1431{ 1406{
1432 if (adev->vm_manager.vm_pte_funcs == NULL) { 1407 if (adev->vm_manager.vm_pte_funcs == NULL) {
1433 adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; 1408 adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
1434 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; 1409 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
1435 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; 1410 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
1436 } 1411 }
1437} 1412}
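
Throughout cik_sdma.c the hard-coded two-engine layout (adev->sdma[0]/[1] and SDMA_MAX_INSTANCE loops) is replaced by adev->sdma.num_instances and an instance[] array, so every per-engine loop is bounded by the count set in early_init. A simplified sketch of that shape, using stand-in types rather than the driver's:

#define DEMO_MAX_SDMA	2

struct demo_sdma_instance {
	int fw_version;
	int ring_ready;
};

struct demo_sdma {
	struct demo_sdma_instance instance[DEMO_MAX_SDMA];
	int num_instances;	/* set once in early_init; 1 on single-engine parts */
};

/* Bound every per-engine loop by num_instances, not the compile-time maximum,
 * so a single-SDMA ASIC never touches registers of an engine it lacks. */
static void demo_init_rings(struct demo_sdma *sdma)
{
	int i;

	for (i = 0; i < sdma->num_instances; i++)
		sdma->instance[i].ring_ready = 1;
}
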
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 44fa96ad4709..8035d4d6a4f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -596,6 +596,12 @@ static int cz_dpm_late_init(void *handle)
596 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 596 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
597 597
598 if (amdgpu_dpm) { 598 if (amdgpu_dpm) {
599 int ret;
600 /* init the sysfs and debugfs files late */
601 ret = amdgpu_pm_sysfs_init(adev);
602 if (ret)
603 return ret;
604
599 /* powerdown unused blocks for now */ 605 /* powerdown unused blocks for now */
600 cz_dpm_powergate_uvd(adev, true); 606 cz_dpm_powergate_uvd(adev, true);
601 cz_dpm_powergate_vce(adev, true); 607 cz_dpm_powergate_vce(adev, true);
@@ -632,10 +638,6 @@ static int cz_dpm_sw_init(void *handle)
632 if (amdgpu_dpm == 1) 638 if (amdgpu_dpm == 1)
633 amdgpu_pm_print_power_states(adev); 639 amdgpu_pm_print_power_states(adev);
634 640
635 ret = amdgpu_pm_sysfs_init(adev);
636 if (ret)
637 goto dpm_init_failed;
638
639 mutex_unlock(&adev->pm.mutex); 641 mutex_unlock(&adev->pm.mutex);
640 DRM_INFO("amdgpu: dpm initialized\n"); 642 DRM_INFO("amdgpu: dpm initialized\n");
641 643
@@ -1262,6 +1264,7 @@ static void cz_apply_state_adjust_rules(struct amdgpu_device *adev,
1262 1264
1263static int cz_dpm_enable(struct amdgpu_device *adev) 1265static int cz_dpm_enable(struct amdgpu_device *adev)
1264{ 1266{
1267 const char *chip_name;
1265 int ret = 0; 1268 int ret = 0;
1266 1269
1267 /* re-enabling will hang up the SMU, so check first */ 1270 /* re-enabling will hang up the SMU, so check first */
@@ -1270,21 +1273,33 @@ static int cz_dpm_enable(struct amdgpu_device *adev)
1270 1273
1271 cz_program_voting_clients(adev); 1274 cz_program_voting_clients(adev);
1272 1275
1276 switch (adev->asic_type) {
1277 case CHIP_CARRIZO:
1278 chip_name = "carrizo";
1279 break;
1280 case CHIP_STONEY:
1281 chip_name = "stoney";
1282 break;
1283 default:
1284 BUG();
1285 }
1286
1287
1273 ret = cz_start_dpm(adev); 1288 ret = cz_start_dpm(adev);
1274 if (ret) { 1289 if (ret) {
1275 DRM_ERROR("Carrizo DPM enable failed\n"); 1290 DRM_ERROR("%s DPM enable failed\n", chip_name);
1276 return -EINVAL; 1291 return -EINVAL;
1277 } 1292 }
1278 1293
1279 ret = cz_program_bootup_state(adev); 1294 ret = cz_program_bootup_state(adev);
1280 if (ret) { 1295 if (ret) {
1281 DRM_ERROR("Carrizo bootup state program failed\n"); 1296 DRM_ERROR("%s bootup state program failed\n", chip_name);
1282 return -EINVAL; 1297 return -EINVAL;
1283 } 1298 }
1284 1299
1285 ret = cz_enable_didt(adev, true); 1300 ret = cz_enable_didt(adev, true);
1286 if (ret) { 1301 if (ret) {
1287 DRM_ERROR("Carrizo enable di/dt failed\n"); 1302 DRM_ERROR("%s enable di/dt failed\n", chip_name);
1288 return -EINVAL; 1303 return -EINVAL;
1289 } 1304 }
1290 1305
@@ -1351,7 +1366,7 @@ static int cz_dpm_disable(struct amdgpu_device *adev)
1351 1366
1352 ret = cz_enable_didt(adev, false); 1367 ret = cz_enable_didt(adev, false);
1353 if (ret) { 1368 if (ret) {
1354 DRM_ERROR("Carrizo disable di/dt failed\n"); 1369 DRM_ERROR("disable di/dt failed\n");
1355 return -EINVAL; 1370 return -EINVAL;
1356 } 1371 }
1357 1372
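
Besides moving sysfs registration to late_init, the cz_dpm.c hunks replace the hard-coded "Carrizo" strings in error messages with a chip_name selected from the ASIC type. A minimal sketch of that lookup, with stand-in enum values:

/* Stand-in enum; the driver switches on adev->asic_type instead. */
enum demo_asic { DEMO_CHIP_CARRIZO, DEMO_CHIP_STONEY };

static const char *demo_chip_name(enum demo_asic asic)
{
	switch (asic) {
	case DEMO_CHIP_CARRIZO:
		return "carrizo";
	case DEMO_CHIP_STONEY:
		return "stoney";
	}
	return "unknown";
}

/* e.g. DRM_ERROR("%s DPM enable failed\n", demo_chip_name(asic)); */
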
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_smc.c b/drivers/gpu/drm/amd/amdgpu/cz_smc.c
index e33180d3314a..ac7fee7b7eca 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_smc.c
@@ -312,13 +312,16 @@ int cz_smu_start(struct amdgpu_device *adev)
312 UCODE_ID_CP_MEC_JT1_MASK | 312 UCODE_ID_CP_MEC_JT1_MASK |
313 UCODE_ID_CP_MEC_JT2_MASK; 313 UCODE_ID_CP_MEC_JT2_MASK;
314 314
315 if (adev->asic_type == CHIP_STONEY)
316 fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
317
315 cz_smu_request_load_fw(adev); 318 cz_smu_request_load_fw(adev);
316 ret = cz_smu_check_fw_load_finish(adev, fw_to_check); 319 ret = cz_smu_check_fw_load_finish(adev, fw_to_check);
317 if (ret) 320 if (ret)
318 return ret; 321 return ret;
319 322
320 /* manually load MEC firmware for CZ */ 323 /* manually load MEC firmware for CZ */
321 if (adev->asic_type == CHIP_CARRIZO) { 324 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) {
322 ret = cz_load_mec_firmware(adev); 325 ret = cz_load_mec_firmware(adev);
323 if (ret) { 326 if (ret) {
324 dev_err(adev->dev, "(%d) Mec Firmware load failed\n", ret); 327 dev_err(adev->dev, "(%d) Mec Firmware load failed\n", ret);
@@ -336,6 +339,9 @@ int cz_smu_start(struct amdgpu_device *adev)
336 AMDGPU_CPMEC2_UCODE_LOADED | 339 AMDGPU_CPMEC2_UCODE_LOADED |
337 AMDGPU_CPRLC_UCODE_LOADED; 340 AMDGPU_CPRLC_UCODE_LOADED;
338 341
342 if (adev->asic_type == CHIP_STONEY)
343 adev->smu.fw_flags &= ~(AMDGPU_SDMA1_UCODE_LOADED | AMDGPU_CPMEC2_UCODE_LOADED);
344
339 return ret; 345 return ret;
340} 346}
341 347
@@ -601,8 +607,13 @@ static int cz_smu_construct_toc_for_vddgfx_exit(struct amdgpu_device *adev)
601 CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false); 607 CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
602 cz_smu_populate_single_ucode_load_task(adev, 608 cz_smu_populate_single_ucode_load_task(adev,
603 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false); 609 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
604 cz_smu_populate_single_ucode_load_task(adev, 610 if (adev->asic_type == CHIP_STONEY) {
611 cz_smu_populate_single_ucode_load_task(adev,
612 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
613 } else {
614 cz_smu_populate_single_ucode_load_task(adev,
605 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false); 615 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
616 }
606 cz_smu_populate_single_ucode_load_task(adev, 617 cz_smu_populate_single_ucode_load_task(adev,
607 CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false); 618 CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
608 } 619 }
@@ -642,8 +653,13 @@ static int cz_smu_construct_toc_for_bootup(struct amdgpu_device *adev)
642 if (adev->firmware.smu_load) { 653 if (adev->firmware.smu_load) {
643 cz_smu_populate_single_ucode_load_task(adev, 654 cz_smu_populate_single_ucode_load_task(adev,
644 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false); 655 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
645 cz_smu_populate_single_ucode_load_task(adev, 656 if (adev->asic_type == CHIP_STONEY) {
657 cz_smu_populate_single_ucode_load_task(adev,
658 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
659 } else {
660 cz_smu_populate_single_ucode_load_task(adev,
646 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false); 661 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
662 }
647 cz_smu_populate_single_ucode_load_task(adev, 663 cz_smu_populate_single_ucode_load_task(adev,
648 CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false); 664 CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
649 cz_smu_populate_single_ucode_load_task(adev, 665 cz_smu_populate_single_ucode_load_task(adev,
@@ -652,8 +668,13 @@ static int cz_smu_construct_toc_for_bootup(struct amdgpu_device *adev)
652 CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false); 668 CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
653 cz_smu_populate_single_ucode_load_task(adev, 669 cz_smu_populate_single_ucode_load_task(adev,
654 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false); 670 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
655 cz_smu_populate_single_ucode_load_task(adev, 671 if (adev->asic_type == CHIP_STONEY) {
672 cz_smu_populate_single_ucode_load_task(adev,
673 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
674 } else {
675 cz_smu_populate_single_ucode_load_task(adev,
656 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false); 676 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
677 }
657 cz_smu_populate_single_ucode_load_task(adev, 678 cz_smu_populate_single_ucode_load_task(adev,
658 CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true); 679 CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
659 } 680 }
@@ -888,10 +909,18 @@ int cz_smu_init(struct amdgpu_device *adev)
888 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, 909 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0,
889 &priv->driver_buffer[priv->driver_buffer_length++])) 910 &priv->driver_buffer[priv->driver_buffer_length++]))
890 goto smu_init_failed; 911 goto smu_init_failed;
891 if (cz_smu_populate_single_firmware_entry(adev, 912
892 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, 913 if (adev->asic_type == CHIP_STONEY) {
893 &priv->driver_buffer[priv->driver_buffer_length++])) 914 if (cz_smu_populate_single_firmware_entry(adev,
894 goto smu_init_failed; 915 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0,
916 &priv->driver_buffer[priv->driver_buffer_length++]))
917 goto smu_init_failed;
918 } else {
919 if (cz_smu_populate_single_firmware_entry(adev,
920 CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1,
921 &priv->driver_buffer[priv->driver_buffer_length++]))
922 goto smu_init_failed;
923 }
895 if (cz_smu_populate_single_firmware_entry(adev, 924 if (cz_smu_populate_single_firmware_entry(adev,
896 CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, 925 CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE,
897 &priv->driver_buffer[priv->driver_buffer_length++])) 926 &priv->driver_buffer[priv->driver_buffer_length++]))
@@ -908,10 +937,17 @@ int cz_smu_init(struct amdgpu_device *adev)
908 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, 937 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1,
909 &priv->driver_buffer[priv->driver_buffer_length++])) 938 &priv->driver_buffer[priv->driver_buffer_length++]))
910 goto smu_init_failed; 939 goto smu_init_failed;
911 if (cz_smu_populate_single_firmware_entry(adev, 940 if (adev->asic_type == CHIP_STONEY) {
912 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, 941 if (cz_smu_populate_single_firmware_entry(adev,
913 &priv->driver_buffer[priv->driver_buffer_length++])) 942 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1,
914 goto smu_init_failed; 943 &priv->driver_buffer[priv->driver_buffer_length++]))
944 goto smu_init_failed;
945 } else {
946 if (cz_smu_populate_single_firmware_entry(adev,
947 CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2,
948 &priv->driver_buffer[priv->driver_buffer_length++]))
949 goto smu_init_failed;
950 }
915 if (cz_smu_populate_single_firmware_entry(adev, 951 if (cz_smu_populate_single_firmware_entry(adev,
916 CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, 952 CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G,
917 &priv->driver_buffer[priv->driver_buffer_length++])) 953 &priv->driver_buffer[priv->driver_buffer_length++]))
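
The cz_smc.c hunks handle Stoney, which has a single SDMA engine and a single MEC jump table: wherever Carrizo queues SDMA1 or CP_MEC_JT2 firmware, Stoney queues SDMA0 or CP_MEC_JT1 again, and the corresponding loaded flags are masked out. A hedged sketch of that selection, with simplified identifiers in place of the driver's scratch-entry IDs:

enum demo_ucode {
	DEMO_UCODE_SDMA0,
	DEMO_UCODE_SDMA1,
	DEMO_UCODE_CP_MEC_JT1,
	DEMO_UCODE_CP_MEC_JT2,
};

static enum demo_ucode demo_pick_ucode(enum demo_ucode want, int is_stoney)
{
	if (!is_stoney)
		return want;
	if (want == DEMO_UCODE_SDMA1)
		return DEMO_UCODE_SDMA0;	/* only one SDMA engine */
	if (want == DEMO_UCODE_CP_MEC_JT2)
		return DEMO_UCODE_CP_MEC_JT1;	/* only one MEC jump table */
	return want;
}
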
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index e4d101b1252a..4dcc8fba5792 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -255,6 +255,24 @@ static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
255 return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]); 255 return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
256} 256}
257 257
258static void dce_v10_0_pageflip_interrupt_init(struct amdgpu_device *adev)
259{
260 unsigned i;
261
262 /* Enable pflip interrupts */
263 for (i = 0; i < adev->mode_info.num_crtc; i++)
264 amdgpu_irq_get(adev, &adev->pageflip_irq, i);
265}
266
267static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
268{
269 unsigned i;
270
271 /* Disable pflip interrupts */
272 for (i = 0; i < adev->mode_info.num_crtc; i++)
273 amdgpu_irq_put(adev, &adev->pageflip_irq, i);
274}
275
258/** 276/**
259 * dce_v10_0_page_flip - pageflip callback. 277 * dce_v10_0_page_flip - pageflip callback.
260 * 278 *
@@ -262,46 +280,22 @@ static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
262 * @crtc_id: crtc to cleanup pageflip on 280 * @crtc_id: crtc to cleanup pageflip on
263 * @crtc_base: new address of the crtc (GPU MC address) 281 * @crtc_base: new address of the crtc (GPU MC address)
264 * 282 *
265 * Does the actual pageflip (evergreen+). 283 * Triggers the actual pageflip by updating the primary
266 * During vblank we take the crtc lock and wait for the update_pending 284 * surface base address.
267 * bit to go high, when it does, we release the lock, and allow the
268 * double buffered update to take place.
269 * Returns the current update pending status.
270 */ 285 */
271static void dce_v10_0_page_flip(struct amdgpu_device *adev, 286static void dce_v10_0_page_flip(struct amdgpu_device *adev,
272 int crtc_id, u64 crtc_base) 287 int crtc_id, u64 crtc_base)
273{ 288{
274 struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; 289 struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
275 u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
276 int i;
277
278 /* Lock the graphics update lock */
279 tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
280 WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
281
282 /* update the scanout addresses */
283 WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
284 upper_32_bits(crtc_base));
285 WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
286 lower_32_bits(crtc_base));
287 290
291 /* update the primary scanout address */
288 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, 292 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
289 upper_32_bits(crtc_base)); 293 upper_32_bits(crtc_base));
294 /* writing to the low address triggers the update */
290 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, 295 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
291 lower_32_bits(crtc_base)); 296 lower_32_bits(crtc_base));
292 297 /* post the write */
293 /* Wait for update_pending to go high. */ 298 RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
294 for (i = 0; i < adev->usec_timeout; i++) {
295 if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
296 GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
297 break;
298 udelay(1);
299 }
300 DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
301
302 /* Unlock the lock, so double-buffering can take place inside vblank */
303 tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
304 WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
305} 299}
306 300
307static int dce_v10_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, 301static int dce_v10_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
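
The rewritten dce_v10_0_page_flip() above no longer takes the GRPH_UPDATE lock and polls for the pending bit; it writes the high half of the new scanout address, then the low half (which latches the flip for the next scanout), and reads the register back to post the write. A standalone sketch of that ordering, with a pair of fake registers standing in for the WREG32()/RREG32() accesses:

#include <stdint.h>

static volatile uint32_t demo_surface_addr_lo;
static volatile uint32_t demo_surface_addr_hi;

static void demo_page_flip(uint64_t crtc_base)
{
	/* high half first; it only takes effect once the low half is written */
	demo_surface_addr_hi = (uint32_t)(crtc_base >> 32);
	/* writing the low half latches the new address for the next scanout */
	demo_surface_addr_lo = (uint32_t)crtc_base;
	/* read back to post the write before returning */
	(void)demo_surface_addr_lo;
}
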
@@ -1256,7 +1250,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
1256 u32 pixel_period; 1250 u32 pixel_period;
1257 u32 line_time = 0; 1251 u32 line_time = 0;
1258 u32 latency_watermark_a = 0, latency_watermark_b = 0; 1252 u32 latency_watermark_a = 0, latency_watermark_b = 0;
1259 u32 tmp, wm_mask; 1253 u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
1260 1254
1261 if (amdgpu_crtc->base.enabled && num_heads && mode) { 1255 if (amdgpu_crtc->base.enabled && num_heads && mode) {
1262 pixel_period = 1000000 / (u32)mode->clock; 1256 pixel_period = 1000000 / (u32)mode->clock;
@@ -1339,6 +1333,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
1339 (adev->mode_info.disp_priority == 2)) { 1333 (adev->mode_info.disp_priority == 2)) {
1340 DRM_DEBUG_KMS("force priority to high\n"); 1334 DRM_DEBUG_KMS("force priority to high\n");
1341 } 1335 }
1336 lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
1342 } 1337 }
1343 1338
1344 /* select wm A */ 1339 /* select wm A */
@@ -1363,6 +1358,8 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
1363 amdgpu_crtc->line_time = line_time; 1358 amdgpu_crtc->line_time = line_time;
1364 amdgpu_crtc->wm_high = latency_watermark_a; 1359 amdgpu_crtc->wm_high = latency_watermark_a;
1365 amdgpu_crtc->wm_low = latency_watermark_b; 1360 amdgpu_crtc->wm_low = latency_watermark_b;
1361 /* Save number of lines the linebuffer leads before the scanout */
1362 amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
1366} 1363}
1367 1364
1368/** 1365/**
@@ -2499,26 +2496,19 @@ static void dce_v10_0_show_cursor(struct drm_crtc *crtc)
2499 struct amdgpu_device *adev = crtc->dev->dev_private; 2496 struct amdgpu_device *adev = crtc->dev->dev_private;
2500 u32 tmp; 2497 u32 tmp;
2501 2498
2499 WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
2500 upper_32_bits(amdgpu_crtc->cursor_addr));
2501 WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
2502 lower_32_bits(amdgpu_crtc->cursor_addr));
2503
2502 tmp = RREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset); 2504 tmp = RREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
2503 tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1); 2505 tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
2504 tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2); 2506 tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
2505 WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp); 2507 WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
2506} 2508}
2507 2509
2508static void dce_v10_0_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj, 2510static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc,
2509 uint64_t gpu_addr) 2511 int x, int y)
2510{
2511 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2512 struct amdgpu_device *adev = crtc->dev->dev_private;
2513
2514 WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
2515 upper_32_bits(gpu_addr));
2516 WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
2517 lower_32_bits(gpu_addr));
2518}
2519
2520static int dce_v10_0_crtc_cursor_move(struct drm_crtc *crtc,
2521 int x, int y)
2522{ 2512{
2523 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2513 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2524 struct amdgpu_device *adev = crtc->dev->dev_private; 2514 struct amdgpu_device *adev = crtc->dev->dev_private;
@@ -2538,26 +2528,40 @@ static int dce_v10_0_crtc_cursor_move(struct drm_crtc *crtc,
2538 y = 0; 2528 y = 0;
2539 } 2529 }
2540 2530
2541 dce_v10_0_lock_cursor(crtc, true);
2542 WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); 2531 WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
2543 WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); 2532 WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
2544 WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, 2533 WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
2545 ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); 2534 ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
2546 dce_v10_0_lock_cursor(crtc, false); 2535
2536 amdgpu_crtc->cursor_x = x;
2537 amdgpu_crtc->cursor_y = y;
2547 2538
2548 return 0; 2539 return 0;
2549} 2540}
2550 2541
2551static int dce_v10_0_crtc_cursor_set(struct drm_crtc *crtc, 2542static int dce_v10_0_crtc_cursor_move(struct drm_crtc *crtc,
2552 struct drm_file *file_priv, 2543 int x, int y)
2553 uint32_t handle, 2544{
2554 uint32_t width, 2545 int ret;
2555 uint32_t height) 2546
2547 dce_v10_0_lock_cursor(crtc, true);
2548 ret = dce_v10_0_cursor_move_locked(crtc, x, y);
2549 dce_v10_0_lock_cursor(crtc, false);
2550
2551 return ret;
2552}
2553
2554static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
2555 struct drm_file *file_priv,
2556 uint32_t handle,
2557 uint32_t width,
2558 uint32_t height,
2559 int32_t hot_x,
2560 int32_t hot_y)
2556{ 2561{
2557 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2562 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2558 struct drm_gem_object *obj; 2563 struct drm_gem_object *obj;
2559 struct amdgpu_bo *robj; 2564 struct amdgpu_bo *aobj;
2560 uint64_t gpu_addr;
2561 int ret; 2565 int ret;
2562 2566
2563 if (!handle) { 2567 if (!handle) {
@@ -2579,41 +2583,71 @@ static int dce_v10_0_crtc_cursor_set(struct drm_crtc *crtc,
2579 return -ENOENT; 2583 return -ENOENT;
2580 } 2584 }
2581 2585
2582 robj = gem_to_amdgpu_bo(obj); 2586 aobj = gem_to_amdgpu_bo(obj);
2583 ret = amdgpu_bo_reserve(robj, false); 2587 ret = amdgpu_bo_reserve(aobj, false);
2584 if (unlikely(ret != 0)) 2588 if (ret != 0) {
2585 goto fail; 2589 drm_gem_object_unreference_unlocked(obj);
2586 ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM, 2590 return ret;
2587 0, 0, &gpu_addr); 2591 }
2588 amdgpu_bo_unreserve(robj); 2592
2589 if (ret) 2593 ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
2590 goto fail; 2594 amdgpu_bo_unreserve(aobj);
2595 if (ret) {
2596 DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
2597 drm_gem_object_unreference_unlocked(obj);
2598 return ret;
2599 }
2591 2600
2592 amdgpu_crtc->cursor_width = width; 2601 amdgpu_crtc->cursor_width = width;
2593 amdgpu_crtc->cursor_height = height; 2602 amdgpu_crtc->cursor_height = height;
2594 2603
2595 dce_v10_0_lock_cursor(crtc, true); 2604 dce_v10_0_lock_cursor(crtc, true);
2596 dce_v10_0_set_cursor(crtc, obj, gpu_addr); 2605
2606 if (hot_x != amdgpu_crtc->cursor_hot_x ||
2607 hot_y != amdgpu_crtc->cursor_hot_y) {
2608 int x, y;
2609
2610 x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
2611 y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
2612
2613 dce_v10_0_cursor_move_locked(crtc, x, y);
2614
2615 amdgpu_crtc->cursor_hot_x = hot_x;
2616 amdgpu_crtc->cursor_hot_y = hot_y;
2617 }
2618
2597 dce_v10_0_show_cursor(crtc); 2619 dce_v10_0_show_cursor(crtc);
2598 dce_v10_0_lock_cursor(crtc, false); 2620 dce_v10_0_lock_cursor(crtc, false);
2599 2621
2600unpin: 2622unpin:
2601 if (amdgpu_crtc->cursor_bo) { 2623 if (amdgpu_crtc->cursor_bo) {
2602 robj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2624 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2603 ret = amdgpu_bo_reserve(robj, false); 2625 ret = amdgpu_bo_reserve(aobj, false);
2604 if (likely(ret == 0)) { 2626 if (likely(ret == 0)) {
2605 amdgpu_bo_unpin(robj); 2627 amdgpu_bo_unpin(aobj);
2606 amdgpu_bo_unreserve(robj); 2628 amdgpu_bo_unreserve(aobj);
2607 } 2629 }
2608 drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); 2630 drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo);
2609 } 2631 }
2610 2632
2611 amdgpu_crtc->cursor_bo = obj; 2633 amdgpu_crtc->cursor_bo = obj;
2612 return 0; 2634 return 0;
2613fail: 2635}
2614 drm_gem_object_unreference_unlocked(obj);
2615 2636
2616 return ret; 2637static void dce_v10_0_cursor_reset(struct drm_crtc *crtc)
2638{
2639 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2640
2641 if (amdgpu_crtc->cursor_bo) {
2642 dce_v10_0_lock_cursor(crtc, true);
2643
2644 dce_v10_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
2645 amdgpu_crtc->cursor_y);
2646
2647 dce_v10_0_show_cursor(crtc);
2648
2649 dce_v10_0_lock_cursor(crtc, false);
2650 }
2617} 2651}
2618 2652
2619static void dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, 2653static void dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
@@ -2641,7 +2675,7 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
2641} 2675}
2642 2676
2643static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = { 2677static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = {
2644 .cursor_set = dce_v10_0_crtc_cursor_set, 2678 .cursor_set2 = dce_v10_0_crtc_cursor_set2,
2645 .cursor_move = dce_v10_0_crtc_cursor_move, 2679 .cursor_move = dce_v10_0_crtc_cursor_move,
2646 .gamma_set = dce_v10_0_crtc_gamma_set, 2680 .gamma_set = dce_v10_0_crtc_gamma_set,
2647 .set_config = amdgpu_crtc_set_config, 2681 .set_config = amdgpu_crtc_set_config,
@@ -2663,9 +2697,10 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
2663 dce_v10_0_vga_enable(crtc, true); 2697 dce_v10_0_vga_enable(crtc, true);
2664 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); 2698 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
2665 dce_v10_0_vga_enable(crtc, false); 2699 dce_v10_0_vga_enable(crtc, false);
2666 /* Make sure VBLANK interrupt is still enabled */ 2700 /* Make sure VBLANK and PFLIP interrupts are still enabled */
2667 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); 2701 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
2668 amdgpu_irq_update(adev, &adev->crtc_irq, type); 2702 amdgpu_irq_update(adev, &adev->crtc_irq, type);
2703 amdgpu_irq_update(adev, &adev->pageflip_irq, type);
2669 drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id); 2704 drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
2670 dce_v10_0_crtc_load_lut(crtc); 2705 dce_v10_0_crtc_load_lut(crtc);
2671 break; 2706 break;
@@ -2774,6 +2809,7 @@ static int dce_v10_0_crtc_mode_set(struct drm_crtc *crtc,
2774 dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y, 0); 2809 dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
2775 amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); 2810 amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
2776 amdgpu_atombios_crtc_scaler_setup(crtc); 2811 amdgpu_atombios_crtc_scaler_setup(crtc);
2812 dce_v10_0_cursor_reset(crtc);
2777 /* update the hw version for dpm */ 2813 /* update the hw version for dpm */
2778 amdgpu_crtc->hw_mode = *adjusted_mode; 2814 amdgpu_crtc->hw_mode = *adjusted_mode;
2779 2815
@@ -3025,6 +3061,8 @@ static int dce_v10_0_hw_init(void *handle)
3025 dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); 3061 dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
3026 } 3062 }
3027 3063
3064 dce_v10_0_pageflip_interrupt_init(adev);
3065
3028 return 0; 3066 return 0;
3029} 3067}
3030 3068
@@ -3039,6 +3077,8 @@ static int dce_v10_0_hw_fini(void *handle)
3039 dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); 3077 dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
3040 } 3078 }
3041 3079
3080 dce_v10_0_pageflip_interrupt_fini(adev);
3081
3042 return 0; 3082 return 0;
3043} 3083}
3044 3084
@@ -3048,22 +3088,18 @@ static int dce_v10_0_suspend(void *handle)
3048 3088
3049 amdgpu_atombios_scratch_regs_save(adev); 3089 amdgpu_atombios_scratch_regs_save(adev);
3050 3090
3051 dce_v10_0_hpd_fini(adev); 3091 return dce_v10_0_hw_fini(handle);
3052
3053 return 0;
3054} 3092}
3055 3093
3056static int dce_v10_0_resume(void *handle) 3094static int dce_v10_0_resume(void *handle)
3057{ 3095{
3058 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3096 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3097 int ret;
3059 3098
3060 dce_v10_0_init_golden_registers(adev); 3099 ret = dce_v10_0_hw_init(handle);
3061 3100
3062 amdgpu_atombios_scratch_regs_restore(adev); 3101 amdgpu_atombios_scratch_regs_restore(adev);
3063 3102
3064 /* init dig PHYs, disp eng pll */
3065 amdgpu_atombios_encoder_init_dig(adev);
3066 amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
3067 /* turn on the BL */ 3103 /* turn on the BL */
3068 if (adev->mode_info.bl_encoder) { 3104 if (adev->mode_info.bl_encoder) {
3069 u8 bl_level = amdgpu_display_backlight_get_level(adev, 3105 u8 bl_level = amdgpu_display_backlight_get_level(adev,
@@ -3072,10 +3108,7 @@ static int dce_v10_0_resume(void *handle)
3072 bl_level); 3108 bl_level);
3073 } 3109 }
3074 3110
3075 /* initialize hpd */ 3111 return ret;
3076 dce_v10_0_hpd_init(adev);
3077
3078 return 0;
3079} 3112}
3080 3113
3081static bool dce_v10_0_is_idle(void *handle) 3114static bool dce_v10_0_is_idle(void *handle)
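In the suspend/resume hunks above, dce_v10_0_suspend() now funnels into dce_v10_0_hw_fini() and resume into dce_v10_0_hw_init(), instead of duplicating a subset of the init path. A stubbed sketch of the resulting call structure (the puts() strings only summarize what the real handlers do):

#include <stdio.h>

static int hw_init(void) { puts("golden regs, dig PHYs, hpd, pflip irqs on"); return 0; }
static int hw_fini(void) { puts("hpd, pflip irqs off");                       return 0; }

static int suspend(void)
{
        puts("save atombios scratch regs");
        return hw_fini();                 /* previously: open-coded hpd_fini() only */
}

static int resume(void)
{
        int ret = hw_init();              /* previously: open-coded golden regs + dig init */
        puts("restore atombios scratch regs, turn on backlight");
        return ret;
}

int main(void) { return suspend() || resume(); }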
@@ -3267,37 +3300,20 @@ static int dce_v10_0_set_pageflip_irq_state(struct amdgpu_device *adev,
3267 unsigned type, 3300 unsigned type,
3268 enum amdgpu_interrupt_state state) 3301 enum amdgpu_interrupt_state state)
3269{ 3302{
3270 u32 reg, reg_block; 3303 u32 reg;
3271 /* now deal with page flip IRQ */ 3304
3272 switch (type) { 3305 if (type >= adev->mode_info.num_crtc) {
3273 case AMDGPU_PAGEFLIP_IRQ_D1: 3306 DRM_ERROR("invalid pageflip crtc %d\n", type);
3274 reg_block = CRTC0_REGISTER_OFFSET; 3307 return -EINVAL;
3275 break;
3276 case AMDGPU_PAGEFLIP_IRQ_D2:
3277 reg_block = CRTC1_REGISTER_OFFSET;
3278 break;
3279 case AMDGPU_PAGEFLIP_IRQ_D3:
3280 reg_block = CRTC2_REGISTER_OFFSET;
3281 break;
3282 case AMDGPU_PAGEFLIP_IRQ_D4:
3283 reg_block = CRTC3_REGISTER_OFFSET;
3284 break;
3285 case AMDGPU_PAGEFLIP_IRQ_D5:
3286 reg_block = CRTC4_REGISTER_OFFSET;
3287 break;
3288 case AMDGPU_PAGEFLIP_IRQ_D6:
3289 reg_block = CRTC5_REGISTER_OFFSET;
3290 break;
3291 default:
3292 DRM_ERROR("invalid pageflip crtc %d\n", type);
3293 return -EINVAL;
3294 } 3308 }
3295 3309
3296 reg = RREG32(mmGRPH_INTERRUPT_CONTROL + reg_block); 3310 reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
3297 if (state == AMDGPU_IRQ_STATE_DISABLE) 3311 if (state == AMDGPU_IRQ_STATE_DISABLE)
3298 WREG32(mmGRPH_INTERRUPT_CONTROL + reg_block, reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); 3312 WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
3313 reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
3299 else 3314 else
3300 WREG32(mmGRPH_INTERRUPT_CONTROL + reg_block, reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); 3315 WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
3316 reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
3301 3317
3302 return 0; 3318 return 0;
3303} 3319}
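The interrupt-state hunk replaces the per-CRTC switch over AMDGPU_PAGEFLIP_IRQ_D1..D6 with a bounds check and an indexed lookup in crtc_offsets[]. A self-contained sketch of that pattern (NUM_CRTC and the offsets below are made up, not the hardware strides):

#include <stdio.h>

#define NUM_CRTC 6

static const unsigned crtc_offsets[NUM_CRTC] = {
        0x0000, 0x0200, 0x0400, 0x0600, 0x0800, 0x0a00  /* hypothetical register strides */
};

static int pflip_reg_offset(unsigned type, unsigned *offset)
{
        if (type >= NUM_CRTC)
                return -1;                /* the driver returns -EINVAL here */
        *offset = crtc_offsets[type];
        return 0;
}

int main(void)
{
        unsigned off;

        if (!pflip_reg_offset(3, &off))
                printf("CRTC3 register block at +0x%x\n", off);
        return 0;
}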
@@ -3306,7 +3322,6 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
3306 struct amdgpu_irq_src *source, 3322 struct amdgpu_irq_src *source,
3307 struct amdgpu_iv_entry *entry) 3323 struct amdgpu_iv_entry *entry)
3308{ 3324{
3309 int reg_block;
3310 unsigned long flags; 3325 unsigned long flags;
3311 unsigned crtc_id; 3326 unsigned crtc_id;
3312 struct amdgpu_crtc *amdgpu_crtc; 3327 struct amdgpu_crtc *amdgpu_crtc;
@@ -3315,33 +3330,15 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
3315 crtc_id = (entry->src_id - 8) >> 1; 3330 crtc_id = (entry->src_id - 8) >> 1;
3316 amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; 3331 amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
3317 3332
3318 /* ack the interrupt */ 3333 if (crtc_id >= adev->mode_info.num_crtc) {
3319 switch(crtc_id){ 3334 DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
3320 case AMDGPU_PAGEFLIP_IRQ_D1: 3335 return -EINVAL;
3321 reg_block = CRTC0_REGISTER_OFFSET;
3322 break;
3323 case AMDGPU_PAGEFLIP_IRQ_D2:
3324 reg_block = CRTC1_REGISTER_OFFSET;
3325 break;
3326 case AMDGPU_PAGEFLIP_IRQ_D3:
3327 reg_block = CRTC2_REGISTER_OFFSET;
3328 break;
3329 case AMDGPU_PAGEFLIP_IRQ_D4:
3330 reg_block = CRTC3_REGISTER_OFFSET;
3331 break;
3332 case AMDGPU_PAGEFLIP_IRQ_D5:
3333 reg_block = CRTC4_REGISTER_OFFSET;
3334 break;
3335 case AMDGPU_PAGEFLIP_IRQ_D6:
3336 reg_block = CRTC5_REGISTER_OFFSET;
3337 break;
3338 default:
3339 DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
3340 return -EINVAL;
3341 } 3336 }
3342 3337
3343 if (RREG32(mmGRPH_INTERRUPT_STATUS + reg_block) & GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK) 3338 if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
3344 WREG32(mmGRPH_INTERRUPT_STATUS + reg_block, GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK); 3339 GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
3340 WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
3341 GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
3345 3342
3346 /* IRQ could occur when in initial stage */ 3343 /* IRQ could occur when in initial stage */
3347 if (amdgpu_crtc == NULL) 3344 if (amdgpu_crtc == NULL)
@@ -3369,7 +3366,6 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
3369 spin_unlock_irqrestore(&adev->ddev->event_lock, flags); 3366 spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
3370 3367
3371 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); 3368 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
3372 amdgpu_irq_put(adev, &adev->pageflip_irq, crtc_id);
3373 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); 3369 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
3374 3370
3375 return 0; 3371 return 0;
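Dropping amdgpu_irq_put() from the pageflip handler works because the pflip sources are now referenced once in hw_init and released in hw_fini, rather than grabbed and released around every flip. A toy refcount model (not driver code) of why the interrupt stays enabled in between:

#include <stdio.h>

struct irq_src { int refcount; int enabled; };

static void irq_get(struct irq_src *s) { if (s->refcount++ == 0) s->enabled = 1; }
static void irq_put(struct irq_src *s) { if (--s->refcount == 0) s->enabled = 0; }

int main(void)
{
        struct irq_src pflip = { 0, 0 };

        irq_get(&pflip);                  /* hw_init: take a reference (one per CRTC in the driver) */
        /* ... any number of page flips handled here, no get/put per flip ... */
        printf("enabled during operation: %d\n", pflip.enabled);
        irq_put(&pflip);                  /* hw_fini */
        printf("enabled after teardown:   %d\n", pflip.enabled);
        return 0;
}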
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 6411e8244671..8f1e51128b33 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -126,6 +126,13 @@ static const u32 cz_mgcg_cgcg_init[] =
126 mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000, 126 mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
127}; 127};
128 128
129static const u32 stoney_golden_settings_a11[] =
130{
131 mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000,
132 mmFBC_MISC, 0x1f311fff, 0x14302000,
133};
134
135
129static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) 136static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
130{ 137{
131 switch (adev->asic_type) { 138 switch (adev->asic_type) {
@@ -137,6 +144,11 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev)
137 cz_golden_settings_a11, 144 cz_golden_settings_a11,
138 (const u32)ARRAY_SIZE(cz_golden_settings_a11)); 145 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
139 break; 146 break;
147 case CHIP_STONEY:
148 amdgpu_program_register_sequence(adev,
149 stoney_golden_settings_a11,
150 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
151 break;
140 default: 152 default:
141 break; 153 break;
142 } 154 }
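The new stoney_golden_settings_a11 table is consumed by amdgpu_program_register_sequence(), which walks (register, and_mask, or_mask) triplets and does a masked read-modify-write. A standalone approximation of that loop, using fake MMIO and made-up register indices (the exact mask semantics are paraphrased here, not copied from the driver):

#include <stdint.h>
#include <stdio.h>

static uint32_t fake_mmio[4];
static uint32_t rreg32(uint32_t r)             { return fake_mmio[r]; }
static void     wreg32(uint32_t r, uint32_t v) { fake_mmio[r] = v; }

static void program_register_sequence(const uint32_t *regs, unsigned n)
{
        unsigned i;

        for (i = 0; i < n; i += 3) {
                uint32_t reg = regs[i], and_mask = regs[i + 1], or_mask = regs[i + 2];
                uint32_t tmp = (and_mask == 0xffffffff) ? or_mask
                                : ((rreg32(reg) & ~and_mask) | or_mask);
                wreg32(reg, tmp);
        }
}

int main(void)
{
        /* made-up register indices standing in for mmCRTC_DOUBLE_BUFFER_CONTROL etc. */
        static const uint32_t golden[] = { 1, 0x00010101, 0x00010000,
                                           2, 0x1f311fff, 0x14302000 };

        fake_mmio[1] = 0xffffffff;
        program_register_sequence(golden, 6);
        printf("reg1=0x%08x reg2=0x%08x\n", (unsigned)fake_mmio[1], (unsigned)fake_mmio[2]);
        return 0;
}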
@@ -233,6 +245,24 @@ static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
233 return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]); 245 return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
234} 246}
235 247
248static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev)
249{
250 unsigned i;
251
252 /* Enable pflip interrupts */
253 for (i = 0; i < adev->mode_info.num_crtc; i++)
254 amdgpu_irq_get(adev, &adev->pageflip_irq, i);
255}
256
257static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
258{
259 unsigned i;
260
261 /* Disable pflip interrupts */
262 for (i = 0; i < adev->mode_info.num_crtc; i++)
263 amdgpu_irq_put(adev, &adev->pageflip_irq, i);
264}
265
236/** 266/**
237 * dce_v11_0_page_flip - pageflip callback. 267 * dce_v11_0_page_flip - pageflip callback.
238 * 268 *
@@ -240,46 +270,22 @@ static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
240 * @crtc_id: crtc to cleanup pageflip on 270 * @crtc_id: crtc to cleanup pageflip on
241 * @crtc_base: new address of the crtc (GPU MC address) 271 * @crtc_base: new address of the crtc (GPU MC address)
242 * 272 *
243 * Does the actual pageflip (evergreen+). 273 * Triggers the actual pageflip by updating the primary
244 * During vblank we take the crtc lock and wait for the update_pending 274 * surface base address.
245 * bit to go high, when it does, we release the lock, and allow the
246 * double buffered update to take place.
247 * Returns the current update pending status.
248 */ 275 */
249static void dce_v11_0_page_flip(struct amdgpu_device *adev, 276static void dce_v11_0_page_flip(struct amdgpu_device *adev,
250 int crtc_id, u64 crtc_base) 277 int crtc_id, u64 crtc_base)
251{ 278{
252 struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; 279 struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
253 u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
254 int i;
255
256 /* Lock the graphics update lock */
257 tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
258 WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
259 280
260 /* update the scanout addresses */ 281 /* update the scanout addresses */
261 WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
262 upper_32_bits(crtc_base));
263 WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
264 lower_32_bits(crtc_base));
265
266 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, 282 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
267 upper_32_bits(crtc_base)); 283 upper_32_bits(crtc_base));
284 /* writing to the low address triggers the update */
268 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, 285 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
269 lower_32_bits(crtc_base)); 286 lower_32_bits(crtc_base));
270 287 /* post the write */
271 /* Wait for update_pending to go high. */ 288 RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
272 for (i = 0; i < adev->usec_timeout; i++) {
273 if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
274 GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
275 break;
276 udelay(1);
277 }
278 DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
279
280 /* Unlock the lock, so double-buffering can take place inside vblank */
281 tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
282 WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
283} 289}
284 290
285static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, 291static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
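The rewritten dce_v11_0_page_flip() drops the GRPH_UPDATE lock/poll dance entirely: it programs the high dword, then the low dword (which is what latches the new scanout address), and reads the register back to post the write. A stubbed sketch of that ordering (register offsets and the I/O helpers are placeholders):

#include <stdint.h>
#include <stdio.h>

static uint32_t regs[2];                               /* [0]=ADDRESS, [1]=ADDRESS_HIGH */
static void wreg32(int r, uint32_t v) { regs[r] = v; }
static uint32_t rreg32(int r)         { return regs[r]; }

static void page_flip(uint64_t crtc_base)
{
        wreg32(1, (uint32_t)(crtc_base >> 32));        /* upper_32_bits() first */
        wreg32(0, (uint32_t)crtc_base);                /* lower_32_bits(): this latches the flip */
        (void)rreg32(0);                               /* read back to post the write */
}

int main(void)
{
        page_flip(0x0000000123456000ULL);
        printf("HIGH=0x%08x LOW=0x%08x\n", (unsigned)regs[1], (unsigned)regs[0]);
        return 0;
}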
@@ -1232,7 +1238,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
1232 u32 pixel_period; 1238 u32 pixel_period;
1233 u32 line_time = 0; 1239 u32 line_time = 0;
1234 u32 latency_watermark_a = 0, latency_watermark_b = 0; 1240 u32 latency_watermark_a = 0, latency_watermark_b = 0;
1235 u32 tmp, wm_mask; 1241 u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
1236 1242
1237 if (amdgpu_crtc->base.enabled && num_heads && mode) { 1243 if (amdgpu_crtc->base.enabled && num_heads && mode) {
1238 pixel_period = 1000000 / (u32)mode->clock; 1244 pixel_period = 1000000 / (u32)mode->clock;
@@ -1315,6 +1321,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
1315 (adev->mode_info.disp_priority == 2)) { 1321 (adev->mode_info.disp_priority == 2)) {
1316 DRM_DEBUG_KMS("force priority to high\n"); 1322 DRM_DEBUG_KMS("force priority to high\n");
1317 } 1323 }
1324 lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
1318 } 1325 }
1319 1326
1320 /* select wm A */ 1327 /* select wm A */
@@ -1339,6 +1346,8 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
1339 amdgpu_crtc->line_time = line_time; 1346 amdgpu_crtc->line_time = line_time;
1340 amdgpu_crtc->wm_high = latency_watermark_a; 1347 amdgpu_crtc->wm_high = latency_watermark_a;
1341 amdgpu_crtc->wm_low = latency_watermark_b; 1348 amdgpu_crtc->wm_low = latency_watermark_b;
1349 /* Save number of lines the linebuffer leads before the scanout */
1350 amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
1342} 1351}
1343 1352
1344/** 1353/**
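The watermark hunks add lb_vblank_lead_lines, the number of scanlines by which the line buffer leads the scanout, computed as DIV_ROUND_UP(lb_size, mode->crtc_hdisplay). A quick arithmetic sketch with made-up numbers:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned lb_size  = 1920 * 2;  /* hypothetical line-buffer allocation, in pixels */
        unsigned hdisplay = 1920;      /* active width of the mode */

        printf("lb_vblank_lead_lines = %u\n", DIV_ROUND_UP(lb_size, hdisplay));
        return 0;
}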
@@ -2425,7 +2434,7 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)
2425 2434
2426 /* XXX need to determine what plls are available on each DCE11 part */ 2435 /* XXX need to determine what plls are available on each DCE11 part */
2427 pll_in_use = amdgpu_pll_get_use_mask(crtc); 2436 pll_in_use = amdgpu_pll_get_use_mask(crtc);
2428 if (adev->asic_type == CHIP_CARRIZO) { 2437 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) {
2429 if (!(pll_in_use & (1 << ATOM_PPLL1))) 2438 if (!(pll_in_use & (1 << ATOM_PPLL1)))
2430 return ATOM_PPLL1; 2439 return ATOM_PPLL1;
2431 if (!(pll_in_use & (1 << ATOM_PPLL0))) 2440 if (!(pll_in_use & (1 << ATOM_PPLL0)))
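For reference, the PLL selection touched here keeps a bitmask of PLLs already in use and hands out the first free one; the sketch below mirrors only the PPLL1-then-PPLL0 order visible in the hunk, with placeholder enum values:

#include <stdio.h>

enum { PPLL0, PPLL1, PPLL_INVALID };

static int pick_pll(unsigned pll_in_use)
{
        if (!(pll_in_use & (1 << PPLL1)))
                return PPLL1;
        if (!(pll_in_use & (1 << PPLL0)))
                return PPLL0;
        return PPLL_INVALID;
}

int main(void)
{
        printf("with PPLL1 busy, pick PPLL%d\n", pick_pll(1 << PPLL1));
        return 0;
}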
@@ -2476,26 +2485,19 @@ static void dce_v11_0_show_cursor(struct drm_crtc *crtc)
2476 struct amdgpu_device *adev = crtc->dev->dev_private; 2485 struct amdgpu_device *adev = crtc->dev->dev_private;
2477 u32 tmp; 2486 u32 tmp;
2478 2487
2488 WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
2489 upper_32_bits(amdgpu_crtc->cursor_addr));
2490 WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
2491 lower_32_bits(amdgpu_crtc->cursor_addr));
2492
2479 tmp = RREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset); 2493 tmp = RREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
2480 tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1); 2494 tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
2481 tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2); 2495 tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
2482 WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp); 2496 WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
2483} 2497}
2484 2498
2485static void dce_v11_0_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj, 2499static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,
2486 uint64_t gpu_addr) 2500 int x, int y)
2487{
2488 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2489 struct amdgpu_device *adev = crtc->dev->dev_private;
2490
2491 WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
2492 upper_32_bits(gpu_addr));
2493 WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
2494 lower_32_bits(gpu_addr));
2495}
2496
2497static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc,
2498 int x, int y)
2499{ 2501{
2500 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2502 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2501 struct amdgpu_device *adev = crtc->dev->dev_private; 2503 struct amdgpu_device *adev = crtc->dev->dev_private;
@@ -2515,26 +2517,40 @@ static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc,
2515 y = 0; 2517 y = 0;
2516 } 2518 }
2517 2519
2518 dce_v11_0_lock_cursor(crtc, true);
2519 WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); 2520 WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
2520 WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); 2521 WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
2521 WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, 2522 WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
2522 ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); 2523 ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
2523 dce_v11_0_lock_cursor(crtc, false); 2524
2525 amdgpu_crtc->cursor_x = x;
2526 amdgpu_crtc->cursor_y = y;
2524 2527
2525 return 0; 2528 return 0;
2526} 2529}
2527 2530
2528static int dce_v11_0_crtc_cursor_set(struct drm_crtc *crtc, 2531static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc,
2529 struct drm_file *file_priv, 2532 int x, int y)
2530 uint32_t handle, 2533{
2531 uint32_t width, 2534 int ret;
2532 uint32_t height) 2535
2536 dce_v11_0_lock_cursor(crtc, true);
2537 ret = dce_v11_0_cursor_move_locked(crtc, x, y);
2538 dce_v11_0_lock_cursor(crtc, false);
2539
2540 return ret;
2541}
2542
2543static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
2544 struct drm_file *file_priv,
2545 uint32_t handle,
2546 uint32_t width,
2547 uint32_t height,
2548 int32_t hot_x,
2549 int32_t hot_y)
2533{ 2550{
2534 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2551 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2535 struct drm_gem_object *obj; 2552 struct drm_gem_object *obj;
2536 struct amdgpu_bo *robj; 2553 struct amdgpu_bo *aobj;
2537 uint64_t gpu_addr;
2538 int ret; 2554 int ret;
2539 2555
2540 if (!handle) { 2556 if (!handle) {
@@ -2556,41 +2572,71 @@ static int dce_v11_0_crtc_cursor_set(struct drm_crtc *crtc,
2556 return -ENOENT; 2572 return -ENOENT;
2557 } 2573 }
2558 2574
2559 robj = gem_to_amdgpu_bo(obj); 2575 aobj = gem_to_amdgpu_bo(obj);
2560 ret = amdgpu_bo_reserve(robj, false); 2576 ret = amdgpu_bo_reserve(aobj, false);
2561 if (unlikely(ret != 0)) 2577 if (ret != 0) {
2562 goto fail; 2578 drm_gem_object_unreference_unlocked(obj);
2563 ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM, 2579 return ret;
2564 0, 0, &gpu_addr); 2580 }
2565 amdgpu_bo_unreserve(robj); 2581
2566 if (ret) 2582 ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
2567 goto fail; 2583 amdgpu_bo_unreserve(aobj);
2584 if (ret) {
2585 DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
2586 drm_gem_object_unreference_unlocked(obj);
2587 return ret;
2588 }
2568 2589
2569 amdgpu_crtc->cursor_width = width; 2590 amdgpu_crtc->cursor_width = width;
2570 amdgpu_crtc->cursor_height = height; 2591 amdgpu_crtc->cursor_height = height;
2571 2592
2572 dce_v11_0_lock_cursor(crtc, true); 2593 dce_v11_0_lock_cursor(crtc, true);
2573 dce_v11_0_set_cursor(crtc, obj, gpu_addr); 2594
2595 if (hot_x != amdgpu_crtc->cursor_hot_x ||
2596 hot_y != amdgpu_crtc->cursor_hot_y) {
2597 int x, y;
2598
2599 x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
2600 y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
2601
2602 dce_v11_0_cursor_move_locked(crtc, x, y);
2603
2604 amdgpu_crtc->cursor_hot_x = hot_x;
2605 amdgpu_crtc->cursor_hot_y = hot_y;
2606 }
2607
2574 dce_v11_0_show_cursor(crtc); 2608 dce_v11_0_show_cursor(crtc);
2575 dce_v11_0_lock_cursor(crtc, false); 2609 dce_v11_0_lock_cursor(crtc, false);
2576 2610
2577unpin: 2611unpin:
2578 if (amdgpu_crtc->cursor_bo) { 2612 if (amdgpu_crtc->cursor_bo) {
2579 robj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2613 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2580 ret = amdgpu_bo_reserve(robj, false); 2614 ret = amdgpu_bo_reserve(aobj, false);
2581 if (likely(ret == 0)) { 2615 if (likely(ret == 0)) {
2582 amdgpu_bo_unpin(robj); 2616 amdgpu_bo_unpin(aobj);
2583 amdgpu_bo_unreserve(robj); 2617 amdgpu_bo_unreserve(aobj);
2584 } 2618 }
2585 drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); 2619 drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo);
2586 } 2620 }
2587 2621
2588 amdgpu_crtc->cursor_bo = obj; 2622 amdgpu_crtc->cursor_bo = obj;
2589 return 0; 2623 return 0;
2590fail: 2624}
2591 drm_gem_object_unreference_unlocked(obj);
2592 2625
2593 return ret; 2626static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
2627{
2628 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2629
2630 if (amdgpu_crtc->cursor_bo) {
2631 dce_v11_0_lock_cursor(crtc, true);
2632
2633 dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
2634 amdgpu_crtc->cursor_y);
2635
2636 dce_v11_0_show_cursor(crtc);
2637
2638 dce_v11_0_lock_cursor(crtc, false);
2639 }
2594} 2640}
2595 2641
2596static void dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, 2642static void dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
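Moving to the cursor_set2 hook lets the driver honor hotspot changes: when userspace supplies a new hotspot, the CRTC position is shifted by the old-minus-new hotspot delta so the visible cursor tip stays put. A plain-C sketch of that compensation (the struct and fields are illustrative, not amdgpu_crtc):

#include <stdio.h>

struct cursor_state { int x, y, hot_x, hot_y; };

static void set_hotspot(struct cursor_state *c, int hot_x, int hot_y)
{
        if (hot_x != c->hot_x || hot_y != c->hot_y) {
                c->x += c->hot_x - hot_x;   /* keep the visible tip where it was */
                c->y += c->hot_y - hot_y;
                c->hot_x = hot_x;
                c->hot_y = hot_y;
        }
}

int main(void)
{
        struct cursor_state c = { 100, 80, 0, 0 };

        set_hotspot(&c, 4, 4);
        printf("position now (%d,%d), hotspot (%d,%d)\n", c.x, c.y, c.hot_x, c.hot_y);
        return 0;
}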
@@ -2618,7 +2664,7 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
2618} 2664}
2619 2665
2620static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = { 2666static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
2621 .cursor_set = dce_v11_0_crtc_cursor_set, 2667 .cursor_set2 = dce_v11_0_crtc_cursor_set2,
2622 .cursor_move = dce_v11_0_crtc_cursor_move, 2668 .cursor_move = dce_v11_0_crtc_cursor_move,
2623 .gamma_set = dce_v11_0_crtc_gamma_set, 2669 .gamma_set = dce_v11_0_crtc_gamma_set,
2624 .set_config = amdgpu_crtc_set_config, 2670 .set_config = amdgpu_crtc_set_config,
@@ -2640,9 +2686,10 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
2640 dce_v11_0_vga_enable(crtc, true); 2686 dce_v11_0_vga_enable(crtc, true);
2641 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); 2687 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
2642 dce_v11_0_vga_enable(crtc, false); 2688 dce_v11_0_vga_enable(crtc, false);
2643 /* Make sure VBLANK interrupt is still enabled */ 2689 /* Make sure VBLANK and PFLIP interrupts are still enabled */
2644 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); 2690 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
2645 amdgpu_irq_update(adev, &adev->crtc_irq, type); 2691 amdgpu_irq_update(adev, &adev->crtc_irq, type);
2692 amdgpu_irq_update(adev, &adev->pageflip_irq, type);
2646 drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id); 2693 drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
2647 dce_v11_0_crtc_load_lut(crtc); 2694 dce_v11_0_crtc_load_lut(crtc);
2648 break; 2695 break;
@@ -2751,6 +2798,7 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc,
2751 dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0); 2798 dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
2752 amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); 2799 amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
2753 amdgpu_atombios_crtc_scaler_setup(crtc); 2800 amdgpu_atombios_crtc_scaler_setup(crtc);
2801 dce_v11_0_cursor_reset(crtc);
2754	/* update the hw version for dpm */ 2802	/* update the hw version for dpm */
2755 amdgpu_crtc->hw_mode = *adjusted_mode; 2803 amdgpu_crtc->hw_mode = *adjusted_mode;
2756 2804
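dce_v11_0_cursor_reset(), called at the end of the mode-set path above, replays the cached cursor position and re-shows the cursor because a full mode set reprograms the CRTC. A minimal cache-and-replay sketch under that assumption (the types below are stand-ins, not driver structures):

#include <stdio.h>

struct crtc_state {
        int has_cursor_bo;
        int cursor_x, cursor_y;
        unsigned long long cursor_addr;
};

static void program_cursor(const struct crtc_state *c)
{
        printf("cursor at (%d,%d), surface 0x%llx, shown\n",
               c->cursor_x, c->cursor_y, c->cursor_addr);
}

static void cursor_reset(struct crtc_state *c)
{
        if (c->has_cursor_bo)             /* only if a cursor BO is currently bound */
                program_cursor(c);
}

int main(void)
{
        struct crtc_state c = { 1, 320, 200, 0x123456000ULL };

        /* ... a mode set wipes the hardware cursor state here ... */
        cursor_reset(&c);
        return 0;
}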
@@ -2888,7 +2936,12 @@ static int dce_v11_0_early_init(void *handle)
2888 2936
2889 switch (adev->asic_type) { 2937 switch (adev->asic_type) {
2890 case CHIP_CARRIZO: 2938 case CHIP_CARRIZO:
2891 adev->mode_info.num_crtc = 4; 2939 adev->mode_info.num_crtc = 3;
2940 adev->mode_info.num_hpd = 6;
2941 adev->mode_info.num_dig = 9;
2942 break;
2943 case CHIP_STONEY:
2944 adev->mode_info.num_crtc = 2;
2892 adev->mode_info.num_hpd = 6; 2945 adev->mode_info.num_hpd = 6;
2893 adev->mode_info.num_dig = 9; 2946 adev->mode_info.num_dig = 9;
2894 break; 2947 break;
@@ -2990,6 +3043,7 @@ static int dce_v11_0_hw_init(void *handle)
2990 dce_v11_0_init_golden_registers(adev); 3043 dce_v11_0_init_golden_registers(adev);
2991 3044
2992 /* init dig PHYs, disp eng pll */ 3045 /* init dig PHYs, disp eng pll */
3046 amdgpu_atombios_crtc_powergate_init(adev);
2993 amdgpu_atombios_encoder_init_dig(adev); 3047 amdgpu_atombios_encoder_init_dig(adev);
2994 amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); 3048 amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
2995 3049
@@ -3000,6 +3054,8 @@ static int dce_v11_0_hw_init(void *handle)
3000 dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); 3054 dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
3001 } 3055 }
3002 3056
3057 dce_v11_0_pageflip_interrupt_init(adev);
3058
3003 return 0; 3059 return 0;
3004} 3060}
3005 3061
@@ -3014,6 +3070,8 @@ static int dce_v11_0_hw_fini(void *handle)
3014 dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); 3070 dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
3015 } 3071 }
3016 3072
3073 dce_v11_0_pageflip_interrupt_fini(adev);
3074
3017 return 0; 3075 return 0;
3018} 3076}
3019 3077
@@ -3023,23 +3081,18 @@ static int dce_v11_0_suspend(void *handle)
3023 3081
3024 amdgpu_atombios_scratch_regs_save(adev); 3082 amdgpu_atombios_scratch_regs_save(adev);
3025 3083
3026 dce_v11_0_hpd_fini(adev); 3084 return dce_v11_0_hw_fini(handle);
3027
3028 return 0;
3029} 3085}
3030 3086
3031static int dce_v11_0_resume(void *handle) 3087static int dce_v11_0_resume(void *handle)
3032{ 3088{
3033 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3089 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3090 int ret;
3034 3091
3035 dce_v11_0_init_golden_registers(adev); 3092 ret = dce_v11_0_hw_init(handle);
3036 3093
3037 amdgpu_atombios_scratch_regs_restore(adev); 3094 amdgpu_atombios_scratch_regs_restore(adev);
3038 3095
3039 /* init dig PHYs, disp eng pll */
3040 amdgpu_atombios_crtc_powergate_init(adev);
3041 amdgpu_atombios_encoder_init_dig(adev);
3042 amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
3043 /* turn on the BL */ 3096 /* turn on the BL */
3044 if (adev->mode_info.bl_encoder) { 3097 if (adev->mode_info.bl_encoder) {
3045 u8 bl_level = amdgpu_display_backlight_get_level(adev, 3098 u8 bl_level = amdgpu_display_backlight_get_level(adev,
@@ -3048,10 +3101,7 @@ static int dce_v11_0_resume(void *handle)
3048 bl_level); 3101 bl_level);
3049 } 3102 }
3050 3103
3051 /* initialize hpd */ 3104 return ret;
3052 dce_v11_0_hpd_init(adev);
3053
3054 return 0;
3055} 3105}
3056 3106
3057static bool dce_v11_0_is_idle(void *handle) 3107static bool dce_v11_0_is_idle(void *handle)
@@ -3243,37 +3293,20 @@ static int dce_v11_0_set_pageflip_irq_state(struct amdgpu_device *adev,
3243 unsigned type, 3293 unsigned type,
3244 enum amdgpu_interrupt_state state) 3294 enum amdgpu_interrupt_state state)
3245{ 3295{
3246 u32 reg, reg_block; 3296 u32 reg;
3247 /* now deal with page flip IRQ */ 3297
3248 switch (type) { 3298 if (type >= adev->mode_info.num_crtc) {
3249 case AMDGPU_PAGEFLIP_IRQ_D1: 3299 DRM_ERROR("invalid pageflip crtc %d\n", type);
3250 reg_block = CRTC0_REGISTER_OFFSET; 3300 return -EINVAL;
3251 break;
3252 case AMDGPU_PAGEFLIP_IRQ_D2:
3253 reg_block = CRTC1_REGISTER_OFFSET;
3254 break;
3255 case AMDGPU_PAGEFLIP_IRQ_D3:
3256 reg_block = CRTC2_REGISTER_OFFSET;
3257 break;
3258 case AMDGPU_PAGEFLIP_IRQ_D4:
3259 reg_block = CRTC3_REGISTER_OFFSET;
3260 break;
3261 case AMDGPU_PAGEFLIP_IRQ_D5:
3262 reg_block = CRTC4_REGISTER_OFFSET;
3263 break;
3264 case AMDGPU_PAGEFLIP_IRQ_D6:
3265 reg_block = CRTC5_REGISTER_OFFSET;
3266 break;
3267 default:
3268 DRM_ERROR("invalid pageflip crtc %d\n", type);
3269 return -EINVAL;
3270 } 3301 }
3271 3302
3272 reg = RREG32(mmGRPH_INTERRUPT_CONTROL + reg_block); 3303 reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
3273 if (state == AMDGPU_IRQ_STATE_DISABLE) 3304 if (state == AMDGPU_IRQ_STATE_DISABLE)
3274 WREG32(mmGRPH_INTERRUPT_CONTROL + reg_block, reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); 3305 WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
3306 reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
3275 else 3307 else
3276 WREG32(mmGRPH_INTERRUPT_CONTROL + reg_block, reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); 3308 WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
3309 reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
3277 3310
3278 return 0; 3311 return 0;
3279} 3312}
@@ -3282,7 +3315,6 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
3282 struct amdgpu_irq_src *source, 3315 struct amdgpu_irq_src *source,
3283 struct amdgpu_iv_entry *entry) 3316 struct amdgpu_iv_entry *entry)
3284{ 3317{
3285 int reg_block;
3286 unsigned long flags; 3318 unsigned long flags;
3287 unsigned crtc_id; 3319 unsigned crtc_id;
3288 struct amdgpu_crtc *amdgpu_crtc; 3320 struct amdgpu_crtc *amdgpu_crtc;
@@ -3291,33 +3323,15 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
3291 crtc_id = (entry->src_id - 8) >> 1; 3323 crtc_id = (entry->src_id - 8) >> 1;
3292 amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; 3324 amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
3293 3325
3294 /* ack the interrupt */ 3326 if (crtc_id >= adev->mode_info.num_crtc) {
3295 switch(crtc_id){ 3327 DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
3296 case AMDGPU_PAGEFLIP_IRQ_D1: 3328 return -EINVAL;
3297 reg_block = CRTC0_REGISTER_OFFSET;
3298 break;
3299 case AMDGPU_PAGEFLIP_IRQ_D2:
3300 reg_block = CRTC1_REGISTER_OFFSET;
3301 break;
3302 case AMDGPU_PAGEFLIP_IRQ_D3:
3303 reg_block = CRTC2_REGISTER_OFFSET;
3304 break;
3305 case AMDGPU_PAGEFLIP_IRQ_D4:
3306 reg_block = CRTC3_REGISTER_OFFSET;
3307 break;
3308 case AMDGPU_PAGEFLIP_IRQ_D5:
3309 reg_block = CRTC4_REGISTER_OFFSET;
3310 break;
3311 case AMDGPU_PAGEFLIP_IRQ_D6:
3312 reg_block = CRTC5_REGISTER_OFFSET;
3313 break;
3314 default:
3315 DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
3316 return -EINVAL;
3317 } 3329 }
3318 3330
3319 if (RREG32(mmGRPH_INTERRUPT_STATUS + reg_block) & GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK) 3331 if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
3320 WREG32(mmGRPH_INTERRUPT_STATUS + reg_block, GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK); 3332 GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
3333 WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
3334 GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
3321 3335
3322 /* IRQ could occur when in initial stage */ 3336 /* IRQ could occur when in initial stage */
3323 if(amdgpu_crtc == NULL) 3337 if(amdgpu_crtc == NULL)
@@ -3345,7 +3359,6 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
3345 spin_unlock_irqrestore(&adev->ddev->event_lock, flags); 3359 spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
3346 3360
3347 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); 3361 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
3348 amdgpu_irq_put(adev, &adev->pageflip_irq, crtc_id);
3349 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); 3362 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
3350 3363
3351 return 0; 3364 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index c86911c2ea2a..42d954dc436d 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -204,6 +204,24 @@ static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
204 return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]); 204 return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
205} 205}
206 206
207static void dce_v8_0_pageflip_interrupt_init(struct amdgpu_device *adev)
208{
209 unsigned i;
210
211 /* Enable pflip interrupts */
212 for (i = 0; i < adev->mode_info.num_crtc; i++)
213 amdgpu_irq_get(adev, &adev->pageflip_irq, i);
214}
215
216static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
217{
218 unsigned i;
219
220 /* Disable pflip interrupts */
221 for (i = 0; i < adev->mode_info.num_crtc; i++)
222 amdgpu_irq_put(adev, &adev->pageflip_irq, i);
223}
224
207/** 225/**
208 * dce_v8_0_page_flip - pageflip callback. 226 * dce_v8_0_page_flip - pageflip callback.
209 * 227 *
@@ -211,46 +229,22 @@ static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
211 * @crtc_id: crtc to cleanup pageflip on 229 * @crtc_id: crtc to cleanup pageflip on
212 * @crtc_base: new address of the crtc (GPU MC address) 230 * @crtc_base: new address of the crtc (GPU MC address)
213 * 231 *
214 * Does the actual pageflip (evergreen+). 232 * Triggers the actual pageflip by updating the primary
215 * During vblank we take the crtc lock and wait for the update_pending 233 * surface base address.
216 * bit to go high, when it does, we release the lock, and allow the
217 * double buffered update to take place.
218 * Returns the current update pending status.
219 */ 234 */
220static void dce_v8_0_page_flip(struct amdgpu_device *adev, 235static void dce_v8_0_page_flip(struct amdgpu_device *adev,
221 int crtc_id, u64 crtc_base) 236 int crtc_id, u64 crtc_base)
222{ 237{
223 struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; 238 struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
224 u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
225 int i;
226
227 /* Lock the graphics update lock */
228 tmp |= GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
229 WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
230
231 /* update the scanout addresses */
232 WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
233 upper_32_bits(crtc_base));
234 WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
235 (u32)crtc_base);
236 239
240 /* update the primary scanout addresses */
237 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, 241 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
238 upper_32_bits(crtc_base)); 242 upper_32_bits(crtc_base));
243 /* writing to the low address triggers the update */
239 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, 244 WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
240 (u32)crtc_base); 245 lower_32_bits(crtc_base));
241 246 /* post the write */
242 /* Wait for update_pending to go high. */ 247 RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
243 for (i = 0; i < adev->usec_timeout; i++) {
244 if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
245 GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
246 break;
247 udelay(1);
248 }
249 DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
250
251 /* Unlock the lock, so double-buffering can take place inside vblank */
252 tmp &= ~GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
253 WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
254} 248}
255 249
256static int dce_v8_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, 250static int dce_v8_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
@@ -1199,7 +1193,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
1199 u32 pixel_period; 1193 u32 pixel_period;
1200 u32 line_time = 0; 1194 u32 line_time = 0;
1201 u32 latency_watermark_a = 0, latency_watermark_b = 0; 1195 u32 latency_watermark_a = 0, latency_watermark_b = 0;
1202 u32 tmp, wm_mask; 1196 u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
1203 1197
1204 if (amdgpu_crtc->base.enabled && num_heads && mode) { 1198 if (amdgpu_crtc->base.enabled && num_heads && mode) {
1205 pixel_period = 1000000 / (u32)mode->clock; 1199 pixel_period = 1000000 / (u32)mode->clock;
@@ -1282,6 +1276,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
1282 (adev->mode_info.disp_priority == 2)) { 1276 (adev->mode_info.disp_priority == 2)) {
1283 DRM_DEBUG_KMS("force priority to high\n"); 1277 DRM_DEBUG_KMS("force priority to high\n");
1284 } 1278 }
1279 lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
1285 } 1280 }
1286 1281
1287 /* select wm A */ 1282 /* select wm A */
@@ -1308,6 +1303,8 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
1308 amdgpu_crtc->line_time = line_time; 1303 amdgpu_crtc->line_time = line_time;
1309 amdgpu_crtc->wm_high = latency_watermark_a; 1304 amdgpu_crtc->wm_high = latency_watermark_a;
1310 amdgpu_crtc->wm_low = latency_watermark_b; 1305 amdgpu_crtc->wm_low = latency_watermark_b;
1306 /* Save number of lines the linebuffer leads before the scanout */
1307 amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
1311} 1308}
1312 1309
1313/** 1310/**
@@ -2411,26 +2408,19 @@ static void dce_v8_0_show_cursor(struct drm_crtc *crtc)
2411 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2408 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2412 struct amdgpu_device *adev = crtc->dev->dev_private; 2409 struct amdgpu_device *adev = crtc->dev->dev_private;
2413 2410
2411 WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
2412 upper_32_bits(amdgpu_crtc->cursor_addr));
2413 WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
2414 lower_32_bits(amdgpu_crtc->cursor_addr));
2415
2414 WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, 2416 WREG32_IDX(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
2415 CUR_CONTROL__CURSOR_EN_MASK | 2417 CUR_CONTROL__CURSOR_EN_MASK |
2416 (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | 2418 (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
2417 (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); 2419 (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
2418} 2420}
2419 2421
2420static void dce_v8_0_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj, 2422static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc,
2421 uint64_t gpu_addr) 2423 int x, int y)
2422{
2423 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2424 struct amdgpu_device *adev = crtc->dev->dev_private;
2425
2426 WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
2427 upper_32_bits(gpu_addr));
2428 WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
2429 gpu_addr & 0xffffffff);
2430}
2431
2432static int dce_v8_0_crtc_cursor_move(struct drm_crtc *crtc,
2433 int x, int y)
2434{ 2424{
2435 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2425 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2436 struct amdgpu_device *adev = crtc->dev->dev_private; 2426 struct amdgpu_device *adev = crtc->dev->dev_private;
@@ -2450,26 +2440,40 @@ static int dce_v8_0_crtc_cursor_move(struct drm_crtc *crtc,
2450 y = 0; 2440 y = 0;
2451 } 2441 }
2452 2442
2453 dce_v8_0_lock_cursor(crtc, true);
2454 WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); 2443 WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
2455 WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); 2444 WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
2456 WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, 2445 WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
2457 ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); 2446 ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
2458 dce_v8_0_lock_cursor(crtc, false); 2447
2448 amdgpu_crtc->cursor_x = x;
2449 amdgpu_crtc->cursor_y = y;
2459 2450
2460 return 0; 2451 return 0;
2461} 2452}
2462 2453
2463static int dce_v8_0_crtc_cursor_set(struct drm_crtc *crtc, 2454static int dce_v8_0_crtc_cursor_move(struct drm_crtc *crtc,
2464 struct drm_file *file_priv, 2455 int x, int y)
2465 uint32_t handle, 2456{
2466 uint32_t width, 2457 int ret;
2467 uint32_t height) 2458
2459 dce_v8_0_lock_cursor(crtc, true);
2460 ret = dce_v8_0_cursor_move_locked(crtc, x, y);
2461 dce_v8_0_lock_cursor(crtc, false);
2462
2463 return ret;
2464}
2465
2466static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
2467 struct drm_file *file_priv,
2468 uint32_t handle,
2469 uint32_t width,
2470 uint32_t height,
2471 int32_t hot_x,
2472 int32_t hot_y)
2468{ 2473{
2469 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); 2474 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2470 struct drm_gem_object *obj; 2475 struct drm_gem_object *obj;
2471 struct amdgpu_bo *robj; 2476 struct amdgpu_bo *aobj;
2472 uint64_t gpu_addr;
2473 int ret; 2477 int ret;
2474 2478
2475 if (!handle) { 2479 if (!handle) {
@@ -2491,41 +2495,71 @@ static int dce_v8_0_crtc_cursor_set(struct drm_crtc *crtc,
2491 return -ENOENT; 2495 return -ENOENT;
2492 } 2496 }
2493 2497
2494 robj = gem_to_amdgpu_bo(obj); 2498 aobj = gem_to_amdgpu_bo(obj);
2495 ret = amdgpu_bo_reserve(robj, false); 2499 ret = amdgpu_bo_reserve(aobj, false);
2496 if (unlikely(ret != 0)) 2500 if (ret != 0) {
2497 goto fail; 2501 drm_gem_object_unreference_unlocked(obj);
2498 ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM, 2502 return ret;
2499 0, 0, &gpu_addr); 2503 }
2500 amdgpu_bo_unreserve(robj); 2504
2501 if (ret) 2505 ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
2502 goto fail; 2506 amdgpu_bo_unreserve(aobj);
2507 if (ret) {
2508 DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
2509 drm_gem_object_unreference_unlocked(obj);
2510 return ret;
2511 }
2503 2512
2504 amdgpu_crtc->cursor_width = width; 2513 amdgpu_crtc->cursor_width = width;
2505 amdgpu_crtc->cursor_height = height; 2514 amdgpu_crtc->cursor_height = height;
2506 2515
2507 dce_v8_0_lock_cursor(crtc, true); 2516 dce_v8_0_lock_cursor(crtc, true);
2508 dce_v8_0_set_cursor(crtc, obj, gpu_addr); 2517
2518 if (hot_x != amdgpu_crtc->cursor_hot_x ||
2519 hot_y != amdgpu_crtc->cursor_hot_y) {
2520 int x, y;
2521
2522 x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
2523 y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
2524
2525 dce_v8_0_cursor_move_locked(crtc, x, y);
2526
2527 amdgpu_crtc->cursor_hot_x = hot_x;
2528 amdgpu_crtc->cursor_hot_y = hot_y;
2529 }
2530
2509 dce_v8_0_show_cursor(crtc); 2531 dce_v8_0_show_cursor(crtc);
2510 dce_v8_0_lock_cursor(crtc, false); 2532 dce_v8_0_lock_cursor(crtc, false);
2511 2533
2512unpin: 2534unpin:
2513 if (amdgpu_crtc->cursor_bo) { 2535 if (amdgpu_crtc->cursor_bo) {
2514 robj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); 2536 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2515 ret = amdgpu_bo_reserve(robj, false); 2537 ret = amdgpu_bo_reserve(aobj, false);
2516 if (likely(ret == 0)) { 2538 if (likely(ret == 0)) {
2517 amdgpu_bo_unpin(robj); 2539 amdgpu_bo_unpin(aobj);
2518 amdgpu_bo_unreserve(robj); 2540 amdgpu_bo_unreserve(aobj);
2519 } 2541 }
2520 drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); 2542 drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo);
2521 } 2543 }
2522 2544
2523 amdgpu_crtc->cursor_bo = obj; 2545 amdgpu_crtc->cursor_bo = obj;
2524 return 0; 2546 return 0;
2525fail: 2547}
2526 drm_gem_object_unreference_unlocked(obj);
2527 2548
2528 return ret; 2549static void dce_v8_0_cursor_reset(struct drm_crtc *crtc)
2550{
2551 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2552
2553 if (amdgpu_crtc->cursor_bo) {
2554 dce_v8_0_lock_cursor(crtc, true);
2555
2556 dce_v8_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
2557 amdgpu_crtc->cursor_y);
2558
2559 dce_v8_0_show_cursor(crtc);
2560
2561 dce_v8_0_lock_cursor(crtc, false);
2562 }
2529} 2563}
2530 2564
2531static void dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, 2565static void dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
@@ -2553,7 +2587,7 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
2553} 2587}
2554 2588
2555static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = { 2589static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = {
2556 .cursor_set = dce_v8_0_crtc_cursor_set, 2590 .cursor_set2 = dce_v8_0_crtc_cursor_set2,
2557 .cursor_move = dce_v8_0_crtc_cursor_move, 2591 .cursor_move = dce_v8_0_crtc_cursor_move,
2558 .gamma_set = dce_v8_0_crtc_gamma_set, 2592 .gamma_set = dce_v8_0_crtc_gamma_set,
2559 .set_config = amdgpu_crtc_set_config, 2593 .set_config = amdgpu_crtc_set_config,
@@ -2575,9 +2609,10 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
2575 dce_v8_0_vga_enable(crtc, true); 2609 dce_v8_0_vga_enable(crtc, true);
2576 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); 2610 amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
2577 dce_v8_0_vga_enable(crtc, false); 2611 dce_v8_0_vga_enable(crtc, false);
2578 /* Make sure VBLANK interrupt is still enabled */ 2612 /* Make sure VBLANK and PFLIP interrupts are still enabled */
2579 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); 2613 type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
2580 amdgpu_irq_update(adev, &adev->crtc_irq, type); 2614 amdgpu_irq_update(adev, &adev->crtc_irq, type);
2615 amdgpu_irq_update(adev, &adev->pageflip_irq, type);
2581 drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id); 2616 drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
2582 dce_v8_0_crtc_load_lut(crtc); 2617 dce_v8_0_crtc_load_lut(crtc);
2583 break; 2618 break;
@@ -2693,6 +2728,7 @@ static int dce_v8_0_crtc_mode_set(struct drm_crtc *crtc,
2693 dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y, 0); 2728 dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
2694 amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); 2729 amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
2695 amdgpu_atombios_crtc_scaler_setup(crtc); 2730 amdgpu_atombios_crtc_scaler_setup(crtc);
2731 dce_v8_0_cursor_reset(crtc);
2696	/* update the hw version for dpm */ 2732	/* update the hw version for dpm */
2697 amdgpu_crtc->hw_mode = *adjusted_mode; 2733 amdgpu_crtc->hw_mode = *adjusted_mode;
2698 2734
@@ -2933,6 +2969,8 @@ static int dce_v8_0_hw_init(void *handle)
2933 dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); 2969 dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
2934 } 2970 }
2935 2971
2972 dce_v8_0_pageflip_interrupt_init(adev);
2973
2936 return 0; 2974 return 0;
2937} 2975}
2938 2976
@@ -2947,6 +2985,8 @@ static int dce_v8_0_hw_fini(void *handle)
2947 dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); 2985 dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
2948 } 2986 }
2949 2987
2988 dce_v8_0_pageflip_interrupt_fini(adev);
2989
2950 return 0; 2990 return 0;
2951} 2991}
2952 2992
@@ -2956,20 +2996,18 @@ static int dce_v8_0_suspend(void *handle)
2956 2996
2957 amdgpu_atombios_scratch_regs_save(adev); 2997 amdgpu_atombios_scratch_regs_save(adev);
2958 2998
2959 dce_v8_0_hpd_fini(adev); 2999 return dce_v8_0_hw_fini(handle);
2960
2961 return 0;
2962} 3000}
2963 3001
2964static int dce_v8_0_resume(void *handle) 3002static int dce_v8_0_resume(void *handle)
2965{ 3003{
2966 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3004 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3005 int ret;
3006
3007 ret = dce_v8_0_hw_init(handle);
2967 3008
2968 amdgpu_atombios_scratch_regs_restore(adev); 3009 amdgpu_atombios_scratch_regs_restore(adev);
2969 3010
2970 /* init dig PHYs, disp eng pll */
2971 amdgpu_atombios_encoder_init_dig(adev);
2972 amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
2973 /* turn on the BL */ 3011 /* turn on the BL */
2974 if (adev->mode_info.bl_encoder) { 3012 if (adev->mode_info.bl_encoder) {
2975 u8 bl_level = amdgpu_display_backlight_get_level(adev, 3013 u8 bl_level = amdgpu_display_backlight_get_level(adev,
@@ -2978,10 +3016,7 @@ static int dce_v8_0_resume(void *handle)
2978 bl_level); 3016 bl_level);
2979 } 3017 }
2980 3018
2981 /* initialize hpd */ 3019 return ret;
2982 dce_v8_0_hpd_init(adev);
2983
2984 return 0;
2985} 3020}
2986 3021
2987static bool dce_v8_0_is_idle(void *handle) 3022static bool dce_v8_0_is_idle(void *handle)
@@ -3274,37 +3309,20 @@ static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
3274 unsigned type, 3309 unsigned type,
3275 enum amdgpu_interrupt_state state) 3310 enum amdgpu_interrupt_state state)
3276{ 3311{
3277 u32 reg, reg_block; 3312 u32 reg;
3278 /* now deal with page flip IRQ */ 3313
3279 switch (type) { 3314 if (type >= adev->mode_info.num_crtc) {
3280 case AMDGPU_PAGEFLIP_IRQ_D1: 3315 DRM_ERROR("invalid pageflip crtc %d\n", type);
3281 reg_block = CRTC0_REGISTER_OFFSET; 3316 return -EINVAL;
3282 break;
3283 case AMDGPU_PAGEFLIP_IRQ_D2:
3284 reg_block = CRTC1_REGISTER_OFFSET;
3285 break;
3286 case AMDGPU_PAGEFLIP_IRQ_D3:
3287 reg_block = CRTC2_REGISTER_OFFSET;
3288 break;
3289 case AMDGPU_PAGEFLIP_IRQ_D4:
3290 reg_block = CRTC3_REGISTER_OFFSET;
3291 break;
3292 case AMDGPU_PAGEFLIP_IRQ_D5:
3293 reg_block = CRTC4_REGISTER_OFFSET;
3294 break;
3295 case AMDGPU_PAGEFLIP_IRQ_D6:
3296 reg_block = CRTC5_REGISTER_OFFSET;
3297 break;
3298 default:
3299 DRM_ERROR("invalid pageflip crtc %d\n", type);
3300 return -EINVAL;
3301 } 3317 }
3302 3318
3303 reg = RREG32(mmGRPH_INTERRUPT_CONTROL + reg_block); 3319 reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
3304 if (state == AMDGPU_IRQ_STATE_DISABLE) 3320 if (state == AMDGPU_IRQ_STATE_DISABLE)
3305 WREG32(mmGRPH_INTERRUPT_CONTROL + reg_block, reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); 3321 WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
3322 reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
3306 else 3323 else
3307 WREG32(mmGRPH_INTERRUPT_CONTROL + reg_block, reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); 3324 WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
3325 reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
3308 3326
3309 return 0; 3327 return 0;
3310} 3328}
@@ -3313,7 +3331,6 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
3313 struct amdgpu_irq_src *source, 3331 struct amdgpu_irq_src *source,
3314 struct amdgpu_iv_entry *entry) 3332 struct amdgpu_iv_entry *entry)
3315{ 3333{
3316 int reg_block;
3317 unsigned long flags; 3334 unsigned long flags;
3318 unsigned crtc_id; 3335 unsigned crtc_id;
3319 struct amdgpu_crtc *amdgpu_crtc; 3336 struct amdgpu_crtc *amdgpu_crtc;
@@ -3322,33 +3339,15 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
3322 crtc_id = (entry->src_id - 8) >> 1; 3339 crtc_id = (entry->src_id - 8) >> 1;
3323 amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; 3340 amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
3324 3341
3325 /* ack the interrupt */ 3342 if (crtc_id >= adev->mode_info.num_crtc) {
3326 switch(crtc_id){ 3343 DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
3327 case AMDGPU_PAGEFLIP_IRQ_D1: 3344 return -EINVAL;
3328 reg_block = CRTC0_REGISTER_OFFSET;
3329 break;
3330 case AMDGPU_PAGEFLIP_IRQ_D2:
3331 reg_block = CRTC1_REGISTER_OFFSET;
3332 break;
3333 case AMDGPU_PAGEFLIP_IRQ_D3:
3334 reg_block = CRTC2_REGISTER_OFFSET;
3335 break;
3336 case AMDGPU_PAGEFLIP_IRQ_D4:
3337 reg_block = CRTC3_REGISTER_OFFSET;
3338 break;
3339 case AMDGPU_PAGEFLIP_IRQ_D5:
3340 reg_block = CRTC4_REGISTER_OFFSET;
3341 break;
3342 case AMDGPU_PAGEFLIP_IRQ_D6:
3343 reg_block = CRTC5_REGISTER_OFFSET;
3344 break;
3345 default:
3346 DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
3347 return -EINVAL;
3348 } 3345 }
3349 3346
3350 if (RREG32(mmGRPH_INTERRUPT_STATUS + reg_block) & GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK) 3347 if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
3351 WREG32(mmGRPH_INTERRUPT_STATUS + reg_block, GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK); 3348 GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
3349 WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
3350 GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
3352 3351
3353 /* IRQ could occur when in initial stage */ 3352 /* IRQ could occur when in initial stage */
3354 if (amdgpu_crtc == NULL) 3353 if (amdgpu_crtc == NULL)
@@ -3376,7 +3375,6 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
3376 spin_unlock_irqrestore(&adev->ddev->event_lock, flags); 3375 spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
3377 3376
3378 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); 3377 drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
3379 amdgpu_irq_put(adev, &adev->pageflip_irq, crtc_id);
3380 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); 3378 queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
3381 3379
3382 return 0; 3380 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index e992bf2ff66c..72793f93e2fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5542,24 +5542,6 @@ const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
5542 .set_powergating_state = gfx_v7_0_set_powergating_state, 5542 .set_powergating_state = gfx_v7_0_set_powergating_state,
5543}; 5543};
5544 5544
5545/**
5546 * gfx_v7_0_ring_is_lockup - check if the 3D engine is locked up
5547 *
5548 * @adev: amdgpu_device pointer
5549 * @ring: amdgpu_ring structure holding ring information
5550 *
5551 * Check if the 3D engine is locked up (CIK).
5552 * Returns true if the engine is locked, false if not.
5553 */
5554static bool gfx_v7_0_ring_is_lockup(struct amdgpu_ring *ring)
5555{
5556 if (gfx_v7_0_is_idle(ring->adev)) {
5557 amdgpu_ring_lockup_update(ring);
5558 return false;
5559 }
5560 return amdgpu_ring_test_lockup(ring);
5561}
5562
5563static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { 5545static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5564 .get_rptr = gfx_v7_0_ring_get_rptr_gfx, 5546 .get_rptr = gfx_v7_0_ring_get_rptr_gfx,
5565 .get_wptr = gfx_v7_0_ring_get_wptr_gfx, 5547 .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
@@ -5573,7 +5555,6 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
5573 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, 5555 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5574 .test_ring = gfx_v7_0_ring_test_ring, 5556 .test_ring = gfx_v7_0_ring_test_ring,
5575 .test_ib = gfx_v7_0_ring_test_ib, 5557 .test_ib = gfx_v7_0_ring_test_ib,
5576 .is_lockup = gfx_v7_0_ring_is_lockup,
5577 .insert_nop = amdgpu_ring_insert_nop, 5558 .insert_nop = amdgpu_ring_insert_nop,
5578}; 5559};
5579 5560
@@ -5590,7 +5571,6 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5590 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, 5571 .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
5591 .test_ring = gfx_v7_0_ring_test_ring, 5572 .test_ring = gfx_v7_0_ring_test_ring,
5592 .test_ib = gfx_v7_0_ring_test_ib, 5573 .test_ib = gfx_v7_0_ring_test_ib,
5593 .is_lockup = gfx_v7_0_ring_is_lockup,
5594 .insert_nop = amdgpu_ring_insert_nop, 5574 .insert_nop = amdgpu_ring_insert_nop,
5595}; 5575};
5596 5576
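With the lockup-detection helper gone, .is_lockup simply disappears from the ring funcs tables; with designated initializers the member is left NULL, so any code that still looked at it would have to treat it as optional. An illustrative stand-in (not the real amdgpu_ring_funcs):

#include <stdbool.h>
#include <stdio.h>

struct ring;
struct ring_funcs {
        bool (*test_ring)(struct ring *ring);
        bool (*is_lockup)(struct ring *ring);   /* optional; now left unset */
};

static bool test_ring(struct ring *ring) { (void)ring; return true; }

static const struct ring_funcs funcs = {
        .test_ring = test_ring,
        /* .is_lockup intentionally not set */
};

int main(void)
{
        printf("lockup check available: %s\n", funcs.is_lockup ? "yes" : "no");
        return 0;
}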
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index cb4f68f53f24..e1dcab98e249 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -73,6 +73,12 @@ MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
73MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 73MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
74MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 74MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
75 75
76MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
77MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
78MODULE_FIRMWARE("amdgpu/stoney_me.bin");
79MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
80MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
81
76MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 82MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
77MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 83MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
78MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 84MODULE_FIRMWARE("amdgpu/tonga_me.bin");
@@ -229,11 +235,13 @@ static const u32 fiji_golden_common_all[] =
229 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 235 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
230 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 236 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
231 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 237 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
232 mmGB_ADDR_CONFIG, 0xffffffff, 0x12011003, 238 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
233 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 239 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
234 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 240 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
235 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 241 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
236 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 242 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
243 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
244 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
237}; 245};
238 246
239static const u32 golden_settings_fiji_a10[] = 247static const u32 golden_settings_fiji_a10[] =
@@ -241,18 +249,19 @@ static const u32 golden_settings_fiji_a10[] =
241 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 249 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
242 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 250 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
243 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 251 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
244 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x00000100,
245 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 252 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
253 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
254 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
246 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 255 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
247 mmTCC_CTRL, 0x00100000, 0xf30fff7f, 256 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
257 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
248 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 258 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
249 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x7d6cf5e4, 259 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
250 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x3928b1a0,
251}; 260};
252 261
253static const u32 fiji_mgcg_cgcg_init[] = 262static const u32 fiji_mgcg_cgcg_init[] =
254{ 263{
255 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffc0, 264 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
256 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 265 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
257 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 266 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
258 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 267 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
@@ -493,6 +502,42 @@ static const u32 cz_mgcg_cgcg_init[] =
493 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 502 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
494}; 503};
495 504
505static const u32 stoney_golden_settings_a11[] =
506{
507 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
508 mmGB_GPU_ID, 0x0000000f, 0x00000000,
509 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
510 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
511 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
512 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
513 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
516 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
517};
518
519static const u32 stoney_golden_common_all[] =
520{
521 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
523 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
525 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
529};
530
531static const u32 stoney_mgcg_cgcg_init[] =
532{
533 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
534 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
535 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
536 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
537 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
538 mmATC_MISC_CG, 0xffffffff, 0x000c0200,
539};
540
496static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 541static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
497static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 542static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
498static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 543static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
@@ -545,6 +590,17 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
545 cz_golden_common_all, 590 cz_golden_common_all,
546 (const u32)ARRAY_SIZE(cz_golden_common_all)); 591 (const u32)ARRAY_SIZE(cz_golden_common_all));
547 break; 592 break;
593 case CHIP_STONEY:
594 amdgpu_program_register_sequence(adev,
595 stoney_mgcg_cgcg_init,
596 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
597 amdgpu_program_register_sequence(adev,
598 stoney_golden_settings_a11,
599 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
600 amdgpu_program_register_sequence(adev,
601 stoney_golden_common_all,
602 (const u32)ARRAY_SIZE(stoney_golden_common_all));
603 break;
548 default: 604 default:
549 break; 605 break;
550 } 606 }
@@ -691,6 +747,9 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
691 case CHIP_FIJI: 747 case CHIP_FIJI:
692 chip_name = "fiji"; 748 chip_name = "fiji";
693 break; 749 break;
750 case CHIP_STONEY:
751 chip_name = "stoney";
752 break;
694 default: 753 default:
695 BUG(); 754 BUG();
696 } 755 }
@@ -748,21 +807,23 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
748 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 807 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
749 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 808 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
750 809
751 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 810 if (adev->asic_type != CHIP_STONEY) {
752 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 811 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
753 if (!err) { 812 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
754 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 813 if (!err) {
755 if (err) 814 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
756 goto out; 815 if (err)
757 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 816 goto out;
758 adev->gfx.mec2_fw->data; 817 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
759 adev->gfx.mec2_fw_version = le32_to_cpu( 818 adev->gfx.mec2_fw->data;
760 cp_hdr->header.ucode_version); 819 adev->gfx.mec2_fw_version =
761 adev->gfx.mec2_feature_version = le32_to_cpu( 820 le32_to_cpu(cp_hdr->header.ucode_version);
762 cp_hdr->ucode_feature_version); 821 adev->gfx.mec2_feature_version =
763 } else { 822 le32_to_cpu(cp_hdr->ucode_feature_version);
764 err = 0; 823 } else {
765 adev->gfx.mec2_fw = NULL; 824 err = 0;
825 adev->gfx.mec2_fw = NULL;
826 }
766 } 827 }
767 828
768 if (adev->firmware.smu_load) { 829 if (adev->firmware.smu_load) {
@@ -903,6 +964,232 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
903 return 0; 964 return 0;
904} 965}
905 966
967static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
968{
969 u32 gb_addr_config;
970 u32 mc_shared_chmap, mc_arb_ramcfg;
971 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
972 u32 tmp;
973
974 switch (adev->asic_type) {
975 case CHIP_TOPAZ:
976 adev->gfx.config.max_shader_engines = 1;
977 adev->gfx.config.max_tile_pipes = 2;
978 adev->gfx.config.max_cu_per_sh = 6;
979 adev->gfx.config.max_sh_per_se = 1;
980 adev->gfx.config.max_backends_per_se = 2;
981 adev->gfx.config.max_texture_channel_caches = 2;
982 adev->gfx.config.max_gprs = 256;
983 adev->gfx.config.max_gs_threads = 32;
984 adev->gfx.config.max_hw_contexts = 8;
985
986 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
987 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
988 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
989 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
990 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
991 break;
992 case CHIP_FIJI:
993 adev->gfx.config.max_shader_engines = 4;
994 adev->gfx.config.max_tile_pipes = 16;
995 adev->gfx.config.max_cu_per_sh = 16;
996 adev->gfx.config.max_sh_per_se = 1;
997 adev->gfx.config.max_backends_per_se = 4;
998 adev->gfx.config.max_texture_channel_caches = 16;
999 adev->gfx.config.max_gprs = 256;
1000 adev->gfx.config.max_gs_threads = 32;
1001 adev->gfx.config.max_hw_contexts = 8;
1002
1003 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1004 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1005 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1006 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1007 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1008 break;
1009 case CHIP_TONGA:
1010 adev->gfx.config.max_shader_engines = 4;
1011 adev->gfx.config.max_tile_pipes = 8;
1012 adev->gfx.config.max_cu_per_sh = 8;
1013 adev->gfx.config.max_sh_per_se = 1;
1014 adev->gfx.config.max_backends_per_se = 2;
1015 adev->gfx.config.max_texture_channel_caches = 8;
1016 adev->gfx.config.max_gprs = 256;
1017 adev->gfx.config.max_gs_threads = 32;
1018 adev->gfx.config.max_hw_contexts = 8;
1019
1020 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1021 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1022 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1023 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1024 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1025 break;
1026 case CHIP_CARRIZO:
1027 adev->gfx.config.max_shader_engines = 1;
1028 adev->gfx.config.max_tile_pipes = 2;
1029 adev->gfx.config.max_sh_per_se = 1;
1030 adev->gfx.config.max_backends_per_se = 2;
1031
1032 switch (adev->pdev->revision) {
1033 case 0xc4:
1034 case 0x84:
1035 case 0xc8:
1036 case 0xcc:
1037 case 0xe1:
1038 case 0xe3:
1039 /* B10 */
1040 adev->gfx.config.max_cu_per_sh = 8;
1041 break;
1042 case 0xc5:
1043 case 0x81:
1044 case 0x85:
1045 case 0xc9:
1046 case 0xcd:
1047 case 0xe2:
1048 case 0xe4:
1049 /* B8 */
1050 adev->gfx.config.max_cu_per_sh = 6;
1051 break;
1052 case 0xc6:
1053 case 0xca:
1054 case 0xce:
1055 case 0x88:
1056 /* B6 */
1057 adev->gfx.config.max_cu_per_sh = 6;
1058 break;
1059 case 0xc7:
1060 case 0x87:
1061 case 0xcb:
1062 case 0xe5:
1063 case 0x89:
1064 default:
1065 /* B4 */
1066 adev->gfx.config.max_cu_per_sh = 4;
1067 break;
1068 }
1069
1070 adev->gfx.config.max_texture_channel_caches = 2;
1071 adev->gfx.config.max_gprs = 256;
1072 adev->gfx.config.max_gs_threads = 32;
1073 adev->gfx.config.max_hw_contexts = 8;
1074
1075 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1076 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1077 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1078 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1079 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1080 break;
1081 case CHIP_STONEY:
1082 adev->gfx.config.max_shader_engines = 1;
1083 adev->gfx.config.max_tile_pipes = 2;
1084 adev->gfx.config.max_sh_per_se = 1;
1085 adev->gfx.config.max_backends_per_se = 1;
1086
1087 switch (adev->pdev->revision) {
1088 case 0xc0:
1089 case 0xc1:
1090 case 0xc2:
1091 case 0xc4:
1092 case 0xc8:
1093 case 0xc9:
1094 adev->gfx.config.max_cu_per_sh = 3;
1095 break;
1096 case 0xd0:
1097 case 0xd1:
1098 case 0xd2:
1099 default:
1100 adev->gfx.config.max_cu_per_sh = 2;
1101 break;
1102 }
1103
1104 adev->gfx.config.max_texture_channel_caches = 2;
1105 adev->gfx.config.max_gprs = 256;
1106 adev->gfx.config.max_gs_threads = 16;
1107 adev->gfx.config.max_hw_contexts = 8;
1108
1109 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1110 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1111 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1112 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1113 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1114 break;
1115 default:
1116 adev->gfx.config.max_shader_engines = 2;
1117 adev->gfx.config.max_tile_pipes = 4;
1118 adev->gfx.config.max_cu_per_sh = 2;
1119 adev->gfx.config.max_sh_per_se = 1;
1120 adev->gfx.config.max_backends_per_se = 2;
1121 adev->gfx.config.max_texture_channel_caches = 4;
1122 adev->gfx.config.max_gprs = 256;
1123 adev->gfx.config.max_gs_threads = 32;
1124 adev->gfx.config.max_hw_contexts = 8;
1125
1126 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1127 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1128 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1129 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1130 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1131 break;
1132 }
1133
1134 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1135 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1136 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1137
1138 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1139 adev->gfx.config.mem_max_burst_length_bytes = 256;
1140 if (adev->flags & AMD_IS_APU) {
1141 /* Get memory bank mapping mode. */
1142 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1143 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1144 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1145
1146 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1147 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1148 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1149
1150 /* Validate settings in case only one DIMM installed. */
1151 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1152 dimm00_addr_map = 0;
1153 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1154 dimm01_addr_map = 0;
1155 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1156 dimm10_addr_map = 0;
1157 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1158 dimm11_addr_map = 0;
1159
1160 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1161 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1162 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1163 adev->gfx.config.mem_row_size_in_kb = 2;
1164 else
1165 adev->gfx.config.mem_row_size_in_kb = 1;
1166 } else {
1167 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1168 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1169 if (adev->gfx.config.mem_row_size_in_kb > 4)
1170 adev->gfx.config.mem_row_size_in_kb = 4;
1171 }
1172
1173 adev->gfx.config.shader_engine_tile_size = 32;
1174 adev->gfx.config.num_gpus = 1;
1175 adev->gfx.config.multi_gpu_tile_size = 64;
1176
1177 /* fix up row size */
1178 switch (adev->gfx.config.mem_row_size_in_kb) {
1179 case 1:
1180 default:
1181 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1182 break;
1183 case 2:
1184 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1185 break;
1186 case 4:
1187 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1188 break;
1189 }
1190 adev->gfx.config.gb_addr_config = gb_addr_config;
1191}
1192
906static int gfx_v8_0_sw_init(void *handle) 1193static int gfx_v8_0_sw_init(void *handle)
907{ 1194{
908 int i, r; 1195 int i, r;
@@ -1010,6 +1297,8 @@ static int gfx_v8_0_sw_init(void *handle)
1010 1297
1011 adev->gfx.ce_ram_size = 0x8000; 1298 adev->gfx.ce_ram_size = 0x8000;
1012 1299
1300 gfx_v8_0_gpu_early_init(adev);
1301
1013 return 0; 1302 return 0;
1014} 1303}
1015 1304
@@ -1319,6 +1608,296 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1319 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); 1608 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1320 } 1609 }
1321 case CHIP_FIJI: 1610 case CHIP_FIJI:
1611 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1612 switch (reg_offset) {
1613 case 0:
1614 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1615 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1616 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1617 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1618 break;
1619 case 1:
1620 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1621 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1622 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1623 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1624 break;
1625 case 2:
1626 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1627 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1628 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1629 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1630 break;
1631 case 3:
1632 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1633 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1636 break;
1637 case 4:
1638 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1639 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1640 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1641 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1642 break;
1643 case 5:
1644 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1645 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1646 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1648 break;
1649 case 6:
1650 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1651 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1652 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1653 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1654 break;
1655 case 7:
1656 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1657 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1658 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1659 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1660 break;
1661 case 8:
1662 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1663 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1664 break;
1665 case 9:
1666 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1667 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1668 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1670 break;
1671 case 10:
1672 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1673 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1674 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1675 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1676 break;
1677 case 11:
1678 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1679 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1680 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1682 break;
1683 case 12:
1684 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1685 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1686 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1688 break;
1689 case 13:
1690 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1691 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1692 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1694 break;
1695 case 14:
1696 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1697 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1698 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1700 break;
1701 case 15:
1702 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1703 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1704 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1706 break;
1707 case 16:
1708 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1709 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1710 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1711 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1712 break;
1713 case 17:
1714 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1715 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1716 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1718 break;
1719 case 18:
1720 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1721 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1722 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1723 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1724 break;
1725 case 19:
1726 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1727 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1728 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1730 break;
1731 case 20:
1732 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1733 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1734 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736 break;
1737 case 21:
1738 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1739 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1740 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1742 break;
1743 case 22:
1744 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1745 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1746 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748 break;
1749 case 23:
1750 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1752 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1754 break;
1755 case 24:
1756 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1757 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1758 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1760 break;
1761 case 25:
1762 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1763 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1764 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1766 break;
1767 case 26:
1768 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1769 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1770 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1771 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1772 break;
1773 case 27:
1774 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1775 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1776 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1777 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1778 break;
1779 case 28:
1780 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1781 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1782 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1783 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1784 break;
1785 case 29:
1786 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1787 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1788 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1789 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1790 break;
1791 case 30:
1792 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1793 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1794 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1795 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1796 break;
1797 default:
1798 gb_tile_moden = 0;
1799 break;
1800 }
1801 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1802 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1803 }
1804 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1805 switch (reg_offset) {
1806 case 0:
1807 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1808 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1809 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1810 NUM_BANKS(ADDR_SURF_8_BANK));
1811 break;
1812 case 1:
1813 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1816 NUM_BANKS(ADDR_SURF_8_BANK));
1817 break;
1818 case 2:
1819 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1820 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1821 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1822 NUM_BANKS(ADDR_SURF_8_BANK));
1823 break;
1824 case 3:
1825 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1828 NUM_BANKS(ADDR_SURF_8_BANK));
1829 break;
1830 case 4:
1831 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1834 NUM_BANKS(ADDR_SURF_8_BANK));
1835 break;
1836 case 5:
1837 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1840 NUM_BANKS(ADDR_SURF_8_BANK));
1841 break;
1842 case 6:
1843 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1844 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1845 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1846 NUM_BANKS(ADDR_SURF_8_BANK));
1847 break;
1848 case 8:
1849 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1852 NUM_BANKS(ADDR_SURF_8_BANK));
1853 break;
1854 case 9:
1855 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1858 NUM_BANKS(ADDR_SURF_8_BANK));
1859 break;
1860 case 10:
1861 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1864 NUM_BANKS(ADDR_SURF_8_BANK));
1865 break;
1866 case 11:
1867 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1868 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1869 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1870 NUM_BANKS(ADDR_SURF_8_BANK));
1871 break;
1872 case 12:
1873 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1874 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1875 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1876 NUM_BANKS(ADDR_SURF_8_BANK));
1877 break;
1878 case 13:
1879 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1882 NUM_BANKS(ADDR_SURF_8_BANK));
1883 break;
1884 case 14:
1885 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1886 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1887 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1888 NUM_BANKS(ADDR_SURF_4_BANK));
1889 break;
1890 case 7:
1891 /* unused idx */
1892 continue;
1893 default:
1894 gb_tile_moden = 0;
1895 break;
1896 }
1897 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1898 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1899 }
1900 break;
1322 case CHIP_TONGA: 1901 case CHIP_TONGA:
1323 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 1902 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1324 switch (reg_offset) { 1903 switch (reg_offset) {
@@ -1610,6 +2189,273 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1610 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); 2189 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1611 } 2190 }
1612 break; 2191 break;
2192 case CHIP_STONEY:
2193 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2194 switch (reg_offset) {
2195 case 0:
2196 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197 PIPE_CONFIG(ADDR_SURF_P2) |
2198 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2199 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2200 break;
2201 case 1:
2202 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2203 PIPE_CONFIG(ADDR_SURF_P2) |
2204 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2206 break;
2207 case 2:
2208 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2209 PIPE_CONFIG(ADDR_SURF_P2) |
2210 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212 break;
2213 case 3:
2214 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2215 PIPE_CONFIG(ADDR_SURF_P2) |
2216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2218 break;
2219 case 4:
2220 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221 PIPE_CONFIG(ADDR_SURF_P2) |
2222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224 break;
2225 case 5:
2226 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227 PIPE_CONFIG(ADDR_SURF_P2) |
2228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230 break;
2231 case 6:
2232 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233 PIPE_CONFIG(ADDR_SURF_P2) |
2234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 break;
2237 case 8:
2238 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2239 PIPE_CONFIG(ADDR_SURF_P2));
2240 break;
2241 case 9:
2242 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2243 PIPE_CONFIG(ADDR_SURF_P2) |
2244 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246 break;
2247 case 10:
2248 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2252 break;
2253 case 11:
2254 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2255 PIPE_CONFIG(ADDR_SURF_P2) |
2256 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2258 break;
2259 case 13:
2260 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2264 break;
2265 case 14:
2266 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P2) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270 break;
2271 case 15:
2272 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 break;
2277 case 16:
2278 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P2) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2282 break;
2283 case 18:
2284 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2285 PIPE_CONFIG(ADDR_SURF_P2) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288 break;
2289 case 19:
2290 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 break;
2295 case 20:
2296 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2297 PIPE_CONFIG(ADDR_SURF_P2) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2300 break;
2301 case 21:
2302 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 break;
2307 case 22:
2308 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2309 PIPE_CONFIG(ADDR_SURF_P2) |
2310 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2312 break;
2313 case 24:
2314 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 break;
2319 case 25:
2320 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2321 PIPE_CONFIG(ADDR_SURF_P2) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2324 break;
2325 case 26:
2326 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2330 break;
2331 case 27:
2332 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333 PIPE_CONFIG(ADDR_SURF_P2) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336 break;
2337 case 28:
2338 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339 PIPE_CONFIG(ADDR_SURF_P2) |
2340 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2342 break;
2343 case 29:
2344 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345 PIPE_CONFIG(ADDR_SURF_P2) |
2346 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2348 break;
2349 case 7:
2350 case 12:
2351 case 17:
2352 case 23:
2353 /* unused idx */
2354 continue;
2355 default:
2356 gb_tile_moden = 0;
2357 break;
2358 };
2359 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
2360 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
2361 }
2362 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2363 switch (reg_offset) {
2364 case 0:
2365 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2368 NUM_BANKS(ADDR_SURF_8_BANK));
2369 break;
2370 case 1:
2371 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 NUM_BANKS(ADDR_SURF_8_BANK));
2375 break;
2376 case 2:
2377 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2380 NUM_BANKS(ADDR_SURF_8_BANK));
2381 break;
2382 case 3:
2383 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 NUM_BANKS(ADDR_SURF_8_BANK));
2387 break;
2388 case 4:
2389 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2392 NUM_BANKS(ADDR_SURF_8_BANK));
2393 break;
2394 case 5:
2395 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2398 NUM_BANKS(ADDR_SURF_8_BANK));
2399 break;
2400 case 6:
2401 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2403 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2404 NUM_BANKS(ADDR_SURF_8_BANK));
2405 break;
2406 case 8:
2407 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2410 NUM_BANKS(ADDR_SURF_16_BANK));
2411 break;
2412 case 9:
2413 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2416 NUM_BANKS(ADDR_SURF_16_BANK));
2417 break;
2418 case 10:
2419 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2420 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2421 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2422 NUM_BANKS(ADDR_SURF_16_BANK));
2423 break;
2424 case 11:
2425 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2426 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2427 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2428 NUM_BANKS(ADDR_SURF_16_BANK));
2429 break;
2430 case 12:
2431 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2433 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2434 NUM_BANKS(ADDR_SURF_16_BANK));
2435 break;
2436 case 13:
2437 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2440 NUM_BANKS(ADDR_SURF_16_BANK));
2441 break;
2442 case 14:
2443 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2445 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2446 NUM_BANKS(ADDR_SURF_8_BANK));
2447 break;
2448 case 7:
2449 /* unused idx */
2450 continue;
2451 default:
2452 gb_tile_moden = 0;
2453 break;
2454 };
2455 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
2456 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
2457 }
2458 break;
1613 case CHIP_CARRIZO: 2459 case CHIP_CARRIZO:
1614 default: 2460 default:
1615 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { 2461 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
@@ -2043,203 +2889,23 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2043 2889
2044static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 2890static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2045{ 2891{
2046 u32 gb_addr_config;
2047 u32 mc_shared_chmap, mc_arb_ramcfg;
2048 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
2049 u32 tmp; 2892 u32 tmp;
2050 int i; 2893 int i;
2051 2894
2052 switch (adev->asic_type) {
2053 case CHIP_TOPAZ:
2054 adev->gfx.config.max_shader_engines = 1;
2055 adev->gfx.config.max_tile_pipes = 2;
2056 adev->gfx.config.max_cu_per_sh = 6;
2057 adev->gfx.config.max_sh_per_se = 1;
2058 adev->gfx.config.max_backends_per_se = 2;
2059 adev->gfx.config.max_texture_channel_caches = 2;
2060 adev->gfx.config.max_gprs = 256;
2061 adev->gfx.config.max_gs_threads = 32;
2062 adev->gfx.config.max_hw_contexts = 8;
2063
2064 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2065 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2066 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2067 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2068 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
2069 break;
2070 case CHIP_FIJI:
2071 adev->gfx.config.max_shader_engines = 4;
2072 adev->gfx.config.max_tile_pipes = 16;
2073 adev->gfx.config.max_cu_per_sh = 16;
2074 adev->gfx.config.max_sh_per_se = 1;
2075 adev->gfx.config.max_backends_per_se = 4;
2076 adev->gfx.config.max_texture_channel_caches = 8;
2077 adev->gfx.config.max_gprs = 256;
2078 adev->gfx.config.max_gs_threads = 32;
2079 adev->gfx.config.max_hw_contexts = 8;
2080
2081 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2082 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2083 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2084 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2085 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2086 break;
2087 case CHIP_TONGA:
2088 adev->gfx.config.max_shader_engines = 4;
2089 adev->gfx.config.max_tile_pipes = 8;
2090 adev->gfx.config.max_cu_per_sh = 8;
2091 adev->gfx.config.max_sh_per_se = 1;
2092 adev->gfx.config.max_backends_per_se = 2;
2093 adev->gfx.config.max_texture_channel_caches = 8;
2094 adev->gfx.config.max_gprs = 256;
2095 adev->gfx.config.max_gs_threads = 32;
2096 adev->gfx.config.max_hw_contexts = 8;
2097
2098 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2099 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2100 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2101 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2102 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2103 break;
2104 case CHIP_CARRIZO:
2105 adev->gfx.config.max_shader_engines = 1;
2106 adev->gfx.config.max_tile_pipes = 2;
2107 adev->gfx.config.max_sh_per_se = 1;
2108 adev->gfx.config.max_backends_per_se = 2;
2109
2110 switch (adev->pdev->revision) {
2111 case 0xc4:
2112 case 0x84:
2113 case 0xc8:
2114 case 0xcc:
2115 /* B10 */
2116 adev->gfx.config.max_cu_per_sh = 8;
2117 break;
2118 case 0xc5:
2119 case 0x81:
2120 case 0x85:
2121 case 0xc9:
2122 case 0xcd:
2123 /* B8 */
2124 adev->gfx.config.max_cu_per_sh = 6;
2125 break;
2126 case 0xc6:
2127 case 0xca:
2128 case 0xce:
2129 /* B6 */
2130 adev->gfx.config.max_cu_per_sh = 6;
2131 break;
2132 case 0xc7:
2133 case 0x87:
2134 case 0xcb:
2135 default:
2136 /* B4 */
2137 adev->gfx.config.max_cu_per_sh = 4;
2138 break;
2139 }
2140
2141 adev->gfx.config.max_texture_channel_caches = 2;
2142 adev->gfx.config.max_gprs = 256;
2143 adev->gfx.config.max_gs_threads = 32;
2144 adev->gfx.config.max_hw_contexts = 8;
2145
2146 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2147 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2148 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2149 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2150 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
2151 break;
2152 default:
2153 adev->gfx.config.max_shader_engines = 2;
2154 adev->gfx.config.max_tile_pipes = 4;
2155 adev->gfx.config.max_cu_per_sh = 2;
2156 adev->gfx.config.max_sh_per_se = 1;
2157 adev->gfx.config.max_backends_per_se = 2;
2158 adev->gfx.config.max_texture_channel_caches = 4;
2159 adev->gfx.config.max_gprs = 256;
2160 adev->gfx.config.max_gs_threads = 32;
2161 adev->gfx.config.max_hw_contexts = 8;
2162
2163 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2164 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2165 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2166 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2167 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2168 break;
2169 }
2170
2171 tmp = RREG32(mmGRBM_CNTL); 2895 tmp = RREG32(mmGRBM_CNTL);
2172 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff); 2896 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2173 WREG32(mmGRBM_CNTL, tmp); 2897 WREG32(mmGRBM_CNTL, tmp);
2174 2898
2175 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 2899 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2176 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 2900 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2177 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 2901 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2178
2179 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2180 adev->gfx.config.mem_max_burst_length_bytes = 256;
2181 if (adev->flags & AMD_IS_APU) {
2182 /* Get memory bank mapping mode. */
2183 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2184 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2185 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2186
2187 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2188 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2189 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2190
2191 /* Validate settings in case only one DIMM installed. */
2192 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2193 dimm00_addr_map = 0;
2194 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2195 dimm01_addr_map = 0;
2196 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2197 dimm10_addr_map = 0;
2198 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2199 dimm11_addr_map = 0;
2200
2201 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2202 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2203 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2204 adev->gfx.config.mem_row_size_in_kb = 2;
2205 else
2206 adev->gfx.config.mem_row_size_in_kb = 1;
2207 } else {
2208 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2209 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2210 if (adev->gfx.config.mem_row_size_in_kb > 4)
2211 adev->gfx.config.mem_row_size_in_kb = 4;
2212 }
2213
2214 adev->gfx.config.shader_engine_tile_size = 32;
2215 adev->gfx.config.num_gpus = 1;
2216 adev->gfx.config.multi_gpu_tile_size = 64;
2217
2218 /* fix up row size */
2219 switch (adev->gfx.config.mem_row_size_in_kb) {
2220 case 1:
2221 default:
2222 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2223 break;
2224 case 2:
2225 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2226 break;
2227 case 4:
2228 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2229 break;
2230 }
2231 adev->gfx.config.gb_addr_config = gb_addr_config;
2232
2233 WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
2234 WREG32(mmHDP_ADDR_CONFIG, gb_addr_config);
2235 WREG32(mmDMIF_ADDR_CALC, gb_addr_config);
2236 WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, 2902 WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
2237 gb_addr_config & 0x70); 2903 adev->gfx.config.gb_addr_config & 0x70);
2238 WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, 2904 WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
2239 gb_addr_config & 0x70); 2905 adev->gfx.config.gb_addr_config & 0x70);
2240 WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config); 2906 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2241 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); 2907 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2242 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); 2908 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2243 2909
2244 gfx_v8_0_tiling_mode_table_init(adev); 2910 gfx_v8_0_tiling_mode_table_init(adev);
2245 2911
@@ -2256,13 +2922,13 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2256 if (i == 0) { 2922 if (i == 0) {
2257 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 2923 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2258 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 2924 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2259 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 2925 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2260 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2926 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2261 WREG32(mmSH_MEM_CONFIG, tmp); 2927 WREG32(mmSH_MEM_CONFIG, tmp);
2262 } else { 2928 } else {
2263 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 2929 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2264 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC); 2930 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2265 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 2931 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2266 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2932 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2267 WREG32(mmSH_MEM_CONFIG, tmp); 2933 WREG32(mmSH_MEM_CONFIG, tmp);
2268 } 2934 }
@@ -2377,7 +3043,7 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2377 WREG32(mmRLC_CNTL, tmp); 3043 WREG32(mmRLC_CNTL, tmp);
2378 3044
2379 /* Carrizo: enable the CP interrupt only after the CP is initialized */ 3045 /* Carrizo: enable the CP interrupt only after the CP is initialized */
2380 if (adev->asic_type != CHIP_CARRIZO) 3046 if (!(adev->flags & AMD_IS_APU))
2381 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 3047 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2382 3048
2383 udelay(50); 3049 udelay(50);
@@ -2590,15 +3256,22 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2590 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 3256 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2591 switch (adev->asic_type) { 3257 switch (adev->asic_type) {
2592 case CHIP_TONGA: 3258 case CHIP_TONGA:
2593 case CHIP_FIJI:
2594 amdgpu_ring_write(ring, 0x16000012); 3259 amdgpu_ring_write(ring, 0x16000012);
2595 amdgpu_ring_write(ring, 0x0000002A); 3260 amdgpu_ring_write(ring, 0x0000002A);
2596 break; 3261 break;
3262 case CHIP_FIJI:
3263 amdgpu_ring_write(ring, 0x3a00161a);
3264 amdgpu_ring_write(ring, 0x0000002e);
3265 break;
2597 case CHIP_TOPAZ: 3266 case CHIP_TOPAZ:
2598 case CHIP_CARRIZO: 3267 case CHIP_CARRIZO:
2599 amdgpu_ring_write(ring, 0x00000002); 3268 amdgpu_ring_write(ring, 0x00000002);
2600 amdgpu_ring_write(ring, 0x00000000); 3269 amdgpu_ring_write(ring, 0x00000000);
2601 break; 3270 break;
3271 case CHIP_STONEY:
3272 amdgpu_ring_write(ring, 0x00000000);
3273 amdgpu_ring_write(ring, 0x00000000);
3274 break;
2602 default: 3275 default:
2603 BUG(); 3276 BUG();
2604 } 3277 }
@@ -3233,7 +3906,8 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3233 /* enable the doorbell if requested */ 3906 /* enable the doorbell if requested */
3234 if (use_doorbell) { 3907 if (use_doorbell) {
3235 if ((adev->asic_type == CHIP_CARRIZO) || 3908 if ((adev->asic_type == CHIP_CARRIZO) ||
3236 (adev->asic_type == CHIP_FIJI)) { 3909 (adev->asic_type == CHIP_FIJI) ||
3910 (adev->asic_type == CHIP_STONEY)) {
3237 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 3911 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3238 AMDGPU_DOORBELL_KIQ << 2); 3912 AMDGPU_DOORBELL_KIQ << 2);
3239 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 3913 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
@@ -3305,7 +3979,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3305{ 3979{
3306 int r; 3980 int r;
3307 3981
3308 if (adev->asic_type != CHIP_CARRIZO) 3982 if (!(adev->flags & AMD_IS_APU))
3309 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 3983 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3310 3984
3311 if (!adev->firmware.smu_load) { 3985 if (!adev->firmware.smu_load) {
@@ -4068,15 +4742,6 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4068 } 4742 }
4069} 4743}
4070 4744
4071static bool gfx_v8_0_ring_is_lockup(struct amdgpu_ring *ring)
4072{
4073 if (gfx_v8_0_is_idle(ring->adev)) {
4074 amdgpu_ring_lockup_update(ring);
4075 return false;
4076 }
4077 return amdgpu_ring_test_lockup(ring);
4078}
4079
4080static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4745static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4081{ 4746{
4082 return ring->adev->wb.wb[ring->rptr_offs]; 4747 return ring->adev->wb.wb[ring->rptr_offs];
@@ -4107,6 +4772,7 @@ static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4107 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 4772 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4108 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 4773 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4109 EOP_TC_ACTION_EN | 4774 EOP_TC_ACTION_EN |
4775 EOP_TC_WB_ACTION_EN |
4110 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4776 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4111 EVENT_INDEX(5))); 4777 EVENT_INDEX(5)));
4112 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 4778 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -4357,7 +5023,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
4357 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 5023 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
4358 .test_ring = gfx_v8_0_ring_test_ring, 5024 .test_ring = gfx_v8_0_ring_test_ring,
4359 .test_ib = gfx_v8_0_ring_test_ib, 5025 .test_ib = gfx_v8_0_ring_test_ib,
4360 .is_lockup = gfx_v8_0_ring_is_lockup,
4361 .insert_nop = amdgpu_ring_insert_nop, 5026 .insert_nop = amdgpu_ring_insert_nop,
4362}; 5027};
4363 5028
@@ -4374,7 +5039,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
4374 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 5039 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
4375 .test_ring = gfx_v8_0_ring_test_ring, 5040 .test_ring = gfx_v8_0_ring_test_ring,
4376 .test_ib = gfx_v8_0_ring_test_ib, 5041 .test_ib = gfx_v8_0_ring_test_ib,
4377 .is_lockup = gfx_v8_0_ring_is_lockup,
4378 .insert_nop = amdgpu_ring_insert_nop, 5042 .insert_nop = amdgpu_ring_insert_nop,
4379}; 5043};
4380 5044
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index fab5471d25d7..ed8abb58a785 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -40,7 +40,7 @@
40static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); 40static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
41static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); 41static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
42 42
43MODULE_FIRMWARE("radeon/boniare_mc.bin"); 43MODULE_FIRMWARE("radeon/bonaire_mc.bin");
44MODULE_FIRMWARE("radeon/hawaii_mc.bin"); 44MODULE_FIRMWARE("radeon/hawaii_mc.bin");
45 45
46/** 46/**
@@ -436,6 +436,33 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev,
436} 436}
437 437
438/** 438/**
439 * gmc_v7_0_set_fault_enable_default - update VM fault handling
440 *
441 * @adev: amdgpu_device pointer
442 * @value: true redirects VM faults to the default page
443 */
444static void gmc_v7_0_set_fault_enable_default(struct amdgpu_device *adev,
445 bool value)
446{
447 u32 tmp;
448
449 tmp = RREG32(mmVM_CONTEXT1_CNTL);
450 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
451 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
452 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
453 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
454 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
455 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
456 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
457 VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
458 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
459 READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
460 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
461 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
462 WREG32(mmVM_CONTEXT1_CNTL, tmp);
463}
464
465/**
439 * gmc_v7_0_gart_enable - gart enable 466 * gmc_v7_0_gart_enable - gart enable
440 * 467 *
441 * @adev: amdgpu_device pointer 468 * @adev: amdgpu_device pointer
@@ -474,6 +501,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
474 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); 501 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
475 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); 502 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
476 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); 503 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
504 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
477 WREG32(mmVM_L2_CNTL, tmp); 505 WREG32(mmVM_L2_CNTL, tmp);
478 tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); 506 tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
479 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); 507 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
@@ -485,7 +513,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
485 WREG32(mmVM_L2_CNTL3, tmp); 513 WREG32(mmVM_L2_CNTL3, tmp);
486 /* setup context0 */ 514 /* setup context0 */
487 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); 515 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
488 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1); 516 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
489 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); 517 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
490 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 518 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
491 (u32)(adev->dummy_page.addr >> 12)); 519 (u32)(adev->dummy_page.addr >> 12));
@@ -523,15 +551,13 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
523 tmp = RREG32(mmVM_CONTEXT1_CNTL); 551 tmp = RREG32(mmVM_CONTEXT1_CNTL);
524 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); 552 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
525 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1); 553 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1);
526 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
527 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
528 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
529 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
530 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
531 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
532 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, 554 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
533 amdgpu_vm_block_size - 9); 555 amdgpu_vm_block_size - 9);
534 WREG32(mmVM_CONTEXT1_CNTL, tmp); 556 WREG32(mmVM_CONTEXT1_CNTL, tmp);
557 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
558 gmc_v7_0_set_fault_enable_default(adev, false);
559 else
560 gmc_v7_0_set_fault_enable_default(adev, true);
535 561
536 if (adev->asic_type == CHIP_KAVERI) { 562 if (adev->asic_type == CHIP_KAVERI) {
537 tmp = RREG32(mmCHUB_CONTROL); 563 tmp = RREG32(mmCHUB_CONTROL);
@@ -935,12 +961,10 @@ static int gmc_v7_0_sw_init(void *handle)
935 961
936static int gmc_v7_0_sw_fini(void *handle) 962static int gmc_v7_0_sw_fini(void *handle)
937{ 963{
938 int i;
939 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 964 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
940 965
941 if (adev->vm_manager.enabled) { 966 if (adev->vm_manager.enabled) {
942 for (i = 0; i < AMDGPU_NUM_VM; ++i) 967 amdgpu_vm_manager_fini(adev);
943 amdgpu_fence_unref(&adev->vm_manager.active[i]);
944 gmc_v7_0_vm_fini(adev); 968 gmc_v7_0_vm_fini(adev);
945 adev->vm_manager.enabled = false; 969 adev->vm_manager.enabled = false;
946 } 970 }
@@ -985,12 +1009,10 @@ static int gmc_v7_0_hw_fini(void *handle)
985 1009
986static int gmc_v7_0_suspend(void *handle) 1010static int gmc_v7_0_suspend(void *handle)
987{ 1011{
988 int i;
989 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1012 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
990 1013
991 if (adev->vm_manager.enabled) { 1014 if (adev->vm_manager.enabled) {
992 for (i = 0; i < AMDGPU_NUM_VM; ++i) 1015 amdgpu_vm_manager_fini(adev);
993 amdgpu_fence_unref(&adev->vm_manager.active[i]);
994 gmc_v7_0_vm_fini(adev); 1016 gmc_v7_0_vm_fini(adev);
995 adev->vm_manager.enabled = false; 1017 adev->vm_manager.enabled = false;
996 } 1018 }
@@ -1268,6 +1290,9 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
1268 if (!addr && !status) 1290 if (!addr && !status)
1269 return 0; 1291 return 0;
1270 1292
1293 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
1294 gmc_v7_0_set_fault_enable_default(adev, false);
1295
1271 dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", 1296 dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
1272 entry->src_id, entry->src_data); 1297 entry->src_id, entry->src_data);
1273 dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 1298 dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 7bc9e9fcf3d2..d39028440814 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -93,6 +93,12 @@ static const u32 cz_mgcg_cgcg_init[] =
93 mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 93 mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
94}; 94};
95 95
96static const u32 stoney_mgcg_cgcg_init[] =
97{
98 mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
99};
100
101
96static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) 102static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
97{ 103{
98 switch (adev->asic_type) { 104 switch (adev->asic_type) {
@@ -125,6 +131,11 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
125 cz_mgcg_cgcg_init, 131 cz_mgcg_cgcg_init,
126 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); 132 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
127 break; 133 break;
134 case CHIP_STONEY:
135 amdgpu_program_register_sequence(adev,
136 stoney_mgcg_cgcg_init,
137 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
138 break;
128 default: 139 default:
129 break; 140 break;
130 } 141 }
@@ -228,6 +239,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
228 chip_name = "fiji"; 239 chip_name = "fiji";
229 break; 240 break;
230 case CHIP_CARRIZO: 241 case CHIP_CARRIZO:
242 case CHIP_STONEY:
231 return 0; 243 return 0;
232 default: BUG(); 244 default: BUG();
233 } 245 }
@@ -550,6 +562,35 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev,
550} 562}
551 563
552/** 564/**
565 * gmc_v8_0_set_fault_enable_default - update VM fault handling
566 *
567 * @adev: amdgpu_device pointer
568 * @value: true redirects VM faults to the default page
569 */
570static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev,
571 bool value)
572{
573 u32 tmp;
574
575 tmp = RREG32(mmVM_CONTEXT1_CNTL);
576 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
577 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
578 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
579 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
580 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
581 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
582 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
583 VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
584 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
585 READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
586 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
587 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
588 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
589 EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
590 WREG32(mmVM_CONTEXT1_CNTL, tmp);
591}
592
593/**
553 * gmc_v8_0_gart_enable - gart enable 594 * gmc_v8_0_gart_enable - gart enable
554 * 595 *
555 * @adev: amdgpu_device pointer 596 * @adev: amdgpu_device pointer
@@ -588,6 +629,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
588 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); 629 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
589 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); 630 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
590 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); 631 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
632 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
591 WREG32(mmVM_L2_CNTL, tmp); 633 WREG32(mmVM_L2_CNTL, tmp);
592 tmp = RREG32(mmVM_L2_CNTL2); 634 tmp = RREG32(mmVM_L2_CNTL2);
593 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); 635 tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
@@ -615,7 +657,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
615 WREG32(mmVM_L2_CNTL4, tmp); 657 WREG32(mmVM_L2_CNTL4, tmp);
616 /* setup context0 */ 658 /* setup context0 */
617 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); 659 WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
618 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1); 660 WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
619 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); 661 WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
620 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 662 WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
621 (u32)(adev->dummy_page.addr >> 12)); 663 (u32)(adev->dummy_page.addr >> 12));
@@ -663,6 +705,10 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
663 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, 705 tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
664 amdgpu_vm_block_size - 9); 706 amdgpu_vm_block_size - 9);
665 WREG32(mmVM_CONTEXT1_CNTL, tmp); 707 WREG32(mmVM_CONTEXT1_CNTL, tmp);
708 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
709 gmc_v8_0_set_fault_enable_default(adev, false);
710 else
711 gmc_v8_0_set_fault_enable_default(adev, true);
666 712
667 gmc_v8_0_gart_flush_gpu_tlb(adev, 0); 713 gmc_v8_0_gart_flush_gpu_tlb(adev, 0);
668 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 714 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -934,12 +980,10 @@ static int gmc_v8_0_sw_init(void *handle)
934 980
935static int gmc_v8_0_sw_fini(void *handle) 981static int gmc_v8_0_sw_fini(void *handle)
936{ 982{
937 int i;
938 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 983 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
939 984
940 if (adev->vm_manager.enabled) { 985 if (adev->vm_manager.enabled) {
941 for (i = 0; i < AMDGPU_NUM_VM; ++i) 986 amdgpu_vm_manager_fini(adev);
942 amdgpu_fence_unref(&adev->vm_manager.active[i]);
943 gmc_v8_0_vm_fini(adev); 987 gmc_v8_0_vm_fini(adev);
944 adev->vm_manager.enabled = false; 988 adev->vm_manager.enabled = false;
945 } 989 }
@@ -986,12 +1030,10 @@ static int gmc_v8_0_hw_fini(void *handle)
986 1030
987static int gmc_v8_0_suspend(void *handle) 1031static int gmc_v8_0_suspend(void *handle)
988{ 1032{
989 int i;
990 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1033 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
991 1034
992 if (adev->vm_manager.enabled) { 1035 if (adev->vm_manager.enabled) {
993 for (i = 0; i < AMDGPU_NUM_VM; ++i) 1036 amdgpu_vm_manager_fini(adev);
994 amdgpu_fence_unref(&adev->vm_manager.active[i]);
995 gmc_v8_0_vm_fini(adev); 1037 gmc_v8_0_vm_fini(adev);
996 adev->vm_manager.enabled = false; 1038 adev->vm_manager.enabled = false;
997 } 1039 }
@@ -1268,6 +1310,9 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
1268 if (!addr && !status) 1310 if (!addr && !status)
1269 return 0; 1311 return 0;
1270 1312
1313 if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
1314 gmc_v8_0_set_fault_enable_default(adev, false);
1315
1271 dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", 1316 dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
1272 entry->src_id, entry->src_data); 1317 entry->src_id, entry->src_data);
1273 dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", 1318 dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
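
Both GMC blocks also carry the same teardown change in sw_fini and suspend:
the open-coded loop that unreferenced each vm_manager.active[] fence is
replaced by a single amdgpu_vm_manager_fini() call. A sketch of the
resulting shape (the helper's body lives elsewhere in the series and is not
shown here):

	static int example_sw_fini(void *handle)
	{
		struct amdgpu_device *adev = (struct amdgpu_device *)handle;

		if (adev->vm_manager.enabled) {
			/* was: for (i = 0; i < AMDGPU_NUM_VM; ++i)
			 *          amdgpu_fence_unref(&adev->vm_manager.active[i]); */
			amdgpu_vm_manager_fini(adev);	/* VMID/fence cleanup in one place */
			gmc_v8_0_vm_fini(adev);
			adev->vm_manager.enabled = false;
		}
		return 0;
	}
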
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 94ec04a9c4d5..7e9154c7f1db 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -2995,6 +2995,15 @@ static int kv_dpm_late_init(void *handle)
2995{ 2995{
2996 /* powerdown unused blocks for now */ 2996 /* powerdown unused blocks for now */
2997 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2997 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2998 int ret;
2999
3000 if (!amdgpu_dpm)
3001 return 0;
3002
3003 /* init the sysfs and debugfs files late */
3004 ret = amdgpu_pm_sysfs_init(adev);
3005 if (ret)
3006 return ret;
2998 3007
2999 kv_dpm_powergate_acp(adev, true); 3008 kv_dpm_powergate_acp(adev, true);
3000 kv_dpm_powergate_samu(adev, true); 3009 kv_dpm_powergate_samu(adev, true);
@@ -3038,9 +3047,6 @@ static int kv_dpm_sw_init(void *handle)
3038 adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; 3047 adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
3039 if (amdgpu_dpm == 1) 3048 if (amdgpu_dpm == 1)
3040 amdgpu_pm_print_power_states(adev); 3049 amdgpu_pm_print_power_states(adev);
3041 ret = amdgpu_pm_sysfs_init(adev);
3042 if (ret)
3043 goto dpm_failed;
3044 mutex_unlock(&adev->pm.mutex); 3050 mutex_unlock(&adev->pm.mutex);
3045 DRM_INFO("amdgpu: dpm initialized\n"); 3051 DRM_INFO("amdgpu: dpm initialized\n");
3046 3052
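
The kv_dpm change moves amdgpu_pm_sysfs_init() out of sw_init (where it ran
under pm.mutex) into late_init, and skips it entirely when DPM is disabled.
A sketch of the reordered hook, using only calls visible in the hunks:

	static int example_kv_dpm_late_init(void *handle)
	{
		struct amdgpu_device *adev = (struct amdgpu_device *)handle;
		int ret;

		if (!amdgpu_dpm)
			return 0;		/* nothing to expose with DPM off */

		ret = amdgpu_pm_sysfs_init(adev);	/* moved here from sw_init */
		if (ret)
			return ret;

		kv_dpm_powergate_acp(adev, true);	/* powerdown unused blocks */
		kv_dpm_powergate_samu(adev, true);
		return 0;
	}
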
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 14e87234171a..2cf50180cc51 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -118,7 +118,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
118{ 118{
119 const char *chip_name; 119 const char *chip_name;
120 char fw_name[30]; 120 char fw_name[30];
121 int err, i; 121 int err = 0, i;
122 struct amdgpu_firmware_info *info = NULL; 122 struct amdgpu_firmware_info *info = NULL;
123 const struct common_firmware_header *header = NULL; 123 const struct common_firmware_header *header = NULL;
124 const struct sdma_firmware_header_v1_0 *hdr; 124 const struct sdma_firmware_header_v1_0 *hdr;
@@ -132,27 +132,27 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
132 default: BUG(); 132 default: BUG();
133 } 133 }
134 134
135 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 135 for (i = 0; i < adev->sdma.num_instances; i++) {
136 if (i == 0) 136 if (i == 0)
137 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); 137 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
138 else 138 else
139 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); 139 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
140 err = request_firmware(&adev->sdma[i].fw, fw_name, adev->dev); 140 err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
141 if (err) 141 if (err)
142 goto out; 142 goto out;
143 err = amdgpu_ucode_validate(adev->sdma[i].fw); 143 err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
144 if (err) 144 if (err)
145 goto out; 145 goto out;
146 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; 146 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
147 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); 147 adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
148 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); 148 adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
149 if (adev->sdma[i].feature_version >= 20) 149 if (adev->sdma.instance[i].feature_version >= 20)
150 adev->sdma[i].burst_nop = true; 150 adev->sdma.instance[i].burst_nop = true;
151 151
152 if (adev->firmware.smu_load) { 152 if (adev->firmware.smu_load) {
153 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; 153 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
154 info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; 154 info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
155 info->fw = adev->sdma[i].fw; 155 info->fw = adev->sdma.instance[i].fw;
156 header = (const struct common_firmware_header *)info->fw->data; 156 header = (const struct common_firmware_header *)info->fw->data;
157 adev->firmware.fw_size += 157 adev->firmware.fw_size +=
158 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 158 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
@@ -164,9 +164,9 @@ out:
164 printk(KERN_ERR 164 printk(KERN_ERR
165 "sdma_v2_4: Failed to load firmware \"%s\"\n", 165 "sdma_v2_4: Failed to load firmware \"%s\"\n",
166 fw_name); 166 fw_name);
167 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 167 for (i = 0; i < adev->sdma.num_instances; i++) {
168 release_firmware(adev->sdma[i].fw); 168 release_firmware(adev->sdma.instance[i].fw);
169 adev->sdma[i].fw = NULL; 169 adev->sdma.instance[i].fw = NULL;
170 } 170 }
171 } 171 }
172 return err; 172 return err;
@@ -199,7 +199,7 @@ static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
199static uint32_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) 199static uint32_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
200{ 200{
201 struct amdgpu_device *adev = ring->adev; 201 struct amdgpu_device *adev = ring->adev;
202 int me = (ring == &ring->adev->sdma[0].ring) ? 0 : 1; 202 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
203 u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2; 203 u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
204 204
205 return wptr; 205 return wptr;
@@ -215,14 +215,14 @@ static uint32_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
215static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) 215static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
216{ 216{
217 struct amdgpu_device *adev = ring->adev; 217 struct amdgpu_device *adev = ring->adev;
218 int me = (ring == &ring->adev->sdma[0].ring) ? 0 : 1; 218 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
219 219
220 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); 220 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
221} 221}
222 222
223static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 223static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
224{ 224{
225 struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); 225 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
226 int i; 226 int i;
227 227
228 for (i = 0; i < count; i++) 228 for (i = 0; i < count; i++)
@@ -284,7 +284,7 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
284{ 284{
285 u32 ref_and_mask = 0; 285 u32 ref_and_mask = 0;
286 286
287 if (ring == &ring->adev->sdma[0].ring) 287 if (ring == &ring->adev->sdma.instance[0].ring)
288 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1); 288 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
289 else 289 else
290 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1); 290 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
@@ -368,8 +368,8 @@ static bool sdma_v2_4_ring_emit_semaphore(struct amdgpu_ring *ring,
368 */ 368 */
369static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) 369static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
370{ 370{
371 struct amdgpu_ring *sdma0 = &adev->sdma[0].ring; 371 struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
372 struct amdgpu_ring *sdma1 = &adev->sdma[1].ring; 372 struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
373 u32 rb_cntl, ib_cntl; 373 u32 rb_cntl, ib_cntl;
374 int i; 374 int i;
375 375
@@ -377,7 +377,7 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
377 (adev->mman.buffer_funcs_ring == sdma1)) 377 (adev->mman.buffer_funcs_ring == sdma1))
378 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 378 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
379 379
380 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 380 for (i = 0; i < adev->sdma.num_instances; i++) {
381 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 381 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
382 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); 382 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
383 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 383 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
@@ -419,7 +419,7 @@ static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
419 sdma_v2_4_rlc_stop(adev); 419 sdma_v2_4_rlc_stop(adev);
420 } 420 }
421 421
422 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 422 for (i = 0; i < adev->sdma.num_instances; i++) {
423 f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]); 423 f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
424 if (enable) 424 if (enable)
425 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0); 425 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
@@ -445,8 +445,8 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
445 u32 wb_offset; 445 u32 wb_offset;
446 int i, j, r; 446 int i, j, r;
447 447
448 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 448 for (i = 0; i < adev->sdma.num_instances; i++) {
449 ring = &adev->sdma[i].ring; 449 ring = &adev->sdma.instance[i].ring;
450 wb_offset = (ring->rptr_offs * 4); 450 wb_offset = (ring->rptr_offs * 4);
451 451
452 mutex_lock(&adev->srbm_mutex); 452 mutex_lock(&adev->srbm_mutex);
@@ -545,29 +545,23 @@ static int sdma_v2_4_load_microcode(struct amdgpu_device *adev)
545 const __le32 *fw_data; 545 const __le32 *fw_data;
546 u32 fw_size; 546 u32 fw_size;
547 int i, j; 547 int i, j;
548 bool smc_loads_fw = false; /* XXX fix me */
549
550 if (!adev->sdma[0].fw || !adev->sdma[1].fw)
551 return -EINVAL;
552 548
553 /* halt the MEs */ 549 /* halt the MEs */
554 sdma_v2_4_enable(adev, false); 550 sdma_v2_4_enable(adev, false);
555 551
556 if (smc_loads_fw) { 552 for (i = 0; i < adev->sdma.num_instances; i++) {
557 /* XXX query SMC for fw load complete */ 553 if (!adev->sdma.instance[i].fw)
558 } else { 554 return -EINVAL;
559 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 555 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
560 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; 556 amdgpu_ucode_print_sdma_hdr(&hdr->header);
561 amdgpu_ucode_print_sdma_hdr(&hdr->header); 557 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
562 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 558 fw_data = (const __le32 *)
563 fw_data = (const __le32 *) 559 (adev->sdma.instance[i].fw->data +
564 (adev->sdma[i].fw->data + 560 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
565 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 561 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
566 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0); 562 for (j = 0; j < fw_size; j++)
567 for (j = 0; j < fw_size; j++) 563 WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
568 WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++)); 564 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
569 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma[i].fw_version);
570 }
571 } 565 }
572 566
573 return 0; 567 return 0;
@@ -894,7 +888,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
894 */ 888 */
895static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib) 889static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
896{ 890{
897 struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); 891 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
898 u32 pad_count; 892 u32 pad_count;
899 int i; 893 int i;
900 894
@@ -952,6 +946,8 @@ static int sdma_v2_4_early_init(void *handle)
952{ 946{
953 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 947 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
954 948
949 adev->sdma.num_instances = SDMA_MAX_INSTANCE;
950
955 sdma_v2_4_set_ring_funcs(adev); 951 sdma_v2_4_set_ring_funcs(adev);
956 sdma_v2_4_set_buffer_funcs(adev); 952 sdma_v2_4_set_buffer_funcs(adev);
957 sdma_v2_4_set_vm_pte_funcs(adev); 953 sdma_v2_4_set_vm_pte_funcs(adev);
@@ -963,21 +959,21 @@ static int sdma_v2_4_early_init(void *handle)
963static int sdma_v2_4_sw_init(void *handle) 959static int sdma_v2_4_sw_init(void *handle)
964{ 960{
965 struct amdgpu_ring *ring; 961 struct amdgpu_ring *ring;
966 int r; 962 int r, i;
967 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 963 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
968 964
969 /* SDMA trap event */ 965 /* SDMA trap event */
970 r = amdgpu_irq_add_id(adev, 224, &adev->sdma_trap_irq); 966 r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq);
971 if (r) 967 if (r)
972 return r; 968 return r;
973 969
974 /* SDMA Privileged inst */ 970 /* SDMA Privileged inst */
975 r = amdgpu_irq_add_id(adev, 241, &adev->sdma_illegal_inst_irq); 971 r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq);
976 if (r) 972 if (r)
977 return r; 973 return r;
978 974
979 /* SDMA Privileged inst */ 975 /* SDMA Privileged inst */
980 r = amdgpu_irq_add_id(adev, 247, &adev->sdma_illegal_inst_irq); 976 r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq);
981 if (r) 977 if (r)
982 return r; 978 return r;
983 979
@@ -987,31 +983,20 @@ static int sdma_v2_4_sw_init(void *handle)
987 return r; 983 return r;
988 } 984 }
989 985
990 ring = &adev->sdma[0].ring; 986 for (i = 0; i < adev->sdma.num_instances; i++) {
991 ring->ring_obj = NULL; 987 ring = &adev->sdma.instance[i].ring;
992 ring->use_doorbell = false; 988 ring->ring_obj = NULL;
993 989 ring->use_doorbell = false;
994 ring = &adev->sdma[1].ring; 990 sprintf(ring->name, "sdma%d", i);
995 ring->ring_obj = NULL; 991 r = amdgpu_ring_init(adev, ring, 256 * 1024,
996 ring->use_doorbell = false; 992 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
997 993 &adev->sdma.trap_irq,
998 ring = &adev->sdma[0].ring; 994 (i == 0) ?
999 sprintf(ring->name, "sdma0"); 995 AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1,
1000 r = amdgpu_ring_init(adev, ring, 256 * 1024, 996 AMDGPU_RING_TYPE_SDMA);
1001 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf, 997 if (r)
1002 &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP0, 998 return r;
1003 AMDGPU_RING_TYPE_SDMA); 999 }
1004 if (r)
1005 return r;
1006
1007 ring = &adev->sdma[1].ring;
1008 sprintf(ring->name, "sdma1");
1009 r = amdgpu_ring_init(adev, ring, 256 * 1024,
1010 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
1011 &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP1,
1012 AMDGPU_RING_TYPE_SDMA);
1013 if (r)
1014 return r;
1015 1000
1016 return r; 1001 return r;
1017} 1002}
@@ -1019,9 +1004,10 @@ static int sdma_v2_4_sw_init(void *handle)
1019static int sdma_v2_4_sw_fini(void *handle) 1004static int sdma_v2_4_sw_fini(void *handle)
1020{ 1005{
1021 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1006 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1007 int i;
1022 1008
1023 amdgpu_ring_fini(&adev->sdma[0].ring); 1009 for (i = 0; i < adev->sdma.num_instances; i++)
1024 amdgpu_ring_fini(&adev->sdma[1].ring); 1010 amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1025 1011
1026 return 0; 1012 return 0;
1027} 1013}
@@ -1100,7 +1086,7 @@ static void sdma_v2_4_print_status(void *handle)
1100 dev_info(adev->dev, "VI SDMA registers\n"); 1086 dev_info(adev->dev, "VI SDMA registers\n");
1101 dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", 1087 dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
1102 RREG32(mmSRBM_STATUS2)); 1088 RREG32(mmSRBM_STATUS2));
1103 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 1089 for (i = 0; i < adev->sdma.num_instances; i++) {
1104 dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n", 1090 dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n",
1105 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i])); 1091 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i]));
1106 dev_info(adev->dev, " SDMA%d_F32_CNTL=0x%08X\n", 1092 dev_info(adev->dev, " SDMA%d_F32_CNTL=0x%08X\n",
@@ -1243,7 +1229,7 @@ static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev,
1243 case 0: 1229 case 0:
1244 switch (queue_id) { 1230 switch (queue_id) {
1245 case 0: 1231 case 0:
1246 amdgpu_fence_process(&adev->sdma[0].ring); 1232 amdgpu_fence_process(&adev->sdma.instance[0].ring);
1247 break; 1233 break;
1248 case 1: 1234 case 1:
1249 /* XXX compute */ 1235 /* XXX compute */
@@ -1256,7 +1242,7 @@ static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev,
1256 case 1: 1242 case 1:
1257 switch (queue_id) { 1243 switch (queue_id) {
1258 case 0: 1244 case 0:
1259 amdgpu_fence_process(&adev->sdma[1].ring); 1245 amdgpu_fence_process(&adev->sdma.instance[1].ring);
1260 break; 1246 break;
1261 case 1: 1247 case 1:
1262 /* XXX compute */ 1248 /* XXX compute */
@@ -1309,24 +1295,6 @@ const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
1309 .set_powergating_state = sdma_v2_4_set_powergating_state, 1295 .set_powergating_state = sdma_v2_4_set_powergating_state,
1310}; 1296};
1311 1297
1312/**
1313 * sdma_v2_4_ring_is_lockup - Check if the DMA engine is locked up
1314 *
1315 * @ring: amdgpu_ring structure holding ring information
1316 *
1317 * Check if the async DMA engine is locked up (VI).
1318 * Returns true if the engine appears to be locked up, false if not.
1319 */
1320static bool sdma_v2_4_ring_is_lockup(struct amdgpu_ring *ring)
1321{
1322
1323 if (sdma_v2_4_is_idle(ring->adev)) {
1324 amdgpu_ring_lockup_update(ring);
1325 return false;
1326 }
1327 return amdgpu_ring_test_lockup(ring);
1328}
1329
1330static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { 1298static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
1331 .get_rptr = sdma_v2_4_ring_get_rptr, 1299 .get_rptr = sdma_v2_4_ring_get_rptr,
1332 .get_wptr = sdma_v2_4_ring_get_wptr, 1300 .get_wptr = sdma_v2_4_ring_get_wptr,
@@ -1339,14 +1307,15 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
1339 .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush, 1307 .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
1340 .test_ring = sdma_v2_4_ring_test_ring, 1308 .test_ring = sdma_v2_4_ring_test_ring,
1341 .test_ib = sdma_v2_4_ring_test_ib, 1309 .test_ib = sdma_v2_4_ring_test_ib,
1342 .is_lockup = sdma_v2_4_ring_is_lockup,
1343 .insert_nop = sdma_v2_4_ring_insert_nop, 1310 .insert_nop = sdma_v2_4_ring_insert_nop,
1344}; 1311};
1345 1312
1346static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) 1313static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
1347{ 1314{
1348 adev->sdma[0].ring.funcs = &sdma_v2_4_ring_funcs; 1315 int i;
1349 adev->sdma[1].ring.funcs = &sdma_v2_4_ring_funcs; 1316
1317 for (i = 0; i < adev->sdma.num_instances; i++)
1318 adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
1350} 1319}
1351 1320
1352static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = { 1321static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
@@ -1360,9 +1329,9 @@ static const struct amdgpu_irq_src_funcs sdma_v2_4_illegal_inst_irq_funcs = {
1360 1329
1361static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) 1330static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
1362{ 1331{
1363 adev->sdma_trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 1332 adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
1364 adev->sdma_trap_irq.funcs = &sdma_v2_4_trap_irq_funcs; 1333 adev->sdma.trap_irq.funcs = &sdma_v2_4_trap_irq_funcs;
1365 adev->sdma_illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs; 1334 adev->sdma.illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs;
1366} 1335}
1367 1336
1368/** 1337/**
@@ -1428,7 +1397,7 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
1428{ 1397{
1429 if (adev->mman.buffer_funcs == NULL) { 1398 if (adev->mman.buffer_funcs == NULL) {
1430 adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs; 1399 adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
1431 adev->mman.buffer_funcs_ring = &adev->sdma[0].ring; 1400 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1432 } 1401 }
1433} 1402}
1434 1403
@@ -1443,7 +1412,7 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
1443{ 1412{
1444 if (adev->vm_manager.vm_pte_funcs == NULL) { 1413 if (adev->vm_manager.vm_pte_funcs == NULL) {
1445 adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; 1414 adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
1446 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; 1415 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
1447 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; 1416 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
1448 } 1417 }
1449} 1418}
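
The bulk of the sdma_v2_4 (and, below, sdma_v3_0) changes are mechanical:
the fixed adev->sdma[SDMA_MAX_INSTANCE] array becomes an adev->sdma
container indexed as instance[i], and every loop is bounded by
adev->sdma.num_instances instead of the compile-time maximum. The layout
below is reconstructed from the accesses in the hunks; field order, types,
and any members not referenced here are assumptions, and the real
definition lives in amdgpu.h:

	struct amdgpu_sdma_instance {
		const struct firmware	*fw;		/* per-engine microcode */
		uint32_t		fw_version;
		uint32_t		feature_version;
		bool			burst_nop;
		struct amdgpu_ring	ring;
	};

	struct amdgpu_sdma {
		struct amdgpu_sdma_instance instance[SDMA_MAX_INSTANCE];
		struct amdgpu_irq_src	trap_irq;
		struct amdgpu_irq_src	illegal_inst_irq;
		int			num_instances;	/* set in early_init */
	};
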
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 9bfe92df15f7..7253132f04b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -55,6 +55,7 @@ MODULE_FIRMWARE("amdgpu/carrizo_sdma.bin");
55MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin"); 55MODULE_FIRMWARE("amdgpu/carrizo_sdma1.bin");
56MODULE_FIRMWARE("amdgpu/fiji_sdma.bin"); 56MODULE_FIRMWARE("amdgpu/fiji_sdma.bin");
57MODULE_FIRMWARE("amdgpu/fiji_sdma1.bin"); 57MODULE_FIRMWARE("amdgpu/fiji_sdma1.bin");
58MODULE_FIRMWARE("amdgpu/stoney_sdma.bin");
58 59
59static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = 60static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
60{ 61{
@@ -122,6 +123,19 @@ static const u32 cz_mgcg_cgcg_init[] =
122 mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100 123 mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
123}; 124};
124 125
126static const u32 stoney_golden_settings_a11[] =
127{
128 mmSDMA0_GFX_IB_CNTL, 0x00000100, 0x00000100,
129 mmSDMA0_POWER_CNTL, 0x00000800, 0x0003c800,
130 mmSDMA0_RLC0_IB_CNTL, 0x00000100, 0x00000100,
131 mmSDMA0_RLC1_IB_CNTL, 0x00000100, 0x00000100,
132};
133
134static const u32 stoney_mgcg_cgcg_init[] =
135{
136 mmSDMA0_CLK_CTRL, 0xffffffff, 0x00000100,
137};
138
125/* 139/*
126 * sDMA - System DMA 140 * sDMA - System DMA
127 * Starting with CIK, the GPU has new asynchronous 141 * Starting with CIK, the GPU has new asynchronous
@@ -166,6 +180,14 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
166 cz_golden_settings_a11, 180 cz_golden_settings_a11,
167 (const u32)ARRAY_SIZE(cz_golden_settings_a11)); 181 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
168 break; 182 break;
183 case CHIP_STONEY:
184 amdgpu_program_register_sequence(adev,
185 stoney_mgcg_cgcg_init,
186 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
187 amdgpu_program_register_sequence(adev,
188 stoney_golden_settings_a11,
189 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
190 break;
169 default: 191 default:
170 break; 192 break;
171 } 193 }
@@ -184,7 +206,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
184{ 206{
185 const char *chip_name; 207 const char *chip_name;
186 char fw_name[30]; 208 char fw_name[30];
187 int err, i; 209 int err = 0, i;
188 struct amdgpu_firmware_info *info = NULL; 210 struct amdgpu_firmware_info *info = NULL;
189 const struct common_firmware_header *header = NULL; 211 const struct common_firmware_header *header = NULL;
190 const struct sdma_firmware_header_v1_0 *hdr; 212 const struct sdma_firmware_header_v1_0 *hdr;
@@ -201,30 +223,33 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
201 case CHIP_CARRIZO: 223 case CHIP_CARRIZO:
202 chip_name = "carrizo"; 224 chip_name = "carrizo";
203 break; 225 break;
226 case CHIP_STONEY:
227 chip_name = "stoney";
228 break;
204 default: BUG(); 229 default: BUG();
205 } 230 }
206 231
207 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 232 for (i = 0; i < adev->sdma.num_instances; i++) {
208 if (i == 0) 233 if (i == 0)
209 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); 234 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
210 else 235 else
211 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); 236 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
212 err = request_firmware(&adev->sdma[i].fw, fw_name, adev->dev); 237 err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
213 if (err) 238 if (err)
214 goto out; 239 goto out;
215 err = amdgpu_ucode_validate(adev->sdma[i].fw); 240 err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
216 if (err) 241 if (err)
217 goto out; 242 goto out;
218 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; 243 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
219 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); 244 adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
220 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); 245 adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
221 if (adev->sdma[i].feature_version >= 20) 246 if (adev->sdma.instance[i].feature_version >= 20)
222 adev->sdma[i].burst_nop = true; 247 adev->sdma.instance[i].burst_nop = true;
223 248
224 if (adev->firmware.smu_load) { 249 if (adev->firmware.smu_load) {
225 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; 250 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
226 info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; 251 info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
227 info->fw = adev->sdma[i].fw; 252 info->fw = adev->sdma.instance[i].fw;
228 header = (const struct common_firmware_header *)info->fw->data; 253 header = (const struct common_firmware_header *)info->fw->data;
229 adev->firmware.fw_size += 254 adev->firmware.fw_size +=
230 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 255 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
@@ -235,9 +260,9 @@ out:
235 printk(KERN_ERR 260 printk(KERN_ERR
236 "sdma_v3_0: Failed to load firmware \"%s\"\n", 261 "sdma_v3_0: Failed to load firmware \"%s\"\n",
237 fw_name); 262 fw_name);
238 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 263 for (i = 0; i < adev->sdma.num_instances; i++) {
239 release_firmware(adev->sdma[i].fw); 264 release_firmware(adev->sdma.instance[i].fw);
240 adev->sdma[i].fw = NULL; 265 adev->sdma.instance[i].fw = NULL;
241 } 266 }
242 } 267 }
243 return err; 268 return err;
@@ -276,7 +301,7 @@ static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
276 /* XXX check if swapping is necessary on BE */ 301 /* XXX check if swapping is necessary on BE */
277 wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; 302 wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
278 } else { 303 } else {
279 int me = (ring == &ring->adev->sdma[0].ring) ? 0 : 1; 304 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
280 305
281 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2; 306 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
282 } 307 }
@@ -300,7 +325,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
300 adev->wb.wb[ring->wptr_offs] = ring->wptr << 2; 325 adev->wb.wb[ring->wptr_offs] = ring->wptr << 2;
301 WDOORBELL32(ring->doorbell_index, ring->wptr << 2); 326 WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
302 } else { 327 } else {
303 int me = (ring == &ring->adev->sdma[0].ring) ? 0 : 1; 328 int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
304 329
305 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); 330 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
306 } 331 }
@@ -308,7 +333,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
308 333
309static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) 334static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
310{ 335{
311 struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); 336 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
312 int i; 337 int i;
313 338
314 for (i = 0; i < count; i++) 339 for (i = 0; i < count; i++)
@@ -369,7 +394,7 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
369{ 394{
370 u32 ref_and_mask = 0; 395 u32 ref_and_mask = 0;
371 396
372 if (ring == &ring->adev->sdma[0].ring) 397 if (ring == &ring->adev->sdma.instance[0].ring)
373 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1); 398 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
374 else 399 else
375 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1); 400 ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
@@ -454,8 +479,8 @@ static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring,
454 */ 479 */
455static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) 480static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
456{ 481{
457 struct amdgpu_ring *sdma0 = &adev->sdma[0].ring; 482 struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
458 struct amdgpu_ring *sdma1 = &adev->sdma[1].ring; 483 struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
459 u32 rb_cntl, ib_cntl; 484 u32 rb_cntl, ib_cntl;
460 int i; 485 int i;
461 486
@@ -463,7 +488,7 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
463 (adev->mman.buffer_funcs_ring == sdma1)) 488 (adev->mman.buffer_funcs_ring == sdma1))
464 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); 489 amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
465 490
466 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 491 for (i = 0; i < adev->sdma.num_instances; i++) {
467 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); 492 rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
468 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); 493 rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
469 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); 494 WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
@@ -500,7 +525,7 @@ static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
500 u32 f32_cntl; 525 u32 f32_cntl;
501 int i; 526 int i;
502 527
503 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 528 for (i = 0; i < adev->sdma.num_instances; i++) {
504 f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]); 529 f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
505 if (enable) 530 if (enable)
506 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, 531 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
@@ -530,7 +555,7 @@ static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
530 sdma_v3_0_rlc_stop(adev); 555 sdma_v3_0_rlc_stop(adev);
531 } 556 }
532 557
533 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 558 for (i = 0; i < adev->sdma.num_instances; i++) {
534 f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]); 559 f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
535 if (enable) 560 if (enable)
536 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0); 561 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
@@ -557,8 +582,8 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
557 u32 doorbell; 582 u32 doorbell;
558 int i, j, r; 583 int i, j, r;
559 584
560 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 585 for (i = 0; i < adev->sdma.num_instances; i++) {
561 ring = &adev->sdma[i].ring; 586 ring = &adev->sdma.instance[i].ring;
562 wb_offset = (ring->rptr_offs * 4); 587 wb_offset = (ring->rptr_offs * 4);
563 588
564 mutex_lock(&adev->srbm_mutex); 589 mutex_lock(&adev->srbm_mutex);
@@ -669,23 +694,22 @@ static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
669 u32 fw_size; 694 u32 fw_size;
670 int i, j; 695 int i, j;
671 696
672 if (!adev->sdma[0].fw || !adev->sdma[1].fw)
673 return -EINVAL;
674
675 /* halt the MEs */ 697 /* halt the MEs */
676 sdma_v3_0_enable(adev, false); 698 sdma_v3_0_enable(adev, false);
677 699
678 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 700 for (i = 0; i < adev->sdma.num_instances; i++) {
679 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; 701 if (!adev->sdma.instance[i].fw)
702 return -EINVAL;
703 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
680 amdgpu_ucode_print_sdma_hdr(&hdr->header); 704 amdgpu_ucode_print_sdma_hdr(&hdr->header);
681 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 705 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
682 fw_data = (const __le32 *) 706 fw_data = (const __le32 *)
683 (adev->sdma[i].fw->data + 707 (adev->sdma.instance[i].fw->data +
684 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 708 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
685 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0); 709 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
686 for (j = 0; j < fw_size; j++) 710 for (j = 0; j < fw_size; j++)
687 WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++)); 711 WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
688 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma[i].fw_version); 712 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
689 } 713 }
690 714
691 return 0; 715 return 0;
@@ -701,21 +725,21 @@ static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
701 */ 725 */
702static int sdma_v3_0_start(struct amdgpu_device *adev) 726static int sdma_v3_0_start(struct amdgpu_device *adev)
703{ 727{
704 int r; 728 int r, i;
705 729
706 if (!adev->firmware.smu_load) { 730 if (!adev->firmware.smu_load) {
707 r = sdma_v3_0_load_microcode(adev); 731 r = sdma_v3_0_load_microcode(adev);
708 if (r) 732 if (r)
709 return r; 733 return r;
710 } else { 734 } else {
711 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 735 for (i = 0; i < adev->sdma.num_instances; i++) {
712 AMDGPU_UCODE_ID_SDMA0); 736 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
713 if (r) 737 (i == 0) ?
714 return -EINVAL; 738 AMDGPU_UCODE_ID_SDMA0 :
715 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 739 AMDGPU_UCODE_ID_SDMA1);
716 AMDGPU_UCODE_ID_SDMA1); 740 if (r)
717 if (r) 741 return -EINVAL;
718 return -EINVAL; 742 }
719 } 743 }
720 744
721 /* unhalt the MEs */ 745 /* unhalt the MEs */
@@ -1013,7 +1037,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
1013 */ 1037 */
1014static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib) 1038static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
1015{ 1039{
1016 struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); 1040 struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
1017 u32 pad_count; 1041 u32 pad_count;
1018 int i; 1042 int i;
1019 1043
@@ -1071,6 +1095,15 @@ static int sdma_v3_0_early_init(void *handle)
1071{ 1095{
1072 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1096 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1073 1097
1098 switch (adev->asic_type) {
1099 case CHIP_STONEY:
1100 adev->sdma.num_instances = 1;
1101 break;
1102 default:
1103 adev->sdma.num_instances = SDMA_MAX_INSTANCE;
1104 break;
1105 }
1106
1074 sdma_v3_0_set_ring_funcs(adev); 1107 sdma_v3_0_set_ring_funcs(adev);
1075 sdma_v3_0_set_buffer_funcs(adev); 1108 sdma_v3_0_set_buffer_funcs(adev);
1076 sdma_v3_0_set_vm_pte_funcs(adev); 1109 sdma_v3_0_set_vm_pte_funcs(adev);
@@ -1082,21 +1115,21 @@ static int sdma_v3_0_early_init(void *handle)
1082static int sdma_v3_0_sw_init(void *handle) 1115static int sdma_v3_0_sw_init(void *handle)
1083{ 1116{
1084 struct amdgpu_ring *ring; 1117 struct amdgpu_ring *ring;
1085 int r; 1118 int r, i;
1086 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1119 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1087 1120
1088 /* SDMA trap event */ 1121 /* SDMA trap event */
1089 r = amdgpu_irq_add_id(adev, 224, &adev->sdma_trap_irq); 1122 r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq);
1090 if (r) 1123 if (r)
1091 return r; 1124 return r;
1092 1125
1093 /* SDMA Privileged inst */ 1126 /* SDMA Privileged inst */
1094 r = amdgpu_irq_add_id(adev, 241, &adev->sdma_illegal_inst_irq); 1127 r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq);
1095 if (r) 1128 if (r)
1096 return r; 1129 return r;
1097 1130
1098 /* SDMA Privileged inst */ 1131 /* SDMA Privileged inst */
1099 r = amdgpu_irq_add_id(adev, 247, &adev->sdma_illegal_inst_irq); 1132 r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq);
1100 if (r) 1133 if (r)
1101 return r; 1134 return r;
1102 1135
@@ -1106,33 +1139,23 @@ static int sdma_v3_0_sw_init(void *handle)
1106 return r; 1139 return r;
1107 } 1140 }
1108 1141
1109 ring = &adev->sdma[0].ring; 1142 for (i = 0; i < adev->sdma.num_instances; i++) {
1110 ring->ring_obj = NULL; 1143 ring = &adev->sdma.instance[i].ring;
1111 ring->use_doorbell = true; 1144 ring->ring_obj = NULL;
1112 ring->doorbell_index = AMDGPU_DOORBELL_sDMA_ENGINE0; 1145 ring->use_doorbell = true;
1113 1146 ring->doorbell_index = (i == 0) ?
1114 ring = &adev->sdma[1].ring; 1147 AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1;
1115 ring->ring_obj = NULL; 1148
1116 ring->use_doorbell = true; 1149 sprintf(ring->name, "sdma%d", i);
1117 ring->doorbell_index = AMDGPU_DOORBELL_sDMA_ENGINE1; 1150 r = amdgpu_ring_init(adev, ring, 256 * 1024,
1118 1151 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
1119 ring = &adev->sdma[0].ring; 1152 &adev->sdma.trap_irq,
1120 sprintf(ring->name, "sdma0"); 1153 (i == 0) ?
1121 r = amdgpu_ring_init(adev, ring, 256 * 1024, 1154 AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1,
1122 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf, 1155 AMDGPU_RING_TYPE_SDMA);
1123 &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP0, 1156 if (r)
1124 AMDGPU_RING_TYPE_SDMA); 1157 return r;
1125 if (r) 1158 }
1126 return r;
1127
1128 ring = &adev->sdma[1].ring;
1129 sprintf(ring->name, "sdma1");
1130 r = amdgpu_ring_init(adev, ring, 256 * 1024,
1131 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
1132 &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP1,
1133 AMDGPU_RING_TYPE_SDMA);
1134 if (r)
1135 return r;
1136 1159
1137 return r; 1160 return r;
1138} 1161}
@@ -1140,9 +1163,10 @@ static int sdma_v3_0_sw_init(void *handle)
1140static int sdma_v3_0_sw_fini(void *handle) 1163static int sdma_v3_0_sw_fini(void *handle)
1141{ 1164{
1142 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1165 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1166 int i;
1143 1167
1144 amdgpu_ring_fini(&adev->sdma[0].ring); 1168 for (i = 0; i < adev->sdma.num_instances; i++)
1145 amdgpu_ring_fini(&adev->sdma[1].ring); 1169 amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1146 1170
1147 return 0; 1171 return 0;
1148} 1172}
@@ -1222,7 +1246,7 @@ static void sdma_v3_0_print_status(void *handle)
1222 dev_info(adev->dev, "VI SDMA registers\n"); 1246 dev_info(adev->dev, "VI SDMA registers\n");
1223 dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", 1247 dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
1224 RREG32(mmSRBM_STATUS2)); 1248 RREG32(mmSRBM_STATUS2));
1225 for (i = 0; i < SDMA_MAX_INSTANCE; i++) { 1249 for (i = 0; i < adev->sdma.num_instances; i++) {
1226 dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n", 1250 dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n",
1227 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i])); 1251 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i]));
1228 dev_info(adev->dev, " SDMA%d_F32_CNTL=0x%08X\n", 1252 dev_info(adev->dev, " SDMA%d_F32_CNTL=0x%08X\n",
@@ -1367,7 +1391,7 @@ static int sdma_v3_0_process_trap_irq(struct amdgpu_device *adev,
1367 case 0: 1391 case 0:
1368 switch (queue_id) { 1392 switch (queue_id) {
1369 case 0: 1393 case 0:
1370 amdgpu_fence_process(&adev->sdma[0].ring); 1394 amdgpu_fence_process(&adev->sdma.instance[0].ring);
1371 break; 1395 break;
1372 case 1: 1396 case 1:
1373 /* XXX compute */ 1397 /* XXX compute */
@@ -1380,7 +1404,7 @@ static int sdma_v3_0_process_trap_irq(struct amdgpu_device *adev,
1380 case 1: 1404 case 1:
1381 switch (queue_id) { 1405 switch (queue_id) {
1382 case 0: 1406 case 0:
1383 amdgpu_fence_process(&adev->sdma[1].ring); 1407 amdgpu_fence_process(&adev->sdma.instance[1].ring);
1384 break; 1408 break;
1385 case 1: 1409 case 1:
1386 /* XXX compute */ 1410 /* XXX compute */
@@ -1432,24 +1456,6 @@ const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
1432 .set_powergating_state = sdma_v3_0_set_powergating_state, 1456 .set_powergating_state = sdma_v3_0_set_powergating_state,
1433}; 1457};
1434 1458
1435/**
1436 * sdma_v3_0_ring_is_lockup - Check if the DMA engine is locked up
1437 *
1438 * @ring: amdgpu_ring structure holding ring information
1439 *
1440 * Check if the async DMA engine is locked up (VI).
1441 * Returns true if the engine appears to be locked up, false if not.
1442 */
1443static bool sdma_v3_0_ring_is_lockup(struct amdgpu_ring *ring)
1444{
1445
1446 if (sdma_v3_0_is_idle(ring->adev)) {
1447 amdgpu_ring_lockup_update(ring);
1448 return false;
1449 }
1450 return amdgpu_ring_test_lockup(ring);
1451}
1452
1453static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { 1459static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
1454 .get_rptr = sdma_v3_0_ring_get_rptr, 1460 .get_rptr = sdma_v3_0_ring_get_rptr,
1455 .get_wptr = sdma_v3_0_ring_get_wptr, 1461 .get_wptr = sdma_v3_0_ring_get_wptr,
@@ -1462,14 +1468,15 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
1462 .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush, 1468 .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
1463 .test_ring = sdma_v3_0_ring_test_ring, 1469 .test_ring = sdma_v3_0_ring_test_ring,
1464 .test_ib = sdma_v3_0_ring_test_ib, 1470 .test_ib = sdma_v3_0_ring_test_ib,
1465 .is_lockup = sdma_v3_0_ring_is_lockup,
1466 .insert_nop = sdma_v3_0_ring_insert_nop, 1471 .insert_nop = sdma_v3_0_ring_insert_nop,
1467}; 1472};
1468 1473
1469static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) 1474static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
1470{ 1475{
1471 adev->sdma[0].ring.funcs = &sdma_v3_0_ring_funcs; 1476 int i;
1472 adev->sdma[1].ring.funcs = &sdma_v3_0_ring_funcs; 1477
1478 for (i = 0; i < adev->sdma.num_instances; i++)
1479 adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs;
1473} 1480}
1474 1481
1475static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = { 1482static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = {
@@ -1483,9 +1490,9 @@ static const struct amdgpu_irq_src_funcs sdma_v3_0_illegal_inst_irq_funcs = {
1483 1490
1484static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) 1491static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
1485{ 1492{
1486 adev->sdma_trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 1493 adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
1487 adev->sdma_trap_irq.funcs = &sdma_v3_0_trap_irq_funcs; 1494 adev->sdma.trap_irq.funcs = &sdma_v3_0_trap_irq_funcs;
1488 adev->sdma_illegal_inst_irq.funcs = &sdma_v3_0_illegal_inst_irq_funcs; 1495 adev->sdma.illegal_inst_irq.funcs = &sdma_v3_0_illegal_inst_irq_funcs;
1489} 1496}
1490 1497
1491/** 1498/**
@@ -1551,7 +1558,7 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
1551{ 1558{
1552 if (adev->mman.buffer_funcs == NULL) { 1559 if (adev->mman.buffer_funcs == NULL) {
1553 adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs; 1560 adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
1554 adev->mman.buffer_funcs_ring = &adev->sdma[0].ring; 1561 adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1555 } 1562 }
1556} 1563}
1557 1564
@@ -1566,7 +1573,7 @@ static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
1566{ 1573{
1567 if (adev->vm_manager.vm_pte_funcs == NULL) { 1574 if (adev->vm_manager.vm_pte_funcs == NULL) {
1568 adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; 1575 adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
1569 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; 1576 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
1570 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; 1577 adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
1571 } 1578 }
1572} 1579}
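
sdma_v3_0 picks the instance count per ASIC in early_init: Stoney carries a
single SDMA engine, every other VI part keeps SDMA_MAX_INSTANCE. A compact
restatement of the hunk (this is also why the vi.c hunk further down only
dumps SDMA1_STATUS_REG when num_instances > 1):

	switch (adev->asic_type) {
	case CHIP_STONEY:
		adev->sdma.num_instances = 1;	/* one engine, one "sdma0" ring */
		break;
	default:
		adev->sdma.num_instances = SDMA_MAX_INSTANCE;
		break;
	}
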
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index ed50dd725788..5e9f73af83a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -885,7 +885,6 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
885 .emit_semaphore = uvd_v4_2_ring_emit_semaphore, 885 .emit_semaphore = uvd_v4_2_ring_emit_semaphore,
886 .test_ring = uvd_v4_2_ring_test_ring, 886 .test_ring = uvd_v4_2_ring_test_ring,
887 .test_ib = uvd_v4_2_ring_test_ib, 887 .test_ib = uvd_v4_2_ring_test_ib,
888 .is_lockup = amdgpu_ring_test_lockup,
889 .insert_nop = amdgpu_ring_insert_nop, 888 .insert_nop = amdgpu_ring_insert_nop,
890}; 889};
891 890
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 9ad8b9906c0b..38864f562981 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -824,7 +824,6 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
824 .emit_semaphore = uvd_v5_0_ring_emit_semaphore, 824 .emit_semaphore = uvd_v5_0_ring_emit_semaphore,
825 .test_ring = uvd_v5_0_ring_test_ring, 825 .test_ring = uvd_v5_0_ring_test_ring,
826 .test_ib = uvd_v5_0_ring_test_ib, 826 .test_ib = uvd_v5_0_ring_test_ib,
827 .is_lockup = amdgpu_ring_test_lockup,
828 .insert_nop = amdgpu_ring_insert_nop, 827 .insert_nop = amdgpu_ring_insert_nop,
829}; 828};
830 829
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 7e9934fa4193..121915bbc3b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -808,7 +808,6 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
808 .emit_semaphore = uvd_v6_0_ring_emit_semaphore, 808 .emit_semaphore = uvd_v6_0_ring_emit_semaphore,
809 .test_ring = uvd_v6_0_ring_test_ring, 809 .test_ring = uvd_v6_0_ring_test_ring,
810 .test_ib = uvd_v6_0_ring_test_ib, 810 .test_ib = uvd_v6_0_ring_test_ib,
811 .is_lockup = amdgpu_ring_test_lockup,
812 .insert_nop = amdgpu_ring_insert_nop, 811 .insert_nop = amdgpu_ring_insert_nop,
813}; 812};
814 813
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index cd16df543f64..52ac7a8f1e58 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -642,7 +642,6 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
642 .emit_semaphore = amdgpu_vce_ring_emit_semaphore, 642 .emit_semaphore = amdgpu_vce_ring_emit_semaphore,
643 .test_ring = amdgpu_vce_ring_test_ring, 643 .test_ring = amdgpu_vce_ring_test_ring,
644 .test_ib = amdgpu_vce_ring_test_ib, 644 .test_ib = amdgpu_vce_ring_test_ib,
645 .is_lockup = amdgpu_ring_test_lockup,
646 .insert_nop = amdgpu_ring_insert_nop, 645 .insert_nop = amdgpu_ring_insert_nop,
647}; 646};
648 647
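
The one-line removals in the UVD and VCE files belong to the same cleanup
seen in both SDMA files above: the .is_lockup callback (and the per-engine
*_ring_is_lockup helpers) are dropped from amdgpu_ring_funcs. A sketch of a
resulting table, listing only members that appear in the hunks:

	static const struct amdgpu_ring_funcs example_ring_funcs = {
		.get_rptr	= sdma_v2_4_ring_get_rptr,
		.get_wptr	= sdma_v2_4_ring_get_wptr,
		/* ...emit/test callbacks unchanged... */
		.test_ring	= sdma_v2_4_ring_test_ring,
		.test_ib	= sdma_v2_4_ring_test_ib,
		/* .is_lockup is gone; lockup checks no longer hang off the ring */
		.insert_nop	= sdma_v2_4_ring_insert_nop,
	};
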
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index f0656dfb53f3..370c6c9d81c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -40,6 +40,9 @@
40 40
41#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04 41#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04
42#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10 42#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10
43#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
44#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
45#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
43 46
44#define VCE_V3_0_FW_SIZE (384 * 1024) 47#define VCE_V3_0_FW_SIZE (384 * 1024)
45#define VCE_V3_0_STACK_SIZE (64 * 1024) 48#define VCE_V3_0_STACK_SIZE (64 * 1024)
@@ -130,9 +133,11 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
130 133
131 /* set BUSY flag */ 134 /* set BUSY flag */
132 WREG32_P(mmVCE_STATUS, 1, ~1); 135 WREG32_P(mmVCE_STATUS, 1, ~1);
133 136 if (adev->asic_type >= CHIP_STONEY)
134 WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK, 137 WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
135 ~VCE_VCPU_CNTL__CLK_EN_MASK); 138 else
139 WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
140 ~VCE_VCPU_CNTL__CLK_EN_MASK);
136 141
137 WREG32_P(mmVCE_SOFT_RESET, 142 WREG32_P(mmVCE_SOFT_RESET,
138 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 143 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
@@ -205,8 +210,9 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
205 u32 tmp; 210 u32 tmp;
206 unsigned ret; 211 unsigned ret;
207 212
208 /* Fiji is single pipe */ 213 /* Fiji, Stoney are single pipe */
209 if (adev->asic_type == CHIP_FIJI) { 214 if ((adev->asic_type == CHIP_FIJI) ||
215 (adev->asic_type == CHIP_STONEY)){
210 ret = AMDGPU_VCE_HARVEST_VCE1; 216 ret = AMDGPU_VCE_HARVEST_VCE1;
211 return ret; 217 return ret;
212 } 218 }
@@ -390,8 +396,12 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
390 WREG32(mmVCE_LMI_SWAP_CNTL, 0); 396 WREG32(mmVCE_LMI_SWAP_CNTL, 0);
391 WREG32(mmVCE_LMI_SWAP_CNTL1, 0); 397 WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
392 WREG32(mmVCE_LMI_VM_CTRL, 0); 398 WREG32(mmVCE_LMI_VM_CTRL, 0);
393 399 if (adev->asic_type >= CHIP_STONEY) {
394 WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); 400 WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
401 WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
402 WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
403 } else
404 WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
395 offset = AMDGPU_VCE_FIRMWARE_OFFSET; 405 offset = AMDGPU_VCE_FIRMWARE_OFFSET;
396 size = VCE_V3_0_FW_SIZE; 406 size = VCE_V3_0_FW_SIZE;
397 WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); 407 WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
@@ -575,6 +585,11 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
 				       struct amdgpu_iv_entry *entry)
 {
 	DRM_DEBUG("IH: VCE\n");
+
+	WREG32_P(mmVCE_SYS_INT_STATUS,
+		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
+		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
+
 	switch (entry->src_data) {
 	case 0:
 		amdgpu_fence_process(&adev->vce.ring[0]);
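The added write acknowledges the VCE trap source in VCE_SYS_INT_STATUS before the fences are processed: with WREG32_P() the interrupt bit is written back while every other bit is preserved. A toy model of the idea, assuming the usual write-1-to-clear behaviour for this kind of status register; the bit position and pending values are made up:

    #include <stdint.h>
    #include <stdio.h>

    #define TRAP_INT_BIT 0x00000008u           /* illustrative bit, not the real mask value */

    static uint32_t status_hw = 0x0000000cu;   /* pretend two interrupt sources are pending */

    /* Write-1-to-clear: writing a bit acknowledges (clears) that source only. */
    static void status_write(uint32_t v)
    {
        status_hw &= ~v;
    }

    int main(void)
    {
        status_write(TRAP_INT_BIT);                         /* ack just the trap source */
        printf("status after ack: 0x%08x\n", status_hw);    /* prints 0x00000004 */
        return 0;
    }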
@@ -643,7 +658,6 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
 	.emit_semaphore = amdgpu_vce_ring_emit_semaphore,
 	.test_ring = amdgpu_vce_ring_test_ring,
 	.test_ib = amdgpu_vce_ring_test_ib,
-	.is_lockup = amdgpu_ring_test_lockup,
 	.insert_nop = amdgpu_ring_insert_nop,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index b55ceb14fdcd..2adc1c855e85 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -232,6 +232,13 @@ static const u32 cz_mgcg_cgcg_init[] =
 	mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
 };
 
+static const u32 stoney_mgcg_cgcg_init[] =
+{
+	mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00000100,
+	mmHDP_XDP_CGTT_BLK_CTRL, 0xffffffff, 0x00000104,
+	mmHDP_HOST_PATH_CNTL, 0xffffffff, 0x0f000027,
+};
+
 static void vi_init_golden_registers(struct amdgpu_device *adev)
 {
 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
@@ -258,6 +265,11 @@ static void vi_init_golden_registers(struct amdgpu_device *adev)
 						 cz_mgcg_cgcg_init,
 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
 		break;
+	case CHIP_STONEY:
+		amdgpu_program_register_sequence(adev,
+						 stoney_mgcg_cgcg_init,
+						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
+		break;
 	default:
 		break;
 	}
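The stoney_mgcg_cgcg_init table added above is consumed three words at a time as (register, and-mask, or-value) triplets by amdgpu_program_register_sequence(). A stand-alone sketch of that apply loop, assuming the usual triplet semantics (a full 0xffffffff mask means a plain write, anything else is read-modify-write); offsets 0..2 stand in for the real mmCGTT_DRM_CLK_CTRL0, mmHDP_XDP_CGTT_BLK_CTRL and mmHDP_HOST_PATH_CNTL registers:

    #include <stdint.h>
    #include <stdio.h>

    #define NUM_REGS 3
    static uint32_t mmio[NUM_REGS];                 /* toy register file indexed by offset */

    static uint32_t rreg32(uint32_t reg) { return mmio[reg]; }
    static void wreg32(uint32_t reg, uint32_t v) { mmio[reg] = v; }

    /* Apply (reg, and_mask, or_value) triplets, as the golden-register helper does. */
    static void program_register_sequence(const uint32_t *regs, unsigned count)
    {
        for (unsigned i = 0; i + 2 < count; i += 3) {
            uint32_t reg = regs[i], and_mask = regs[i + 1], or_val = regs[i + 2];
            uint32_t tmp;

            if (and_mask == 0xffffffff) {
                tmp = or_val;               /* full mask: plain write */
            } else {
                tmp = rreg32(reg);
                tmp &= ~and_mask;           /* clear the masked field */
                tmp |= or_val;              /* set the golden value */
            }
            wreg32(reg, tmp);
        }
    }

    int main(void)
    {
        /* Stoney values from the hunk above; 0, 1, 2 stand in for the real offsets. */
        static const uint32_t stoney_init[] = {
            0, 0xffffffff, 0x00000100,
            1, 0xffffffff, 0x00000104,
            2, 0xffffffff, 0x0f000027,
        };

        program_register_sequence(stoney_init,
                                  sizeof(stoney_init) / sizeof(stoney_init[0]));
        for (unsigned i = 0; i < NUM_REGS; i++)
            printf("reg %u = 0x%08x\n", i, mmio[i]);
        return 0;
    }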
@@ -488,6 +500,7 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 	case CHIP_FIJI:
 	case CHIP_TONGA:
 	case CHIP_CARRIZO:
+	case CHIP_STONEY:
 		asic_register_table = cz_allowed_read_registers;
 		size = ARRAY_SIZE(cz_allowed_read_registers);
 		break;
@@ -543,8 +556,10 @@ static void vi_print_gpu_status_regs(struct amdgpu_device *adev)
 		RREG32(mmSRBM_STATUS2));
 	dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
 		RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
-	dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
-		RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
+	if (adev->sdma.num_instances > 1) {
+		dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
+			RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
+	}
 	dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
 	dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
 		RREG32(mmCP_STALLED_STAT1));
@@ -639,9 +654,11 @@ u32 vi_gpu_check_soft_reset(struct amdgpu_device *adev)
 		reset_mask |= AMDGPU_RESET_DMA;
 
 	/* SDMA1_STATUS_REG */
-	tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
-	if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
-		reset_mask |= AMDGPU_RESET_DMA1;
+	if (adev->sdma.num_instances > 1) {
+		tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
+		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+			reset_mask |= AMDGPU_RESET_DMA1;
+	}
 #if 0
 	/* VCE_STATUS */
 	if (adev->asic_type != CHIP_TOPAZ) {
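Both SDMA hunks replace unconditional accesses to the second engine with a check of adev->sdma.num_instances, so a part that exposes only one SDMA instance never touches a register that is not there. A small sketch of the per-instance pattern; the structure, instance counts and status values are made up for illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define IDLE_MASK 0x1u

    struct toy_sdma {
        unsigned num_instances;
        uint32_t status[2];          /* pretend status registers, one per engine */
    };

    /* Only inspect the engines that actually exist on this ASIC. */
    static uint32_t busy_mask(const struct toy_sdma *sdma)
    {
        uint32_t mask = 0;

        for (unsigned i = 0; i < sdma->num_instances; i++) {
            if (!(sdma->status[i] & IDLE_MASK))
                mask |= 1u << i;              /* engine i is busy and needs a reset */
        }
        return mask;
    }

    int main(void)
    {
        struct toy_sdma one = { .num_instances = 1, .status = { 0x0, 0x0 } };
        struct toy_sdma two = { .num_instances = 2, .status = { 0x1, 0x0 } };

        printf("single-SDMA ASIC: reset mask 0x%x\n", busy_mask(&one));  /* 0x1 */
        printf("dual-SDMA ASIC:   reset mask 0x%x\n", busy_mask(&two));  /* 0x2 */
        return 0;
    }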
@@ -1005,6 +1022,9 @@ static void vi_pcie_gen3_enable(struct amdgpu_device *adev)
 	u32 mask;
 	int ret;
 
+	if (pci_is_root_bus(adev->pdev->bus))
+		return;
+
 	if (amdgpu_pcie_gen2 == 0)
 		return;
 
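The early return above skips PCIe gen3 link retraining when the device sits directly on the root bus, as an integrated APU does, because there is no upstream bridge whose link could be renegotiated. A toy sketch of that guard; is_root_bus() mirrors the kernel's pci_is_root_bus() (a root bus has no parent), and the bus/device layout is invented for illustration:

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_bus  { struct toy_bus *parent; };
    struct toy_pdev { struct toy_bus *bus; };

    /* Mirrors the kernel's pci_is_root_bus(): a root bus has no parent bus. */
    static bool is_root_bus(const struct toy_bus *bus)
    {
        return bus->parent == NULL;
    }

    static void pcie_gen3_enable(const struct toy_pdev *pdev)
    {
        if (is_root_bus(pdev->bus)) {
            puts("device on the root bus: nothing to retrain");
            return;
        }
        puts("device behind a bridge: retrain the upstream link");
    }

    int main(void)
    {
        struct toy_bus root = { .parent = NULL };
        struct toy_bus secondary = { .parent = &root };
        struct toy_pdev apu  = { .bus = &root };
        struct toy_pdev dgpu = { .bus = &secondary };

        pcie_gen3_enable(&apu);
        pcie_gen3_enable(&dgpu);
        return 0;
    }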
@@ -1316,6 +1336,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks);
 		break;
 	case CHIP_CARRIZO:
+	case CHIP_STONEY:
 		adev->ip_blocks = cz_ip_blocks;
 		adev->num_ip_blocks = ARRAY_SIZE(cz_ip_blocks);
 		break;
@@ -1327,11 +1348,18 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 	return 0;
 }
 
+#define ATI_REV_ID_FUSE_MACRO__ADDRESS	0xC0014044
+#define ATI_REV_ID_FUSE_MACRO__SHIFT	9
+#define ATI_REV_ID_FUSE_MACRO__MASK	0x00001E00
+
 static uint32_t vi_get_rev_id(struct amdgpu_device *adev)
 {
 	if (adev->asic_type == CHIP_TOPAZ)
 		return (RREG32(mmPCIE_EFUSE4) & PCIE_EFUSE4__STRAP_BIF_ATI_REV_ID_MASK)
 			>> PCIE_EFUSE4__STRAP_BIF_ATI_REV_ID__SHIFT;
+	else if (adev->flags & AMD_IS_APU)
+		return (RREG32_SMC(ATI_REV_ID_FUSE_MACRO__ADDRESS) & ATI_REV_ID_FUSE_MACRO__MASK)
+			>> ATI_REV_ID_FUSE_MACRO__SHIFT;
 	else
 		return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
 			>> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
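The new APU branch reads the revision ID from a fuse register through the SMC address space and extracts it with the mask/shift pair defined just above the function. A stand-alone sketch of that field extraction, reusing the constants from the hunk with a made-up raw fuse value:

    #include <stdint.h>
    #include <stdio.h>

    #define ATI_REV_ID_FUSE_MACRO__SHIFT 9
    #define ATI_REV_ID_FUSE_MACRO__MASK  0x00001E00

    int main(void)
    {
        uint32_t fuse = 0x00000600;    /* made-up raw fuse register contents */
        uint32_t rev_id = (fuse & ATI_REV_ID_FUSE_MACRO__MASK)
                          >> ATI_REV_ID_FUSE_MACRO__SHIFT;

        printf("rev_id = %u\n", rev_id);   /* bits 12:9 of the fuse -> 3 */
        return 0;
    }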
@@ -1385,32 +1413,35 @@ static int vi_common_early_init(void *handle)
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = 0x1;
-		if (amdgpu_smc_load_fw && smc_enabled)
-			adev->firmware.smu_load = true;
 		break;
 	case CHIP_FIJI:
+		adev->has_uvd = true;
+		adev->cg_flags = 0;
+		adev->pg_flags = 0;
+		adev->external_rev_id = adev->rev_id + 0x3c;
+		break;
 	case CHIP_TONGA:
 		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x14;
-		if (amdgpu_smc_load_fw && smc_enabled)
-			adev->firmware.smu_load = true;
 		break;
 	case CHIP_CARRIZO:
+	case CHIP_STONEY:
 		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		/* Disable UVD pg */
 		adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
 		adev->external_rev_id = adev->rev_id + 0x1;
-		if (amdgpu_smc_load_fw && smc_enabled)
-			adev->firmware.smu_load = true;
 		break;
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
 	}
 
+	if (amdgpu_smc_load_fw && smc_enabled)
+		adev->firmware.smu_load = true;
+
 	return 0;
 }
 