author     Dave Airlie <airlied@redhat.com>   2015-09-03 23:06:29 -0400
committer  Dave Airlie <airlied@redhat.com>   2015-09-03 23:06:29 -0400
commit     99495589aa4de7166af254bc497cdbe133fc24bb (patch)
tree       d525e957854064f2492976e9beb8a04dddc28143
parent     879a37d00f1882b1e56a66e626af4194d592d257 (diff)
parent     bddf8026386927985ef6d0d11c3ba78f70b76bad (diff)
Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
More fixes for radeon and amdgpu for 4.3:
- Send full DP aux address fixes for radeon and amdgpu
- Fix an HDMI display regression for pre-DCE5 parts
- UVD suspend fixes for amdgpu
- Add an rs480 suspend quirk
- Fix bo reserve handling in amdgpu GEM_OP ioctl
- GPU scheduler fixes
- SDMA optimizations
- MEC fix for Fiji

* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (21 commits)
  drm/amdgpu: set MEC doorbell range for Fiji
  drm/amdgpu: implement burst NOP for SDMA
  drm/amdgpu: add insert_nop ring func and default implementation
  drm/amdgpu: add amdgpu_get_sdma_instance helper function
  drm/amdgpu: add AMDGPU_MAX_SDMA_INSTANCES
  drm/amdgpu: add burst_nop flag for sdma
  drm/amdgpu: add count field for the SDMA NOP packet v2
  drm/amdgpu: use PT for VM sync on unmap
  drm/amdgpu: make wait_event uninterruptible in push_job
  drm/amdgpu: fix amdgpu_bo_unreserve order in GEM_OP IOCTL v2
  drm/amdgpu: partially revert "modify amdgpu_fence_wait_any() to amdgpu_fence_wait_multiple()" v2
  Add radeon suspend/resume quirk for HP Compaq dc5750.
  drm/amdgpu: re-work sync_resv
  drm/amdgpu/atom: Send out the full AUX address
  drm/radeon/native: Send out the full AUX address
  drm/radeon/atom: Send out the full AUX address
  drm/amdgpu: use IB for fill_buffer instead of direct command
  drm/amdgpu: stop trying to suspend UVD sessions v2
  drm/amdgpu: add scheduler dependency callback v2
  drm/amdgpu: let the scheduler work more with jobs v2
  ...
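Several of the SDMA patches in this pull share one idea: instead of padding rings and IBs with one NOP packet per unused dword, firmware that is new enough (feature version >= 20) understands a NOP header with a count field, so a whole pad becomes a single packet whose trailing dwords are skipped. The following is a rough standalone sketch of that encoding, not the driver code itself; the pad_nops() helper and buffer are made up for illustration, only the bit layout mirrors the patches below.

    /* Illustrative sketch of the SDMA "burst NOP" padding scheme. */
    #include <stdint.h>
    #include <stdio.h>

    #define SDMA_OPCODE_NOP    0
    #define SDMA_NOP_COUNT(x)  (((x) & 0x3FFF) << 16)  /* count field, bits 16..29 */

    /* Fill 'count' dwords of padding.  With burst support the first dword is a
     * NOP header telling the engine that the next (count - 1) dwords are payload
     * of the same packet, so it parses one packet instead of 'count' packets. */
    static void pad_nops(uint32_t *buf, unsigned count, int burst_nop)
    {
            unsigned i;

            for (i = 0; i < count; i++) {
                    if (burst_nop && i == 0)
                            buf[i] = SDMA_OPCODE_NOP | SDMA_NOP_COUNT(count - 1);
                    else
                            buf[i] = SDMA_OPCODE_NOP;
            }
    }

    int main(void)
    {
            uint32_t pad[8];

            pad_nops(pad, 6, 1);
            printf("first pad dword: 0x%08x\n", pad[0]); /* 0x00050000 */
            return 0;
    }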
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h               | 41
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c         | 44
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c           |  5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c          | 24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c            |  4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c         |  7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c          | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c           | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c            | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_dp.c          |  3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c             | 47
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cikd.h                 |  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c             |  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c             |  5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h |  5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c            | 47
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c            | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h  |  5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c             |  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c             |  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c             |  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v2_0.c             |  1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c             |  1
-rw-r--r--  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c     | 90
-rw-r--r--  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h     |  3
-rw-r--r--  drivers/gpu/drm/radeon/atombios_dp.c              |  5
-rw-r--r--  drivers/gpu/drm/radeon/radeon_audio.c             | 16
-rw-r--r--  drivers/gpu/drm/radeon/radeon_combios.c           |  8
-rw-r--r--  drivers/gpu/drm/radeon/radeon_dp_auxch.c          |  4
29 files changed, 360 insertions(+), 164 deletions(-)
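The three "Send out the full AUX address" patches all fix the same truncation: a DP AUX address is 20 bits wide, but the old message headers only carried the low 16 bits. Below is a small standalone illustration of the corrected packing; build_aux_header() is a hypothetical helper written for this note, mirroring the tx_buf layout used in the atombios_dp.c hunk further down.

    #include <stdint.h>
    #include <stdio.h>

    static void build_aux_header(uint8_t tx_buf[4], uint32_t address,
                                 uint8_t request, uint8_t size)
    {
            tx_buf[0] = address & 0xff;             /* address bits 0-7  */
            tx_buf[1] = (address >> 8) & 0xff;      /* address bits 8-15 */
            tx_buf[2] = (request << 4) |            /* request in the high nibble */
                        ((address >> 16) & 0xf);    /* address bits 16-19 (the fix) */
            tx_buf[3] = size ? (size - 1) : 0;      /* transfer length - 1 */
    }

    int main(void)
    {
            uint8_t hdr[4];

            /* request 0x9 is a native AUX read */
            build_aux_header(hdr, 0x23456, 0x9, 16);
            printf("%02x %02x %02x %02x\n", hdr[0], hdr[1], hdr[2], hdr[3]);
            return 0;
    }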
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index aa2dcf578dd6..668939a14206 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -98,6 +98,9 @@ extern int amdgpu_sched_hw_submission;
 #define AMDGPU_MAX_COMPUTE_RINGS        8
 #define AMDGPU_MAX_VCE_RINGS            2
 
+/* max number of IP instances */
+#define AMDGPU_MAX_SDMA_INSTANCES       2
+
 /* number of hw syncs before falling back on blocking */
 #define AMDGPU_NUM_SYNCS                4
 
@@ -262,7 +265,7 @@ struct amdgpu_buffer_funcs {
         unsigned        fill_num_dw;
 
         /* used for buffer clearing */
-        void (*emit_fill_buffer)(struct amdgpu_ring *ring,
+        void (*emit_fill_buffer)(struct amdgpu_ib *ib,
                                  /* value to write to memory */
                                  uint32_t src_data,
                                  /* dst addr in bytes */
@@ -340,6 +343,8 @@ struct amdgpu_ring_funcs {
         int (*test_ring)(struct amdgpu_ring *ring);
         int (*test_ib)(struct amdgpu_ring *ring);
         bool (*is_lockup)(struct amdgpu_ring *ring);
+        /* insert NOP packets */
+        void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 };
 
 /*
@@ -440,12 +445,11 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
-signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
+signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
                                   struct fence **array,
                                   uint32_t count,
-                                  bool wait_all,
                                   bool intr,
                                   signed long t);
 struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
 void amdgpu_fence_unref(struct amdgpu_fence **fence);
 
@@ -717,6 +721,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
                      void *owner);
 int amdgpu_sync_rings(struct amdgpu_sync *sync,
                       struct amdgpu_ring *ring);
+struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
                       struct fence *fence);
@@ -1214,6 +1219,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
 void amdgpu_ring_free_size(struct amdgpu_ring *ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
@@ -1665,7 +1671,6 @@ struct amdgpu_uvd {
         struct amdgpu_bo        *vcpu_bo;
         void                    *cpu_addr;
         uint64_t                gpu_addr;
-        void                    *saved_bo;
         atomic_t                handles[AMDGPU_MAX_UVD_HANDLES];
         struct drm_file         *filp[AMDGPU_MAX_UVD_HANDLES];
         struct delayed_work     idle_work;
@@ -1709,6 +1714,7 @@ struct amdgpu_sdma {
         uint32_t                feature_version;
 
         struct amdgpu_ring      ring;
+        bool                    burst_nop;
 };
 
 /*
@@ -2057,7 +2063,7 @@ struct amdgpu_device {
         struct amdgpu_gfx               gfx;
 
         /* sdma */
-        struct amdgpu_sdma              sdma[2];
+        struct amdgpu_sdma              sdma[AMDGPU_MAX_SDMA_INSTANCES];
         struct amdgpu_irq_src           sdma_trap_irq;
         struct amdgpu_irq_src           sdma_illegal_inst_irq;
 
@@ -2196,6 +2202,21 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
         ring->ring_free_dw--;
 }
 
+static inline struct amdgpu_sdma * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+{
+        struct amdgpu_device *adev = ring->adev;
+        int i;
+
+        for (i = 0; i < AMDGPU_MAX_SDMA_INSTANCES; i++)
+                if (&adev->sdma[i].ring == ring)
+                        break;
+
+        if (i < AMDGPU_MAX_SDMA_INSTANCES)
+                return &adev->sdma[i];
+        else
+                return NULL;
+}
+
 /*
  * ASICs macro.
  */
@@ -2248,7 +2269,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
 #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
-#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
+#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
 #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
 #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
 #define amdgpu_dpm_set_power_state(adev) (adev)->pm.funcs->set_power_state((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index f446bf2fedc9..1be2bd6d07ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -851,22 +851,6 @@ static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
         return false;
 }
 
-static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count)
-{
-        int idx;
-        struct fence *fence;
-
-        for (idx = 0; idx < count; ++idx) {
-                fence = fences[idx];
-                if (fence) {
-                        if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-                                return false;
-                }
-        }
-
-        return true;
-}
-
 struct amdgpu_wait_cb {
         struct fence_cb base;
         struct task_struct *task;
@@ -885,7 +869,7 @@ static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
         struct amdgpu_fence *fence = to_amdgpu_fence(f);
         struct amdgpu_device *adev = fence->ring->adev;
 
-        return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t);
+        return amdgpu_fence_wait_any(adev, &f, 1, intr, t);
 }
 
 /**
@@ -894,23 +878,18 @@ static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
  * @adev: amdgpu device
  * @array: the fence array with amdgpu fence pointer
  * @count: the number of the fence array
- * @wait_all: the flag of wait all(true) or wait any(false)
  * @intr: when sleep, set the current task interruptable or not
  * @t: timeout to wait
  *
- * If wait_all is true, it will return when all fences are signaled or timeout.
- * If wait_all is false, it will return when any fence is signaled or timeout.
+ * It will return when any fence is signaled or timeout.
  */
-signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
-                                       struct fence **array,
-                                       uint32_t count,
-                                       bool wait_all,
-                                       bool intr,
-                                       signed long t)
-{
-        long idx = 0;
+signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
+                                  struct fence **array, uint32_t count,
+                                  bool intr, signed long t)
+{
         struct amdgpu_wait_cb *cb;
         struct fence *fence;
+        unsigned idx;
 
         BUG_ON(!array);
 
@@ -927,10 +906,7 @@ signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
                 if (fence_add_callback(fence,
                         &cb[idx].base, amdgpu_fence_wait_cb)) {
                         /* The fence is already signaled */
-                        if (wait_all)
-                                continue;
-                        else
-                                goto fence_rm_cb;
+                        goto fence_rm_cb;
                         }
                 }
         }
@@ -945,9 +921,7 @@ signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
                  * amdgpu_test_signaled_any must be called after
                  * set_current_state to prevent a race with wake_up_process
                  */
-                if (!wait_all && amdgpu_test_signaled_any(array, count))
-                        break;
-                if (wait_all && amdgpu_test_signaled_all(array, count))
+                if (amdgpu_test_signaled_any(array, count))
                         break;
 
                 if (adev->needs_reset) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 4afc507820c0..4b36e779622f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -615,6 +615,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
                 info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
                 info.domains = robj->initial_domain;
                 info.domain_flags = robj->flags;
+                amdgpu_bo_unreserve(robj);
                 if (copy_to_user(out, &info, sizeof(info)))
                         r = -EFAULT;
                 break;
@@ -622,17 +623,19 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
         case AMDGPU_GEM_OP_SET_PLACEMENT:
                 if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) {
                         r = -EPERM;
+                        amdgpu_bo_unreserve(robj);
                         break;
                 }
                 robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
                                                       AMDGPU_GEM_DOMAIN_GTT |
                                                       AMDGPU_GEM_DOMAIN_CPU);
+                amdgpu_bo_unreserve(robj);
                 break;
         default:
+                amdgpu_bo_unreserve(robj);
                 r = -EINVAL;
         }
 
-        amdgpu_bo_unreserve(robj);
 out:
         drm_gem_object_unreference_unlocked(gobj);
         return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 7d442c51063e..9bec91484c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -131,6 +131,21 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
         return 0;
 }
 
+/** amdgpu_ring_insert_nop - insert NOP packets
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @count: the number of NOP packets to insert
+ *
+ * This is the generic insert_nop function for rings except SDMA
+ */
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+        int i;
+
+        for (i = 0; i < count; i++)
+                amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * amdgpu_ring_commit - tell the GPU to execute the new
  * commands on the ring buffer
@@ -143,10 +158,13 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
  */
 void amdgpu_ring_commit(struct amdgpu_ring *ring)
 {
+        uint32_t count;
+
         /* We pad to match fetch size */
-        while (ring->wptr & ring->align_mask) {
-                amdgpu_ring_write(ring, ring->nop);
-        }
+        count = ring->align_mask + 1 - (ring->wptr & ring->align_mask);
+        count %= ring->align_mask + 1;
+        ring->funcs->insert_nop(ring, count);
+
         mb();
         amdgpu_ring_set_wptr(ring);
 }
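As a worked example of the new padding math in amdgpu_ring_commit(): with align_mask = 7 (an 8-dword fetch size) and wptr & 7 == 3, count = (8 - 3) % 8 = 5, so five NOP dwords are emitted through the ring's insert_nop hook; when the write pointer is already aligned, count = 8 % 8 = 0 and nothing is written, which matches what the old while-loop did one dword at a time.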
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index b92525329d6c..74dad270362c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -367,8 +367,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
                 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
 
                 spin_unlock(&sa_manager->wq.lock);
-                t = amdgpu_fence_wait_multiple(adev, fences, AMDGPU_MAX_RINGS, false, false,
-                                               MAX_SCHEDULE_TIMEOUT);
+                t = amdgpu_fence_wait_any(adev, fences, AMDGPU_MAX_RINGS,
+                                          false, MAX_SCHEDULE_TIMEOUT);
                 r = (t > 0) ? 0 : t;
                 spin_lock(&sa_manager->wq.lock);
                 /* if we have nothing to wait for block */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index f93fb3541488..de98fbd2971e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -27,6 +27,12 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 
+static struct fence *amdgpu_sched_dependency(struct amd_sched_job *job)
+{
+        struct amdgpu_job *sched_job = (struct amdgpu_job *)job;
+        return amdgpu_sync_get_fence(&sched_job->ibs->sync);
+}
+
 static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job)
 {
         struct amdgpu_job *sched_job;
@@ -75,6 +81,7 @@ static void amdgpu_sched_process_job(struct amd_sched_job *job)
 }
 
 struct amd_sched_backend_ops amdgpu_sched_ops = {
+        .dependency = amdgpu_sched_dependency,
         .run_job = amdgpu_sched_run_job,
         .process_job = amdgpu_sched_process_job
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 4fffb2539331..068aeaff7183 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -142,6 +142,18 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
         return 0;
 }
 
+static void *amdgpu_sync_get_owner(struct fence *f)
+{
+        struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
+        struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+        if (s_fence)
+                return s_fence->owner;
+        else if (a_fence)
+                return a_fence->owner;
+        return AMDGPU_FENCE_OWNER_UNDEFINED;
+}
+
 /**
  * amdgpu_sync_resv - use the semaphores to sync to a reservation object
  *
@@ -158,7 +170,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 {
         struct reservation_object_list *flist;
         struct fence *f;
-        struct amdgpu_fence *fence;
+        void *fence_owner;
         unsigned i;
         int r = 0;
 
@@ -176,22 +188,22 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
         for (i = 0; i < flist->shared_count; ++i) {
                 f = rcu_dereference_protected(flist->shared[i],
                                               reservation_object_held(resv));
-                fence = f ? to_amdgpu_fence(f) : NULL;
-                if (fence && fence->ring->adev == adev) {
+                if (amdgpu_sync_same_dev(adev, f)) {
                         /* VM updates are only interesting
                          * for other VM updates and moves.
                          */
+                        fence_owner = amdgpu_sync_get_owner(f);
                         if ((owner != AMDGPU_FENCE_OWNER_MOVE) &&
-                            (fence->owner != AMDGPU_FENCE_OWNER_MOVE) &&
+                            (fence_owner != AMDGPU_FENCE_OWNER_MOVE) &&
                             ((owner == AMDGPU_FENCE_OWNER_VM) !=
-                             (fence->owner == AMDGPU_FENCE_OWNER_VM)))
+                             (fence_owner == AMDGPU_FENCE_OWNER_VM)))
                                 continue;
 
                         /* Ignore fence from the same owner as
                          * long as it isn't undefined.
                          */
                         if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
-                            fence->owner == owner)
+                            fence_owner == owner)
                                 continue;
                 }
 
@@ -202,6 +214,28 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
         return r;
 }
 
+struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
+{
+        struct amdgpu_sync_entry *e;
+        struct hlist_node *tmp;
+        struct fence *f;
+        int i;
+
+        hash_for_each_safe(sync->fences, i, tmp, e, node) {
+
+                f = e->fence;
+
+                hash_del(&e->node);
+                kfree(e);
+
+                if (!fence_is_signaled(f))
+                        return f;
+
+                fence_put(f);
+        }
+        return NULL;
+}
+
 int amdgpu_sync_wait(struct amdgpu_sync *sync)
 {
         struct amdgpu_sync_entry *e;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b87355ccfb1d..3ad4a83c418f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -221,31 +221,32 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 
 int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 {
-        unsigned size;
-        void *ptr;
-        const struct common_firmware_header *hdr;
-        int i;
+        struct amdgpu_ring *ring = &adev->uvd.ring;
+        int i, r;
 
         if (adev->uvd.vcpu_bo == NULL)
                 return 0;
 
-        for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
-                if (atomic_read(&adev->uvd.handles[i]))
-                        break;
-
-        if (i == AMDGPU_MAX_UVD_HANDLES)
-                return 0;
+        for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
+                uint32_t handle = atomic_read(&adev->uvd.handles[i]);
+                if (handle != 0) {
+                        struct fence *fence;
 
-        hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+                        amdgpu_uvd_note_usage(adev);
 
-        size = amdgpu_bo_size(adev->uvd.vcpu_bo);
-        size -= le32_to_cpu(hdr->ucode_size_bytes);
+                        r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
+                        if (r) {
+                                DRM_ERROR("Error destroying UVD (%d)!\n", r);
+                                continue;
+                        }
 
-        ptr = adev->uvd.cpu_addr;
-        ptr += le32_to_cpu(hdr->ucode_size_bytes);
+                        fence_wait(fence, false);
+                        fence_put(fence);
 
-        adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
-        memcpy(adev->uvd.saved_bo, ptr, size);
+                        adev->uvd.filp[i] = NULL;
+                        atomic_set(&adev->uvd.handles[i], 0);
+                }
+        }
 
         return 0;
 }
@@ -270,12 +271,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
         ptr = adev->uvd.cpu_addr;
         ptr += le32_to_cpu(hdr->ucode_size_bytes);
 
-        if (adev->uvd.saved_bo != NULL) {
-                memcpy(ptr, adev->uvd.saved_bo, size);
-                kfree(adev->uvd.saved_bo);
-                adev->uvd.saved_bo = NULL;
-        } else
-                memset(ptr, 0, size);
+        memset(ptr, 0, size);
 
         return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 83b7ce6f5f72..5848564d3d66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -627,9 +627,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
         uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
         uint64_t last_pte = ~0, last_dst = ~0;
+        void *owner = AMDGPU_FENCE_OWNER_VM;
         unsigned count = 0;
         uint64_t addr;
 
+        /* sync to everything on unmapping */
+        if (!(flags & AMDGPU_PTE_VALID))
+                owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+
         /* walk over the address space and update the page tables */
         for (addr = start; addr < end; ) {
                 uint64_t pt_idx = addr >> amdgpu_vm_block_size;
@@ -638,8 +643,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
                 uint64_t pte;
                 int r;
 
-                amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv,
-                                 AMDGPU_FENCE_OWNER_VM);
+                amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
                 r = reservation_object_reserve_shared(pt->tbo.resv);
                 if (r)
                         return r;
@@ -790,17 +794,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
         ib->length_dw = 0;
 
-        if (!(flags & AMDGPU_PTE_VALID)) {
-                unsigned i;
-
-                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-                        struct amdgpu_fence *f = vm->ids[i].last_id_use;
-                        r = amdgpu_sync_fence(adev, &ib->sync, &f->base);
-                        if (r)
-                                return r;
-                }
-        }
-
         r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
                                   mapping->it.last + 1, addr + mapping->offset,
                                   flags, gtt_flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 9ba0a7d5bc8e..92b6acadfc52 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -139,7 +139,8 @@ amdgpu_atombios_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *m
 
         tx_buf[0] = msg->address & 0xff;
         tx_buf[1] = msg->address >> 8;
-        tx_buf[2] = msg->request << 4;
+        tx_buf[2] = (msg->request << 4) |
+                ((msg->address >> 16) & 0xf);
         tx_buf[3] = msg->size ? (msg->size - 1) : 0;
 
         switch (msg->request & ~DP_AUX_I2C_MOT) {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 3920c1e346f8..9ea9de457da3 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -188,6 +188,19 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
         WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
 }
 
+static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+        struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+        int i;
+
+        for (i = 0; i < count; i++)
+                if (sdma && sdma->burst_nop && (i == 0))
+                        amdgpu_ring_write(ring, ring->nop |
+                                SDMA_NOP_COUNT(count - 1));
+                else
+                        amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -213,8 +226,8 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
         amdgpu_ring_write(ring, next_rptr);
 
         /* IB packet must end on a 8 DW boundary */
-        while ((ring->wptr & 7) != 4)
-                amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+        cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
+
         amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
         amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
@@ -501,6 +514,8 @@ static int cik_sdma_load_microcode(struct amdgpu_device *adev)
                 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
                 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
                 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+                if (adev->sdma[i].feature_version >= 20)
+                        adev->sdma[i].burst_nop = true;
                 fw_data = (const __le32 *)
                         (adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
@@ -815,8 +830,19 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
 {
-        while (ib->length_dw & 0x7)
-                ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+        struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+        u32 pad_count;
+        int i;
+
+        pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+        for (i = 0; i < pad_count; i++)
+                if (sdma && sdma->burst_nop && (i == 0))
+                        ib->ptr[ib->length_dw++] =
+                                SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
+                                SDMA_NOP_COUNT(pad_count - 1);
+                else
+                        ib->ptr[ib->length_dw++] =
+                                SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
 }
 
 /**
@@ -1303,6 +1329,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
         .test_ring = cik_sdma_ring_test_ring,
         .test_ib = cik_sdma_ring_test_ib,
         .is_lockup = cik_sdma_ring_is_lockup,
+        .insert_nop = cik_sdma_ring_insert_nop,
 };
 
 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1363,16 +1390,16 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
  *
  * Fill GPU buffers using the DMA engine (CIK).
  */
-static void cik_sdma_emit_fill_buffer(struct amdgpu_ring *ring,
+static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib,
                                       uint32_t src_data,
                                       uint64_t dst_offset,
                                       uint32_t byte_count)
 {
-        amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0));
-        amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-        amdgpu_ring_write(ring, upper_32_bits(dst_offset));
-        amdgpu_ring_write(ring, src_data);
-        amdgpu_ring_write(ring, byte_count);
+        ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0);
+        ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+        ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+        ib->ptr[ib->length_dw++] = src_data;
+        ib->ptr[ib->length_dw++] = byte_count;
 }
 
 static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
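Worked example for the CIK pad routine above: if an IB ends with length_dw & 0x7 == 5, then pad_count = (8 - 5) % 8 = 3, and on burst-capable firmware the first pad dword becomes SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) | SDMA_NOP_COUNT(2), telling the engine that the next two dwords belong to the same NOP packet; older firmware still gets three independent NOP packets, so the IB size is identical either way.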
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index a3e3dfaa01a4..7f6d457f250a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -487,6 +487,7 @@
          (((op) & 0xFF) << 0))
 /* sDMA opcodes */
 #define SDMA_OPCODE_NOP                                   0
+#       define SDMA_NOP_COUNT(x)                          (((x) & 0x3FFF) << 16)
 #define SDMA_OPCODE_COPY                                  1
 #       define SDMA_COPY_SUB_OPCODE_LINEAR                0
 #       define SDMA_COPY_SUB_OPCODE_TILED                 1
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index fab7b236f37f..517a68f82ec3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5598,6 +5598,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
         .test_ring = gfx_v7_0_ring_test_ring,
         .test_ib = gfx_v7_0_ring_test_ib,
         .is_lockup = gfx_v7_0_ring_is_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5614,6 +5615,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
         .test_ring = gfx_v7_0_ring_test_ring,
         .test_ib = gfx_v7_0_ring_test_ib,
         .is_lockup = gfx_v7_0_ring_is_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 818edb37fa9c..0af357a1a170 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3240,7 +3240,8 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
 
                 /* enable the doorbell if requested */
                 if (use_doorbell) {
-                        if (adev->asic_type == CHIP_CARRIZO) {
+                        if ((adev->asic_type == CHIP_CARRIZO) ||
+                            (adev->asic_type == CHIP_FIJI)) {
                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
                                        AMDGPU_DOORBELL_KIQ << 2);
                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
@@ -4378,6 +4379,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
         .test_ring = gfx_v8_0_ring_test_ring,
         .test_ib = gfx_v8_0_ring_test_ib,
         .is_lockup = gfx_v8_0_ring_is_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -4394,6 +4396,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
         .test_ring = gfx_v8_0_ring_test_ring,
         .test_ib = gfx_v8_0_ring_test_ib,
         .is_lockup = gfx_v8_0_ring_is_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h
index c723602c7b0c..ee6a041cb288 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_sdma_pkt_open.h
@@ -2163,5 +2163,10 @@
 #define SDMA_PKT_NOP_HEADER_sub_op_shift 8
 #define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
 
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask   0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift  16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
 
 #endif /* __ICELAND_SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 715e02d3bfba..14e87234171a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -146,6 +146,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
                 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
                 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
                 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+                if (adev->sdma[i].feature_version >= 20)
+                        adev->sdma[i].burst_nop = true;
 
                 if (adev->firmware.smu_load) {
                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -218,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
         WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
 }
 
+static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+        struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+        int i;
+
+        for (i = 0; i < count; i++)
+                if (sdma && sdma->burst_nop && (i == 0))
+                        amdgpu_ring_write(ring, ring->nop |
+                                SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+                else
+                        amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -245,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
         amdgpu_ring_write(ring, next_rptr);
 
         /* IB packet must end on a 8 DW boundary */
-        while ((ring->wptr & 7) != 2)
-                amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+        sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
+
         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
                           SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
         /* base must be 32 byte aligned */
@@ -879,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
 {
-        while (ib->length_dw & 0x7)
-                ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+        struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+        u32 pad_count;
+        int i;
+
+        pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+        for (i = 0; i < pad_count; i++)
+                if (sdma && sdma->burst_nop && (i == 0))
+                        ib->ptr[ib->length_dw++] =
+                                SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+                                SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+                else
+                        ib->ptr[ib->length_dw++] =
+                                SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 }
 
 /**
@@ -1314,6 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
         .test_ring = sdma_v2_4_ring_test_ring,
         .test_ib = sdma_v2_4_ring_test_ib,
         .is_lockup = sdma_v2_4_ring_is_lockup,
+        .insert_nop = sdma_v2_4_ring_insert_nop,
 };
 
 static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1375,16 +1402,16 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
  *
 * Fill GPU buffers using the DMA engine (VI).
  */
-static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ring *ring,
+static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
                                        uint32_t src_data,
                                        uint64_t dst_offset,
                                        uint32_t byte_count)
 {
-        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
-        amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-        amdgpu_ring_write(ring, upper_32_bits(dst_offset));
-        amdgpu_ring_write(ring, src_data);
-        amdgpu_ring_write(ring, byte_count);
+        ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+        ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+        ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+        ib->ptr[ib->length_dw++] = src_data;
+        ib->ptr[ib->length_dw++] = byte_count;
 }
 
 static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 67128c8e78b8..9bfe92df15f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -218,6 +218,8 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
                 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
                 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
                 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+                if (adev->sdma[i].feature_version >= 20)
+                        adev->sdma[i].burst_nop = true;
 
                 if (adev->firmware.smu_load) {
                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -304,6 +306,19 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
         }
 }
 
+static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+        struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+        int i;
+
+        for (i = 0; i < count; i++)
+                if (sdma && sdma->burst_nop && (i == 0))
+                        amdgpu_ring_write(ring, ring->nop |
+                                SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+                else
+                        amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -330,8 +345,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
         amdgpu_ring_write(ring, next_rptr);
 
         /* IB packet must end on a 8 DW boundary */
-        while ((ring->wptr & 7) != 2)
-                amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+        sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
 
         amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
                           SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
@@ -999,8 +1013,19 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
 {
-        while (ib->length_dw & 0x7)
-                ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+        struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+        u32 pad_count;
+        int i;
+
+        pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+        for (i = 0; i < pad_count; i++)
+                if (sdma && sdma->burst_nop && (i == 0))
+                        ib->ptr[ib->length_dw++] =
+                                SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+                                SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+                else
+                        ib->ptr[ib->length_dw++] =
+                                SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 }
 
 /**
@@ -1438,6 +1463,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
         .test_ring = sdma_v3_0_ring_test_ring,
         .test_ib = sdma_v3_0_ring_test_ib,
         .is_lockup = sdma_v3_0_ring_is_lockup,
+        .insert_nop = sdma_v3_0_ring_insert_nop,
 };
 
 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1499,16 +1525,16 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
  *
 * Fill GPU buffers using the DMA engine (VI).
  */
-static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ring *ring,
+static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
                                        uint32_t src_data,
                                        uint64_t dst_offset,
                                        uint32_t byte_count)
 {
-        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
-        amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-        amdgpu_ring_write(ring, upper_32_bits(dst_offset));
-        amdgpu_ring_write(ring, src_data);
-        amdgpu_ring_write(ring, byte_count);
+        ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+        ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+        ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+        ib->ptr[ib->length_dw++] = src_data;
+        ib->ptr[ib->length_dw++] = byte_count;
 }
 
 static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h
index 099b7b56113c..e5ebd084288d 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_sdma_pkt_open.h
@@ -2236,5 +2236,10 @@
 #define SDMA_PKT_NOP_HEADER_sub_op_shift 8
 #define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
 
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask   0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift  16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
 
 #endif /* __TONGA_SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 9ac383bc6c1f..5fac5da694f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -886,6 +886,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
         .test_ring = uvd_v4_2_ring_test_ring,
         .test_ib = uvd_v4_2_ring_test_ib,
         .is_lockup = amdgpu_ring_test_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index de4b3f57902d..2d5c59c318af 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -825,6 +825,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
         .test_ring = uvd_v5_0_ring_test_ring,
         .test_ib = uvd_v5_0_ring_test_ib,
         .is_lockup = amdgpu_ring_test_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 66c975870e97..d9f553fce531 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -805,6 +805,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
         .test_ring = uvd_v6_0_ring_test_ring,
         .test_ib = uvd_v6_0_ring_test_ib,
         .is_lockup = amdgpu_ring_test_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 303d961d57bd..cd16df543f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -643,6 +643,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
         .test_ring = amdgpu_vce_ring_test_ring,
         .test_ib = amdgpu_vce_ring_test_ib,
         .is_lockup = amdgpu_ring_test_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 4349658081ff..5642b8eb92ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -608,6 +608,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
         .test_ring = amdgpu_vce_ring_test_ring,
         .test_ib = amdgpu_vce_ring_test_ib,
         .is_lockup = amdgpu_ring_test_lockup,
+        .insert_nop = amdgpu_ring_insert_nop,
 };
 
 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
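A reading aid for the gpu_scheduler.c diff that follows: the run-queue selection now hands back jobs rather than entities, and a job is only popped once it has no unsignaled dependency. amd_sched_entity_pop_job() peeks the entity's kfifo, asks the backend's new ->dependency() callback (wired to amdgpu_sync_get_fence() above) for an outstanding fence, and if one exists installs amd_sched_entity_wakeup() on it and returns NULL; when that fence signals, the callback clears entity->dependency and wakes the scheduler so the job is picked up on the next pass.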
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index d99fe90991dc..ab8577f8ed4a 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -27,6 +27,8 @@
27#include <drm/drmP.h> 27#include <drm/drmP.h>
28#include "gpu_scheduler.h" 28#include "gpu_scheduler.h"
29 29
30static struct amd_sched_job *
31amd_sched_entity_pop_job(struct amd_sched_entity *entity);
30static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); 32static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
31 33
32/* Initialize a given run queue struct */ 34/* Initialize a given run queue struct */
@@ -56,34 +58,36 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
56} 58}
57 59
58/** 60/**
59 * Select next entity from a specified run queue with round robin policy. 61 * Select next job from a specified run queue with round robin policy.
60 * It could return the same entity as current one if current is the only 62 * Return NULL if nothing available.
61 * available one in the queue. Return NULL if nothing available.
62 */ 63 */
63static struct amd_sched_entity * 64static struct amd_sched_job *
64amd_sched_rq_select_entity(struct amd_sched_rq *rq) 65amd_sched_rq_select_job(struct amd_sched_rq *rq)
65{ 66{
66 struct amd_sched_entity *entity; 67 struct amd_sched_entity *entity;
68 struct amd_sched_job *job;
67 69
68 spin_lock(&rq->lock); 70 spin_lock(&rq->lock);
69 71
70 entity = rq->current_entity; 72 entity = rq->current_entity;
71 if (entity) { 73 if (entity) {
72 list_for_each_entry_continue(entity, &rq->entities, list) { 74 list_for_each_entry_continue(entity, &rq->entities, list) {
73 if (!kfifo_is_empty(&entity->job_queue)) { 75 job = amd_sched_entity_pop_job(entity);
76 if (job) {
74 rq->current_entity = entity; 77 rq->current_entity = entity;
75 spin_unlock(&rq->lock); 78 spin_unlock(&rq->lock);
76 return rq->current_entity; 79 return job;
77 } 80 }
78 } 81 }
79 } 82 }
80 83
81 list_for_each_entry(entity, &rq->entities, list) { 84 list_for_each_entry(entity, &rq->entities, list) {
82 85
83 if (!kfifo_is_empty(&entity->job_queue)) { 86 job = amd_sched_entity_pop_job(entity);
87 if (job) {
84 rq->current_entity = entity; 88 rq->current_entity = entity;
85 spin_unlock(&rq->lock); 89 spin_unlock(&rq->lock);
86 return rq->current_entity; 90 return job;
87 } 91 }
88 92
89 if (entity == rq->current_entity) 93 if (entity == rq->current_entity)
@@ -188,6 +192,39 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 	kfifo_free(&entity->job_queue);
 }
 
+static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
+{
+	struct amd_sched_entity *entity =
+		container_of(cb, struct amd_sched_entity, cb);
+	entity->dependency = NULL;
+	fence_put(f);
+	amd_sched_wakeup(entity->scheduler);
+}
+
+static struct amd_sched_job *
+amd_sched_entity_pop_job(struct amd_sched_entity *entity)
+{
+	struct amd_gpu_scheduler *sched = entity->scheduler;
+	struct amd_sched_job *job;
+
+	if (ACCESS_ONCE(entity->dependency))
+		return NULL;
+
+	if (!kfifo_out_peek(&entity->job_queue, &job, sizeof(job)))
+		return NULL;
+
+	while ((entity->dependency = sched->ops->dependency(job))) {
+
+		if (fence_add_callback(entity->dependency, &entity->cb,
+				       amd_sched_entity_wakeup))
+			fence_put(entity->dependency);
+		else
+			return NULL;
+	}
+
+	return job;
+}
+
 /**
  * Helper to submit a job to the job queue
  *
@@ -227,7 +264,6 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 	struct amd_sched_entity *entity = sched_job->s_entity;
 	struct amd_sched_fence *fence = amd_sched_fence_create(
 		entity, sched_job->owner);
-	int r;
 
 	if (!fence)
 		return -ENOMEM;
@@ -235,10 +271,10 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 	fence_get(&fence->base);
 	sched_job->s_fence = fence;
 
-	r = wait_event_interruptible(entity->scheduler->job_scheduled,
-				     amd_sched_entity_in(sched_job));
+	wait_event(entity->scheduler->job_scheduled,
+		   amd_sched_entity_in(sched_job));
 
-	return r;
+	return 0;
 }
 
 /**
@@ -260,22 +296,22 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
 }
 
 /**
- * Select next entity containing real IB submissions
+ * Select next to run
 */
-static struct amd_sched_entity *
-amd_sched_select_context(struct amd_gpu_scheduler *sched)
+static struct amd_sched_job *
+amd_sched_select_job(struct amd_gpu_scheduler *sched)
 {
-	struct amd_sched_entity *tmp;
+	struct amd_sched_job *job;
 
 	if (!amd_sched_ready(sched))
 		return NULL;
 
 	/* Kernel run queue has higher priority than normal run queue*/
-	tmp = amd_sched_rq_select_entity(&sched->kernel_rq);
-	if (tmp == NULL)
-		tmp = amd_sched_rq_select_entity(&sched->sched_rq);
+	job = amd_sched_rq_select_job(&sched->kernel_rq);
+	if (job == NULL)
+		job = amd_sched_rq_select_job(&sched->sched_rq);
 
-	return tmp;
+	return job;
 }
 
 static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
@@ -301,22 +337,19 @@ static int amd_sched_main(void *param)
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
-		struct amd_sched_entity *c_entity = NULL;
+		struct amd_sched_entity *entity;
 		struct amd_sched_job *job;
 		struct fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
 			kthread_should_stop() ||
-			(c_entity = amd_sched_select_context(sched)));
+			(job = amd_sched_select_job(sched)));
 
-		if (!c_entity)
+		if (!job)
 			continue;
 
-		r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
-		if (r != sizeof(void *))
-			continue;
+		entity = job->s_entity;
 		atomic_inc(&sched->hw_rq_count);
-
 		fence = sched->ops->run_job(job);
 		if (fence) {
 			r = fence_add_callback(fence, &job->cb,
@@ -328,6 +361,7 @@ static int amd_sched_main(void *param)
 			fence_put(fence);
 		}
 
+		kfifo_out(&entity->job_queue, &job, sizeof(job));
 		wake_up(&sched->job_scheduled);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index e797796dcad7..2af0e4d4d817 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -45,6 +45,8 @@ struct amd_sched_entity {
 	spinlock_t			queue_lock;
 	struct amd_gpu_scheduler	*scheduler;
 	uint64_t			fence_context;
+	struct fence			*dependency;
+	struct fence_cb			cb;
 };
 
 /**
@@ -89,6 +91,7 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
  * these functions should be implemented in driver side
 */
 struct amd_sched_backend_ops {
+	struct fence *(*dependency)(struct amd_sched_job *job);
 	struct fence *(*run_job)(struct amd_sched_job *job);
 	void (*process_job)(struct amd_sched_job *job);
 };
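
Note (reviewer sketch, not part of the series): the dependency() hook added above lets the scheduler core hold a job back until every fence it depends on has signaled; amd_sched_entity_pop_job() registers amd_sched_entity_wakeup() on the first unsignaled fence and retries after it fires. A minimal driver-side sketch against this interface could look as follows; the example_job wrapper, its fence array and all example_* names are hypothetical, only struct amd_sched_job, struct amd_sched_backend_ops and the fence helpers come from the kernel.

/* Hedged sketch, not code from this series: one way a driver could back the
 * new dependency() callback.  All example_* names are made up.
 */
#include <linux/kernel.h>
#include <linux/fence.h>
#include "gpu_scheduler.h"

struct example_job {
	struct amd_sched_job	base;
	struct fence		*deps[8];	/* fences this job must wait for (hypothetical) */
	unsigned int		num_deps;
	unsigned int		next_dep;
};

static struct fence *example_dependency(struct amd_sched_job *sched_job)
{
	struct example_job *job = container_of(sched_job, struct example_job, base);

	/* Hand back the next unsignaled fence; the core takes over the
	 * reference, registers amd_sched_entity_wakeup() on it and calls
	 * in here again once it fires. */
	while (job->next_dep < job->num_deps) {
		struct fence *f = job->deps[job->next_dep++];

		if (!fence_is_signaled(f))
			return fence_get(f);
	}

	return NULL;	/* nothing left to wait for, the job can be run */
}

A driver would point .dependency in its amd_sched_backend_ops at such a function, next to the existing run_job/process_job hooks.
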
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index f81e0d7d0232..9cd49c584263 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -171,8 +171,9 @@ radeon_dp_aux_transfer_atom(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 		return -E2BIG;
 
 	tx_buf[0] = msg->address & 0xff;
-	tx_buf[1] = msg->address >> 8;
-	tx_buf[2] = msg->request << 4;
+	tx_buf[1] = (msg->address >> 8) & 0xff;
+	tx_buf[2] = (msg->request << 4) |
+		((msg->address >> 16) & 0xf);
 	tx_buf[3] = msg->size ? (msg->size - 1) : 0;
 
 	switch (msg->request & ~DP_AUX_I2C_MOT) {
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index fbc8d88d6e5d..2c02e99b5f95 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -522,13 +522,15 @@ static int radeon_audio_set_avi_packet(struct drm_encoder *encoder,
 		return err;
 	}
 
-	if (drm_rgb_quant_range_selectable(radeon_connector_edid(connector))) {
-		if (radeon_encoder->output_csc == RADEON_OUTPUT_CSC_TVRGB)
-			frame.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED;
-		else
-			frame.quantization_range = HDMI_QUANTIZATION_RANGE_FULL;
-	} else {
-		frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
+	if (radeon_encoder->output_csc != RADEON_OUTPUT_CSC_BYPASS) {
+		if (drm_rgb_quant_range_selectable(radeon_connector_edid(connector))) {
+			if (radeon_encoder->output_csc == RADEON_OUTPUT_CSC_TVRGB)
+				frame.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED;
+			else
+				frame.quantization_range = HDMI_QUANTIZATION_RANGE_FULL;
+		} else {
+			frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
+		}
 	}
 
 	err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index c097d3a82bda..a9b01bcf7d0a 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3387,6 +3387,14 @@ void radeon_combios_asic_init(struct drm_device *dev)
 	    rdev->pdev->subsystem_device == 0x30ae)
 		return;
 
+	/* quirk for rs4xx HP Compaq dc5750 Small Form Factor to make it resume
+	 * - it hangs on resume inside the dynclk 1 table.
+	 */
+	if (rdev->family == CHIP_RS480 &&
+	    rdev->pdev->subsystem_vendor == 0x103c &&
+	    rdev->pdev->subsystem_device == 0x280a)
+		return;
+
 	/* DYN CLK 1 */
 	table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
 	if (table)
diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
index fcbd60bb0349..3b0c229d7dcd 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
@@ -116,8 +116,8 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg
 	       AUX_SW_WR_BYTES(bytes));
 
 	/* write the data header into the registers */
-	/* request, addres, msg size */
-	byte = (msg->request << 4);
+	/* request, address, msg size */
+	byte = (msg->request << 4) | ((msg->address >> 16) & 0xf);
 	WREG32(AUX_SW_DATA + aux_offset[instance],
 	       AUX_SW_DATA_MASK(byte) | AUX_SW_AUTOINCREMENT_DISABLE);
 
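
Note (illustrative sketch, not part of the series): the three AUX fixes above stop truncating the 20-bit DP AUX address, folding address bits 19:16 into the byte that carries the 4-bit request code. A standalone user-space illustration of the header packing used by the atombios transfer path, with made-up example values:

/* Hedged, user-space illustration only; the function name and values are made up. */
#include <stdint.h>
#include <stdio.h>

static void pack_aux_header(uint32_t address, uint8_t request, uint8_t size,
			    uint8_t tx_buf[4])
{
	tx_buf[0] = address & 0xff;		/* address bits  7:0           */
	tx_buf[1] = (address >> 8) & 0xff;	/* address bits 15:8           */
	tx_buf[2] = (request << 4) |
		    ((address >> 16) & 0xf);	/* request nibble | bits 19:16 */
	tx_buf[3] = size ? (size - 1) : 0;	/* transfer length - 1         */
}

int main(void)
{
	uint8_t buf[4];

	/* native read (0x9) of one byte from an address that needs all 20 bits */
	pack_aux_header(0x23456, 0x9, 1, buf);
	printf("%02x %02x %02x %02x\n", buf[0], buf[1], buf[2], buf[3]);	/* 56 34 92 00 */
	return 0;
}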