aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
author Dave Airlie <airlied@redhat.com> 2016-03-17 17:16:25 -0400
committer Dave Airlie <airlied@redhat.com> 2016-03-17 17:16:25 -0400
commit 902d02db1ff1b0d0075276917a36ba70847798a7 (patch)
tree 2dbd296578edf17b7abd5d2ecde1b1f2f1eb5e10 /drivers/gpu/drm/amd/amdgpu
parent e6087877794520748f7295212a4c6bdb870122f2 (diff)
parent b9c743b85dc378510ef0e5ebe3c2e4ac1495c410 (diff)
Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next
A few other misc cleanups and bug fixes for 4.6. Highlights:
- unify endian handling in powerplay
- powerplay fixes
- fix a regression in 4.5 on boards with no display connectors
- fence cleanups and locking fixes
- whitespace cleanups and code refactoring in radeon

* 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (35 commits)
  drm/amdgpu/gfx7: add MTYPE definition
  drm/amdgpu: removing BO_VAs shouldn't be interruptible
  drm/amd/powerplay: show uvd/vce power gate enablement for tonga.
  drm/amd/powerplay: show uvd/vce power gate info for fiji
  drm/amdgpu: use sched fence if possible
  drm/amdgpu: move ib.fence to job.fence
  drm/amdgpu: give a fence param to ib_free
  drm/amdgpu: include the right version of gmc header files for iceland
  drm/radeon: fix indentation.
  drm/amd/powerplay: add uvd/vce dpm enabling flag to fix the performance issue for CZ
  drm/amdgpu: switch back to 32bit hw fences v2
  drm/amdgpu: remove amdgpu_fence_is_signaled
  drm/amdgpu: drop the extra fence range check v2
  drm/amdgpu: signal fences directly in amdgpu_fence_process
  drm/amdgpu: cleanup amdgpu_fence_wait_empty v2
  drm/amdgpu: keep all fences in an RCU protected array v2
  drm/amdgpu: add number of hardware submissions to amdgpu_fence_driver_init_ring
  drm/amdgpu: RCU protected amd_sched_fence_release
  drm/amdgpu: RCU protected amdgpu_fence_release
  drm/amdgpu: merge amdgpu_fence_process and _activity
  ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h 47
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 375
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 16
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 27
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c 53
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 3
15 files changed, 214 insertions, 364 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a80c8cea7609..c4a21c6428f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -141,7 +141,6 @@ extern unsigned amdgpu_pcie_lane_cap;
141#define CIK_CURSOR_HEIGHT 128 141#define CIK_CURSOR_HEIGHT 128
142 142
143struct amdgpu_device; 143struct amdgpu_device;
144struct amdgpu_fence;
145struct amdgpu_ib; 144struct amdgpu_ib;
146struct amdgpu_vm; 145struct amdgpu_vm;
147struct amdgpu_ring; 146struct amdgpu_ring;
@@ -348,13 +347,15 @@ struct amdgpu_fence_driver {
348 uint64_t gpu_addr; 347 uint64_t gpu_addr;
349 volatile uint32_t *cpu_addr; 348 volatile uint32_t *cpu_addr;
350 /* sync_seq is protected by ring emission lock */ 349 /* sync_seq is protected by ring emission lock */
351 uint64_t sync_seq; 350 uint32_t sync_seq;
352 atomic64_t last_seq; 351 atomic_t last_seq;
353 bool initialized; 352 bool initialized;
354 struct amdgpu_irq_src *irq_src; 353 struct amdgpu_irq_src *irq_src;
355 unsigned irq_type; 354 unsigned irq_type;
356 struct timer_list fallback_timer; 355 struct timer_list fallback_timer;
357 wait_queue_head_t fence_queue; 356 unsigned num_fences_mask;
357 spinlock_t lock;
358 struct fence **fences;
358}; 359};
359 360
360/* some special values for the owner field */ 361/* some special values for the owner field */
@@ -364,16 +365,6 @@ struct amdgpu_fence_driver {
364#define AMDGPU_FENCE_FLAG_64BIT (1 << 0) 365#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
365#define AMDGPU_FENCE_FLAG_INT (1 << 1) 366#define AMDGPU_FENCE_FLAG_INT (1 << 1)
366 367
367struct amdgpu_fence {
368 struct fence base;
369
370 /* RB, DMA, etc. */
371 struct amdgpu_ring *ring;
372 uint64_t seq;
373
374 wait_queue_t fence_wake;
375};
376
377struct amdgpu_user_fence { 368struct amdgpu_user_fence {
378 /* write-back bo */ 369 /* write-back bo */
379 struct amdgpu_bo *bo; 370 struct amdgpu_bo *bo;
@@ -385,7 +376,8 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
385void amdgpu_fence_driver_fini(struct amdgpu_device *adev); 376void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
386void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); 377void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
387 378
388int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); 379int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
380 unsigned num_hw_submission);
389int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, 381int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
390 struct amdgpu_irq_src *irq_src, 382 struct amdgpu_irq_src *irq_src,
391 unsigned irq_type); 383 unsigned irq_type);
@@ -393,7 +385,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
393void amdgpu_fence_driver_resume(struct amdgpu_device *adev); 385void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
394int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence); 386int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
395void amdgpu_fence_process(struct amdgpu_ring *ring); 387void amdgpu_fence_process(struct amdgpu_ring *ring);
396int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
397int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); 388int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
398unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); 389unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
399 390
@@ -539,11 +530,14 @@ int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
539 * Assumption is that there won't be hole (all object on same 530 * Assumption is that there won't be hole (all object on same
540 * alignment). 531 * alignment).
541 */ 532 */
533
534#define AMDGPU_SA_NUM_FENCE_LISTS 32
535
542struct amdgpu_sa_manager { 536struct amdgpu_sa_manager {
543 wait_queue_head_t wq; 537 wait_queue_head_t wq;
544 struct amdgpu_bo *bo; 538 struct amdgpu_bo *bo;
545 struct list_head *hole; 539 struct list_head *hole;
546 struct list_head flist[AMDGPU_MAX_RINGS]; 540 struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
547 struct list_head olist; 541 struct list_head olist;
548 unsigned size; 542 unsigned size;
549 uint64_t gpu_addr; 543 uint64_t gpu_addr;
@@ -727,7 +721,6 @@ struct amdgpu_ib {
727 uint32_t length_dw; 721 uint32_t length_dw;
728 uint64_t gpu_addr; 722 uint64_t gpu_addr;
729 uint32_t *ptr; 723 uint32_t *ptr;
730 struct fence *fence;
731 struct amdgpu_user_fence *user; 724 struct amdgpu_user_fence *user;
732 struct amdgpu_vm *vm; 725 struct amdgpu_vm *vm;
733 unsigned vm_id; 726 unsigned vm_id;
@@ -1143,7 +1136,7 @@ struct amdgpu_gfx {
1143 1136
1144int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, 1137int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1145 unsigned size, struct amdgpu_ib *ib); 1138 unsigned size, struct amdgpu_ib *ib);
1146void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib); 1139void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f);
1147int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, 1140int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
1148 struct amdgpu_ib *ib, struct fence *last_vm_update, 1141 struct amdgpu_ib *ib, struct fence *last_vm_update,
1149 struct fence **f); 1142 struct fence **f);
@@ -1164,7 +1157,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
1164 struct amdgpu_irq_src *irq_src, unsigned irq_type, 1157 struct amdgpu_irq_src *irq_src, unsigned irq_type,
1165 enum amdgpu_ring_type ring_type); 1158 enum amdgpu_ring_type ring_type);
1166void amdgpu_ring_fini(struct amdgpu_ring *ring); 1159void amdgpu_ring_fini(struct amdgpu_ring *ring);
1167struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
1168 1160
1169/* 1161/*
1170 * CS. 1162 * CS.
@@ -1206,6 +1198,7 @@ struct amdgpu_job {
1206 struct amdgpu_ring *ring; 1198 struct amdgpu_ring *ring;
1207 struct amdgpu_sync sync; 1199 struct amdgpu_sync sync;
1208 struct amdgpu_ib *ibs; 1200 struct amdgpu_ib *ibs;
1201 struct fence *fence; /* the hw fence */
1209 uint32_t num_ibs; 1202 uint32_t num_ibs;
1210 void *owner; 1203 void *owner;
1211 struct amdgpu_user_fence uf; 1204 struct amdgpu_user_fence uf;
@@ -2067,20 +2060,6 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
2067void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); 2060void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
2068 2061
2069/* 2062/*
2070 * Cast helper
2071 */
2072extern const struct fence_ops amdgpu_fence_ops;
2073static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
2074{
2075 struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
2076
2077 if (__f->base.ops == &amdgpu_fence_ops)
2078 return __f;
2079
2080 return NULL;
2081}
2082
2083/*
2084 * Registers read & write functions. 2063 * Registers read & write functions.
2085 */ 2064 */
2086#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false) 2065#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 83599f2a0387..4303b447efe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,9 +47,30 @@
47 * that the the relevant GPU caches have been flushed. 47 * that the the relevant GPU caches have been flushed.
48 */ 48 */
49 49
50struct amdgpu_fence {
51 struct fence base;
52
53 /* RB, DMA, etc. */
54 struct amdgpu_ring *ring;
55};
56
50static struct kmem_cache *amdgpu_fence_slab; 57static struct kmem_cache *amdgpu_fence_slab;
51static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0); 58static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
52 59
60/*
61 * Cast helper
62 */
63static const struct fence_ops amdgpu_fence_ops;
64static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
65{
66 struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
67
68 if (__f->base.ops == &amdgpu_fence_ops)
69 return __f;
70
71 return NULL;
72}
73
53/** 74/**
54 * amdgpu_fence_write - write a fence value 75 * amdgpu_fence_write - write a fence value
55 * 76 *
@@ -82,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
82 if (drv->cpu_addr) 103 if (drv->cpu_addr)
83 seq = le32_to_cpu(*drv->cpu_addr); 104 seq = le32_to_cpu(*drv->cpu_addr);
84 else 105 else
85 seq = lower_32_bits(atomic64_read(&drv->last_seq)); 106 seq = atomic_read(&drv->last_seq);
86 107
87 return seq; 108 return seq;
88} 109}
@@ -100,20 +121,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
100{ 121{
101 struct amdgpu_device *adev = ring->adev; 122 struct amdgpu_device *adev = ring->adev;
102 struct amdgpu_fence *fence; 123 struct amdgpu_fence *fence;
124 struct fence **ptr;
125 uint32_t seq;
103 126
104 fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); 127 fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
105 if (fence == NULL) 128 if (fence == NULL)
106 return -ENOMEM; 129 return -ENOMEM;
107 130
108 fence->seq = ++ring->fence_drv.sync_seq; 131 seq = ++ring->fence_drv.sync_seq;
109 fence->ring = ring; 132 fence->ring = ring;
110 fence_init(&fence->base, &amdgpu_fence_ops, 133 fence_init(&fence->base, &amdgpu_fence_ops,
111 &ring->fence_drv.fence_queue.lock, 134 &ring->fence_drv.lock,
112 adev->fence_context + ring->idx, 135 adev->fence_context + ring->idx,
113 fence->seq); 136 seq);
114 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 137 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
115 fence->seq, AMDGPU_FENCE_FLAG_INT); 138 seq, AMDGPU_FENCE_FLAG_INT);
139
140 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
141 /* This function can't be called concurrently anyway, otherwise
142 * emitting the fence would mess up the hardware ring buffer.
143 */
144 BUG_ON(rcu_dereference_protected(*ptr, 1));
145
146 rcu_assign_pointer(*ptr, fence_get(&fence->base));
147
116 *f = &fence->base; 148 *f = &fence->base;
149
117 return 0; 150 return 0;
118} 151}
119 152
@@ -131,89 +164,48 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
131} 164}
132 165
133/** 166/**
134 * amdgpu_fence_activity - check for fence activity 167 * amdgpu_fence_process - check for fence activity
135 * 168 *
136 * @ring: pointer to struct amdgpu_ring 169 * @ring: pointer to struct amdgpu_ring
137 * 170 *
138 * Checks the current fence value and calculates the last 171 * Checks the current fence value and calculates the last
139 * signalled fence value. Returns true if activity occured 172 * signalled fence value. Wakes the fence queue if the
140 * on the ring, and the fence_queue should be waken up. 173 * sequence number has increased.
141 */ 174 */
142static bool amdgpu_fence_activity(struct amdgpu_ring *ring) 175void amdgpu_fence_process(struct amdgpu_ring *ring)
143{ 176{
144 uint64_t seq, last_seq, last_emitted; 177 struct amdgpu_fence_driver *drv = &ring->fence_drv;
145 unsigned count_loop = 0; 178 uint32_t seq, last_seq;
146 bool wake = false; 179 int r;
147 180
148 /* Note there is a scenario here for an infinite loop but it's
149 * very unlikely to happen. For it to happen, the current polling
150 * process need to be interrupted by another process and another
151 * process needs to update the last_seq btw the atomic read and
152 * xchg of the current process.
153 *
154 * More over for this to go in infinite loop there need to be
155 * continuously new fence signaled ie amdgpu_fence_read needs
156 * to return a different value each time for both the currently
157 * polling process and the other process that xchg the last_seq
158 * btw atomic read and xchg of the current process. And the
159 * value the other process set as last seq must be higher than
160 * the seq value we just read. Which means that current process
161 * need to be interrupted after amdgpu_fence_read and before
162 * atomic xchg.
163 *
164 * To be even more safe we count the number of time we loop and
165 * we bail after 10 loop just accepting the fact that we might
166 * have temporarly set the last_seq not to the true real last
167 * seq but to an older one.
168 */
169 last_seq = atomic64_read(&ring->fence_drv.last_seq);
170 do { 181 do {
171 last_emitted = ring->fence_drv.sync_seq; 182 last_seq = atomic_read(&ring->fence_drv.last_seq);
172 seq = amdgpu_fence_read(ring); 183 seq = amdgpu_fence_read(ring);
173 seq |= last_seq & 0xffffffff00000000LL;
174 if (seq < last_seq) {
175 seq &= 0xffffffff;
176 seq |= last_emitted & 0xffffffff00000000LL;
177 }
178 184
179 if (seq <= last_seq || seq > last_emitted) { 185 } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
180 break;
181 }
182 /* If we loop over we don't want to return without
183 * checking if a fence is signaled as it means that the
184 * seq we just read is different from the previous on.
185 */
186 wake = true;
187 last_seq = seq;
188 if ((count_loop++) > 10) {
189 /* We looped over too many time leave with the
190 * fact that we might have set an older fence
191 * seq then the current real last seq as signaled
192 * by the hw.
193 */
194 break;
195 }
196 } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
197 186
198 if (seq < last_emitted) 187 if (seq != ring->fence_drv.sync_seq)
199 amdgpu_fence_schedule_fallback(ring); 188 amdgpu_fence_schedule_fallback(ring);
200 189
201 return wake; 190 while (last_seq != seq) {
202} 191 struct fence *fence, **ptr;
203 192
204/** 193 ptr = &drv->fences[++last_seq & drv->num_fences_mask];
205 * amdgpu_fence_process - process a fence 194
206 * 195 /* There is always exactly one thread signaling this fence slot */
207 * @adev: amdgpu_device pointer 196 fence = rcu_dereference_protected(*ptr, 1);
208 * @ring: ring index the fence is associated with 197 rcu_assign_pointer(*ptr, NULL);
209 * 198
210 * Checks the current fence value and wakes the fence queue 199 BUG_ON(!fence);
211 * if the sequence number has increased (all asics). 200
212 */ 201 r = fence_signal(fence);
213void amdgpu_fence_process(struct amdgpu_ring *ring) 202 if (!r)
214{ 203 FENCE_TRACE(fence, "signaled from irq context\n");
215 if (amdgpu_fence_activity(ring)) 204 else
216 wake_up_all(&ring->fence_drv.fence_queue); 205 BUG();
206
207 fence_put(fence);
208 }
217} 209}
218 210
219/** 211/**
@@ -231,77 +223,6 @@ static void amdgpu_fence_fallback(unsigned long arg)
231} 223}
232 224
233/** 225/**
234 * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
235 *
236 * @ring: ring the fence is associated with
237 * @seq: sequence number
238 *
239 * Check if the last signaled fence sequnce number is >= the requested
240 * sequence number (all asics).
241 * Returns true if the fence has signaled (current fence value
242 * is >= requested value) or false if it has not (current fence
243 * value is < the requested value. Helper function for
244 * amdgpu_fence_signaled().
245 */
246static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
247{
248 if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
249 return true;
250
251 /* poll new last sequence at least once */
252 amdgpu_fence_process(ring);
253 if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
254 return true;
255
256 return false;
257}
258
259/*
260 * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
261 * @ring: ring to wait on for the seq number
262 * @seq: seq number wait for
263 *
264 * return value:
265 * 0: seq signaled, and gpu not hang
266 * -EINVAL: some paramter is not valid
267 */
268static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
269{
270 BUG_ON(!ring);
271 if (seq > ring->fence_drv.sync_seq)
272 return -EINVAL;
273
274 if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
275 return 0;
276
277 amdgpu_fence_schedule_fallback(ring);
278 wait_event(ring->fence_drv.fence_queue,
279 amdgpu_fence_seq_signaled(ring, seq));
280
281 return 0;
282}
283
284/**
285 * amdgpu_fence_wait_next - wait for the next fence to signal
286 *
287 * @adev: amdgpu device pointer
288 * @ring: ring index the fence is associated with
289 *
290 * Wait for the next fence on the requested ring to signal (all asics).
291 * Returns 0 if the next fence has passed, error for all other cases.
292 * Caller must hold ring lock.
293 */
294int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
295{
296 uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
297
298 if (seq >= ring->fence_drv.sync_seq)
299 return -ENOENT;
300
301 return amdgpu_fence_ring_wait_seq(ring, seq);
302}
303
304/**
305 * amdgpu_fence_wait_empty - wait for all fences to signal 226 * amdgpu_fence_wait_empty - wait for all fences to signal
306 * 227 *
307 * @adev: amdgpu device pointer 228 * @adev: amdgpu device pointer
@@ -309,16 +230,28 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
309 * 230 *
310 * Wait for all fences on the requested ring to signal (all asics). 231 * Wait for all fences on the requested ring to signal (all asics).
311 * Returns 0 if the fences have passed, error for all other cases. 232 * Returns 0 if the fences have passed, error for all other cases.
312 * Caller must hold ring lock.
313 */ 233 */
314int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) 234int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
315{ 235{
316 uint64_t seq = ring->fence_drv.sync_seq; 236 uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
237 struct fence *fence, **ptr;
238 int r;
317 239
318 if (!seq) 240 if (!seq)
319 return 0; 241 return 0;
320 242
321 return amdgpu_fence_ring_wait_seq(ring, seq); 243 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
244 rcu_read_lock();
245 fence = rcu_dereference(*ptr);
246 if (!fence || !fence_get_rcu(fence)) {
247 rcu_read_unlock();
248 return 0;
249 }
250 rcu_read_unlock();
251
252 r = fence_wait(fence, false);
253 fence_put(fence);
254 return r;
322} 255}
323 256
324/** 257/**
@@ -338,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
338 * but it's ok to report slightly wrong fence count here. 271 * but it's ok to report slightly wrong fence count here.
339 */ 272 */
340 amdgpu_fence_process(ring); 273 amdgpu_fence_process(ring);
341 emitted = ring->fence_drv.sync_seq 274 emitted = 0x100000000ull;
342 - atomic64_read(&ring->fence_drv.last_seq); 275 emitted -= atomic_read(&ring->fence_drv.last_seq);
343 /* to avoid 32bits warp around */ 276 emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
344 if (emitted > 0x10000000) 277 return lower_32_bits(emitted);
345 emitted = 0x10000000;
346
347 return (unsigned)emitted;
348} 278}
349 279
350/** 280/**
@@ -376,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
376 ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; 306 ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
377 ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; 307 ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
378 } 308 }
379 amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq)); 309 amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
380 amdgpu_irq_get(adev, irq_src, irq_type); 310 amdgpu_irq_get(adev, irq_src, irq_type);
381 311
382 ring->fence_drv.irq_src = irq_src; 312 ring->fence_drv.irq_src = irq_src;
@@ -394,25 +324,36 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
394 * for the requested ring. 324 * for the requested ring.
395 * 325 *
396 * @ring: ring to init the fence driver on 326 * @ring: ring to init the fence driver on
327 * @num_hw_submission: number of entries on the hardware queue
397 * 328 *
398 * Init the fence driver for the requested ring (all asics). 329 * Init the fence driver for the requested ring (all asics).
399 * Helper function for amdgpu_fence_driver_init(). 330 * Helper function for amdgpu_fence_driver_init().
400 */ 331 */
401int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) 332int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
333 unsigned num_hw_submission)
402{ 334{
403 long timeout; 335 long timeout;
404 int r; 336 int r;
405 337
338 /* Check that num_hw_submission is a power of two */
339 if ((num_hw_submission & (num_hw_submission - 1)) != 0)
340 return -EINVAL;
341
406 ring->fence_drv.cpu_addr = NULL; 342 ring->fence_drv.cpu_addr = NULL;
407 ring->fence_drv.gpu_addr = 0; 343 ring->fence_drv.gpu_addr = 0;
408 ring->fence_drv.sync_seq = 0; 344 ring->fence_drv.sync_seq = 0;
409 atomic64_set(&ring->fence_drv.last_seq, 0); 345 atomic_set(&ring->fence_drv.last_seq, 0);
410 ring->fence_drv.initialized = false; 346 ring->fence_drv.initialized = false;
411 347
412 setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 348 setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
413 (unsigned long)ring); 349 (unsigned long)ring);
414 350
415 init_waitqueue_head(&ring->fence_drv.fence_queue); 351 ring->fence_drv.num_fences_mask = num_hw_submission - 1;
352 spin_lock_init(&ring->fence_drv.lock);
353 ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
354 GFP_KERNEL);
355 if (!ring->fence_drv.fences)
356 return -ENOMEM;
416 357
417 timeout = msecs_to_jiffies(amdgpu_lockup_timeout); 358 timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
418 if (timeout == 0) { 359 if (timeout == 0) {
@@ -426,7 +367,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
426 timeout = MAX_SCHEDULE_TIMEOUT; 367 timeout = MAX_SCHEDULE_TIMEOUT;
427 } 368 }
428 r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, 369 r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
429 amdgpu_sched_hw_submission, 370 num_hw_submission,
430 timeout, ring->name); 371 timeout, ring->name);
431 if (r) { 372 if (r) {
432 DRM_ERROR("Failed to create scheduler on ring %s.\n", 373 DRM_ERROR("Failed to create scheduler on ring %s.\n",
@@ -474,10 +415,9 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
474 */ 415 */
475void amdgpu_fence_driver_fini(struct amdgpu_device *adev) 416void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
476{ 417{
477 int i, r; 418 unsigned i, j;
419 int r;
478 420
479 if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
480 kmem_cache_destroy(amdgpu_fence_slab);
481 for (i = 0; i < AMDGPU_MAX_RINGS; i++) { 421 for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
482 struct amdgpu_ring *ring = adev->rings[i]; 422 struct amdgpu_ring *ring = adev->rings[i];
483 423
@@ -488,13 +428,18 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
488 /* no need to trigger GPU reset as we are unloading */ 428 /* no need to trigger GPU reset as we are unloading */
489 amdgpu_fence_driver_force_completion(adev); 429 amdgpu_fence_driver_force_completion(adev);
490 } 430 }
491 wake_up_all(&ring->fence_drv.fence_queue);
492 amdgpu_irq_put(adev, ring->fence_drv.irq_src, 431 amdgpu_irq_put(adev, ring->fence_drv.irq_src,
493 ring->fence_drv.irq_type); 432 ring->fence_drv.irq_type);
494 amd_sched_fini(&ring->sched); 433 amd_sched_fini(&ring->sched);
495 del_timer_sync(&ring->fence_drv.fallback_timer); 434 del_timer_sync(&ring->fence_drv.fallback_timer);
435 for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
436 fence_put(ring->fence_drv.fences[i]);
437 kfree(ring->fence_drv.fences);
496 ring->fence_drv.initialized = false; 438 ring->fence_drv.initialized = false;
497 } 439 }
440
441 if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
442 kmem_cache_destroy(amdgpu_fence_slab);
498} 443}
499 444
500/** 445/**
@@ -591,103 +536,57 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f)
591} 536}
592 537
593/** 538/**
594 * amdgpu_fence_is_signaled - test if fence is signaled 539 * amdgpu_fence_enable_signaling - enable signalling on fence
595 * 540 * @fence: fence
596 * @f: fence to test
597 * 541 *
598 * Test the fence sequence number if it is already signaled. If it isn't 542 * This function is called with fence_queue lock held, and adds a callback
599 * signaled start fence processing. Returns True if the fence is signaled. 543 * to fence_queue that checks if this fence is signaled, and if so it
544 * signals the fence and removes itself.
600 */ 545 */
601static bool amdgpu_fence_is_signaled(struct fence *f) 546static bool amdgpu_fence_enable_signaling(struct fence *f)
602{ 547{
603 struct amdgpu_fence *fence = to_amdgpu_fence(f); 548 struct amdgpu_fence *fence = to_amdgpu_fence(f);
604 struct amdgpu_ring *ring = fence->ring; 549 struct amdgpu_ring *ring = fence->ring;
605 550
606 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) 551 if (!timer_pending(&ring->fence_drv.fallback_timer))
607 return true; 552 amdgpu_fence_schedule_fallback(ring);
608
609 amdgpu_fence_process(ring);
610 553
611 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) 554 FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
612 return true;
613 555
614 return false; 556 return true;
615} 557}
616 558
617/** 559/**
618 * amdgpu_fence_check_signaled - callback from fence_queue 560 * amdgpu_fence_free - free up the fence memory
561 *
562 * @rcu: RCU callback head
619 * 563 *
620 * this function is called with fence_queue lock held, which is also used 564 * Free up the fence memory after the RCU grace period.
621 * for the fence locking itself, so unlocked variants are used for
622 * fence_signal, and remove_wait_queue.
623 */ 565 */
624static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key) 566static void amdgpu_fence_free(struct rcu_head *rcu)
625{ 567{
626 struct amdgpu_fence *fence; 568 struct fence *f = container_of(rcu, struct fence, rcu);
627 struct amdgpu_device *adev; 569 struct amdgpu_fence *fence = to_amdgpu_fence(f);
628 u64 seq; 570 kmem_cache_free(amdgpu_fence_slab, fence);
629 int ret;
630
631 fence = container_of(wait, struct amdgpu_fence, fence_wake);
632 adev = fence->ring->adev;
633
634 /*
635 * We cannot use amdgpu_fence_process here because we're already
636 * in the waitqueue, in a call from wake_up_all.
637 */
638 seq = atomic64_read(&fence->ring->fence_drv.last_seq);
639 if (seq >= fence->seq) {
640 ret = fence_signal_locked(&fence->base);
641 if (!ret)
642 FENCE_TRACE(&fence->base, "signaled from irq context\n");
643 else
644 FENCE_TRACE(&fence->base, "was already signaled\n");
645
646 __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
647 fence_put(&fence->base);
648 } else
649 FENCE_TRACE(&fence->base, "pending\n");
650 return 0;
651} 571}
652 572
653/** 573/**
654 * amdgpu_fence_enable_signaling - enable signalling on fence 574 * amdgpu_fence_release - callback that fence can be freed
575 *
655 * @fence: fence 576 * @fence: fence
656 * 577 *
657 * This function is called with fence_queue lock held, and adds a callback 578 * This function is called when the reference count becomes zero.
658 * to fence_queue that checks if this fence is signaled, and if so it 579 * It just RCU schedules freeing up the fence.
659 * signals the fence and removes itself.
660 */ 580 */
661static bool amdgpu_fence_enable_signaling(struct fence *f)
662{
663 struct amdgpu_fence *fence = to_amdgpu_fence(f);
664 struct amdgpu_ring *ring = fence->ring;
665
666 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
667 return false;
668
669 fence->fence_wake.flags = 0;
670 fence->fence_wake.private = NULL;
671 fence->fence_wake.func = amdgpu_fence_check_signaled;
672 __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
673 fence_get(f);
674 if (!timer_pending(&ring->fence_drv.fallback_timer))
675 amdgpu_fence_schedule_fallback(ring);
676 FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
677 return true;
678}
679
680static void amdgpu_fence_release(struct fence *f) 581static void amdgpu_fence_release(struct fence *f)
681{ 582{
682 struct amdgpu_fence *fence = to_amdgpu_fence(f); 583 call_rcu(&f->rcu, amdgpu_fence_free);
683 kmem_cache_free(amdgpu_fence_slab, fence);
684} 584}
685 585
686const struct fence_ops amdgpu_fence_ops = { 586static const struct fence_ops amdgpu_fence_ops = {
687 .get_driver_name = amdgpu_fence_get_driver_name, 587 .get_driver_name = amdgpu_fence_get_driver_name,
688 .get_timeline_name = amdgpu_fence_get_timeline_name, 588 .get_timeline_name = amdgpu_fence_get_timeline_name,
689 .enable_signaling = amdgpu_fence_enable_signaling, 589 .enable_signaling = amdgpu_fence_enable_signaling,
690 .signaled = amdgpu_fence_is_signaled,
691 .wait = fence_default_wait, 590 .wait = fence_default_wait,
692 .release = amdgpu_fence_release, 591 .release = amdgpu_fence_release,
693}; 592};
@@ -711,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
711 amdgpu_fence_process(ring); 610 amdgpu_fence_process(ring);
712 611
713 seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); 612 seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
714 seq_printf(m, "Last signaled fence 0x%016llx\n", 613 seq_printf(m, "Last signaled fence 0x%08x\n",
715 (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); 614 atomic_read(&ring->fence_drv.last_seq));
716 seq_printf(m, "Last emitted 0x%016llx\n", 615 seq_printf(m, "Last emitted 0x%08x\n",
717 ring->fence_drv.sync_seq); 616 ring->fence_drv.sync_seq);
718 } 617 }
719 return 0; 618 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 1ecdf6c01368..3e128c52bb87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -161,7 +161,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
161 161
162 amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); 162 amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
163 163
164 r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); 164 r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
165 if (r) { 165 if (r) {
166 dev_err(adev->dev, "leaking bo va because " 166 dev_err(adev->dev, "leaking bo va because "
167 "we fail to reserve bo (%d)\n", r); 167 "we fail to reserve bo (%d)\n", r);
@@ -258,12 +258,10 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
258 AMDGPU_GEM_USERPTR_REGISTER)) 258 AMDGPU_GEM_USERPTR_REGISTER))
259 return -EINVAL; 259 return -EINVAL;
260 260
261 if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && ( 261 if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) &&
262 !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) || 262 !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
263 !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
264 263
265 /* if we want to write to it we must require anonymous 264 /* if we want to write to it we must install a MMU notifier */
266 memory and install a MMU notifier */
267 return -EACCES; 265 return -EACCES;
268 } 266 }
269 267
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 798d46626820..8443cea6821a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -85,13 +85,13 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
85 * 85 *
86 * @adev: amdgpu_device pointer 86 * @adev: amdgpu_device pointer
87 * @ib: IB object to free 87 * @ib: IB object to free
88 * @f: the fence the SA bo needs to wait on for the ib allocation
88 * 89 *
89 * Free an IB (all asics). 90 * Free an IB (all asics).
90 */ 91 */
91void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) 92void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f)
92{ 93{
93 amdgpu_sa_bo_free(adev, &ib->sa_bo, ib->fence); 94 amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
94 fence_put(ib->fence);
95} 95}
96 96
97/** 97/**
@@ -123,6 +123,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
123 struct amdgpu_ib *ib = &ibs[0]; 123 struct amdgpu_ib *ib = &ibs[0];
124 struct amdgpu_ctx *ctx, *old_ctx; 124 struct amdgpu_ctx *ctx, *old_ctx;
125 struct amdgpu_vm *vm; 125 struct amdgpu_vm *vm;
126 struct fence *hwf;
126 unsigned i; 127 unsigned i;
127 int r = 0; 128 int r = 0;
128 129
@@ -179,7 +180,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
179 amdgpu_ring_emit_hdp_invalidate(ring); 180 amdgpu_ring_emit_hdp_invalidate(ring);
180 } 181 }
181 182
182 r = amdgpu_fence_emit(ring, &ib->fence); 183 r = amdgpu_fence_emit(ring, &hwf);
183 if (r) { 184 if (r) {
184 dev_err(adev->dev, "failed to emit fence (%d)\n", r); 185 dev_err(adev->dev, "failed to emit fence (%d)\n", r);
185 ring->current_ctx = old_ctx; 186 ring->current_ctx = old_ctx;
@@ -198,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
198 } 199 }
199 200
200 if (f) 201 if (f)
201 *f = fence_get(ib->fence); 202 *f = fence_get(hwf);
202 203
203 amdgpu_ring_commit(ring); 204 amdgpu_ring_commit(ring);
204 return 0; 205 return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 692b45560d0a..9c9b19e2f353 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -70,9 +70,13 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
70void amdgpu_job_free(struct amdgpu_job *job) 70void amdgpu_job_free(struct amdgpu_job *job)
71{ 71{
72 unsigned i; 72 unsigned i;
73 struct fence *f;
74 /* use sched fence if available */
75 f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
73 76
74 for (i = 0; i < job->num_ibs; ++i) 77 for (i = 0; i < job->num_ibs; ++i)
75 amdgpu_ib_free(job->adev, &job->ibs[i]); 78 amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
79 fence_put(job->fence);
76 80
77 amdgpu_bo_unref(&job->uf.bo); 81 amdgpu_bo_unref(&job->uf.bo);
78 amdgpu_sync_free(&job->sync); 82 amdgpu_sync_free(&job->sync);
@@ -156,6 +160,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
156 } 160 }
157 161
158err: 162err:
163 job->fence = fence;
159 amdgpu_job_free(job); 164 amdgpu_job_free(job);
160 return fence; 165 return fence;
161} 166}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9a025a77958d..151a2d42c639 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -308,7 +308,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
308int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) 308int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
309{ 309{
310 bool is_iomem; 310 bool is_iomem;
311 int r; 311 long r;
312 312
313 if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) 313 if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
314 return -EPERM; 314 return -EPERM;
@@ -319,14 +319,20 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
319 } 319 }
320 return 0; 320 return 0;
321 } 321 }
322
323 r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
324 MAX_SCHEDULE_TIMEOUT);
325 if (r < 0)
326 return r;
327
322 r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap); 328 r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
323 if (r) { 329 if (r)
324 return r; 330 return r;
325 } 331
326 bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); 332 bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
327 if (ptr) { 333 if (ptr)
328 *ptr = bo->kptr; 334 *ptr = bo->kptr;
329 } 335
330 return 0; 336 return 0;
331} 337}
332 338
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 56c07e3fdb33..972eed2ef787 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -236,7 +236,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
236 ring->adev = adev; 236 ring->adev = adev;
237 ring->idx = adev->num_rings++; 237 ring->idx = adev->num_rings++;
238 adev->rings[ring->idx] = ring; 238 adev->rings[ring->idx] = ring;
239 r = amdgpu_fence_driver_init_ring(ring); 239 r = amdgpu_fence_driver_init_ring(ring,
240 amdgpu_sched_hw_submission);
240 if (r) 241 if (r)
241 return r; 242 return r;
242 } 243 }
@@ -352,30 +353,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
352 } 353 }
353} 354}
354 355
355/**
356 * amdgpu_ring_from_fence - get ring from fence
357 *
358 * @f: fence structure
359 *
360 * Extract the ring a fence belongs to. Handles both scheduler as
361 * well as hardware fences.
362 */
363struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
364{
365 struct amdgpu_fence *a_fence;
366 struct amd_sched_fence *s_fence;
367
368 s_fence = to_amd_sched_fence(f);
369 if (s_fence)
370 return container_of(s_fence->sched, struct amdgpu_ring, sched);
371
372 a_fence = to_amdgpu_fence(f);
373 if (a_fence)
374 return a_fence->ring;
375
376 return NULL;
377}
378
379/* 356/*
380 * Debugfs info 357 * Debugfs info
381 */ 358 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 2faf03bcda21..8bf84efafb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -60,9 +60,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
60 sa_manager->align = align; 60 sa_manager->align = align;
61 sa_manager->hole = &sa_manager->olist; 61 sa_manager->hole = &sa_manager->olist;
62 INIT_LIST_HEAD(&sa_manager->olist); 62 INIT_LIST_HEAD(&sa_manager->olist);
63 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 63 for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
64 INIT_LIST_HEAD(&sa_manager->flist[i]); 64 INIT_LIST_HEAD(&sa_manager->flist[i]);
65 }
66 65
67 r = amdgpu_bo_create(adev, size, align, true, domain, 66 r = amdgpu_bo_create(adev, size, align, true, domain,
68 0, NULL, NULL, &sa_manager->bo); 67 0, NULL, NULL, &sa_manager->bo);
@@ -228,11 +227,9 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
228 unsigned soffset, eoffset, wasted; 227 unsigned soffset, eoffset, wasted;
229 int i; 228 int i;
230 229
231 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 230 for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
232 if (!list_empty(&sa_manager->flist[i])) { 231 if (!list_empty(&sa_manager->flist[i]))
233 return true; 232 return true;
234 }
235 }
236 233
237 soffset = amdgpu_sa_bo_hole_soffset(sa_manager); 234 soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
238 eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager); 235 eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
@@ -265,12 +262,11 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
265 /* go over all fence list and try to find the closest sa_bo 262 /* go over all fence list and try to find the closest sa_bo
266 * of the current last 263 * of the current last
267 */ 264 */
268 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 265 for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
269 struct amdgpu_sa_bo *sa_bo; 266 struct amdgpu_sa_bo *sa_bo;
270 267
271 if (list_empty(&sa_manager->flist[i])) { 268 if (list_empty(&sa_manager->flist[i]))
272 continue; 269 continue;
273 }
274 270
275 sa_bo = list_first_entry(&sa_manager->flist[i], 271 sa_bo = list_first_entry(&sa_manager->flist[i],
276 struct amdgpu_sa_bo, flist); 272 struct amdgpu_sa_bo, flist);
@@ -299,7 +295,9 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
299 } 295 }
300 296
301 if (best_bo) { 297 if (best_bo) {
302 uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx; 298 uint32_t idx = best_bo->fence->context;
299
300 idx %= AMDGPU_SA_NUM_FENCE_LISTS;
303 ++tries[idx]; 301 ++tries[idx];
304 sa_manager->hole = best_bo->olist.prev; 302 sa_manager->hole = best_bo->olist.prev;
305 303
@@ -315,8 +313,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
315 struct amdgpu_sa_bo **sa_bo, 313 struct amdgpu_sa_bo **sa_bo,
316 unsigned size, unsigned align) 314 unsigned size, unsigned align)
317{ 315{
318 struct fence *fences[AMDGPU_MAX_RINGS]; 316 struct fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
319 unsigned tries[AMDGPU_MAX_RINGS]; 317 unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
320 unsigned count; 318 unsigned count;
321 int i, r; 319 int i, r;
322 signed long t; 320 signed long t;
@@ -338,7 +336,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
338 336
339 spin_lock(&sa_manager->wq.lock); 337 spin_lock(&sa_manager->wq.lock);
340 do { 338 do {
341 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 339 for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
342 fences[i] = NULL; 340 fences[i] = NULL;
343 tries[i] = 0; 341 tries[i] = 0;
344 } 342 }
@@ -355,7 +353,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
355 /* see if we can skip over some allocations */ 353 /* see if we can skip over some allocations */
356 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries)); 354 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
357 355
358 for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i) 356 for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
359 if (fences[i]) 357 if (fences[i])
360 fences[count++] = fence_get(fences[i]); 358 fences[count++] = fence_get(fences[i]);
361 359
@@ -397,8 +395,9 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
397 spin_lock(&sa_manager->wq.lock); 395 spin_lock(&sa_manager->wq.lock);
398 if (fence && !fence_is_signaled(fence)) { 396 if (fence && !fence_is_signaled(fence)) {
399 uint32_t idx; 397 uint32_t idx;
398
400 (*sa_bo)->fence = fence_get(fence); 399 (*sa_bo)->fence = fence_get(fence);
401 idx = amdgpu_ring_from_fence(fence)->idx; 400 idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
402 list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]); 401 list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
403 } else { 402 } else {
404 amdgpu_sa_bo_remove_locked(*sa_bo); 403 amdgpu_sa_bo_remove_locked(*sa_bo);
@@ -410,25 +409,6 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
410 409
411#if defined(CONFIG_DEBUG_FS) 410#if defined(CONFIG_DEBUG_FS)
412 411
413static void amdgpu_sa_bo_dump_fence(struct fence *fence, struct seq_file *m)
414{
415 struct amdgpu_fence *a_fence = to_amdgpu_fence(fence);
416 struct amd_sched_fence *s_fence = to_amd_sched_fence(fence);
417
418 if (a_fence)
419 seq_printf(m, " protected by 0x%016llx on ring %d",
420 a_fence->seq, a_fence->ring->idx);
421
422 if (s_fence) {
423 struct amdgpu_ring *ring;
424
425
426 ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
427 seq_printf(m, " protected by 0x%016x on ring %d",
428 s_fence->base.seqno, ring->idx);
429 }
430}
431
432void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, 412void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
433 struct seq_file *m) 413 struct seq_file *m)
434{ 414{
@@ -445,8 +425,11 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
445 } 425 }
446 seq_printf(m, "[0x%010llx 0x%010llx] size %8lld", 426 seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
447 soffset, eoffset, eoffset - soffset); 427 soffset, eoffset, eoffset - soffset);
428
448 if (i->fence) 429 if (i->fence)
449 amdgpu_sa_bo_dump_fence(i->fence, m); 430 seq_printf(m, " protected by 0x%08x on context %d",
431 i->fence->seqno, i->fence->context);
432
450 seq_printf(m, "\n"); 433 seq_printf(m, "\n");
451 } 434 }
452 spin_unlock(&sa_manager->wq.lock); 435 spin_unlock(&sa_manager->wq.lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e5f0a5e29551..c1a581044417 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -539,13 +539,6 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
539 return -EINVAL; 539 return -EINVAL;
540 } 540 }
541 541
542 r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
543 MAX_SCHEDULE_TIMEOUT);
544 if (r < 0) {
545 DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
546 return r;
547 }
548
549 r = amdgpu_bo_kmap(bo, &ptr); 542 r = amdgpu_bo_kmap(bo, &ptr);
550 if (r) { 543 if (r) {
551 DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); 544 DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
@@ -887,6 +880,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
887 880
888 if (direct) { 881 if (direct) {
889 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); 882 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
883 job->fence = f;
890 if (r) 884 if (r)
891 goto err_free; 885 goto err_free;
892 886
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 6d191fb40b38..4bec0c108cea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -426,6 +426,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
426 ib->ptr[i] = 0x0; 426 ib->ptr[i] = 0x0;
427 427
428 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); 428 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
429 job->fence = f;
429 if (r) 430 if (r)
430 goto err; 431 goto err;
431 432
@@ -487,6 +488,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
487 488
488 if (direct) { 489 if (direct) {
489 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); 490 r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
491 job->fence = f;
490 if (r) 492 if (r)
491 goto err; 493 goto err;
492 494
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 02122197d2b6..d3ac3298fba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -669,7 +669,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
669 669
670err1: 670err1:
671 fence_put(f); 671 fence_put(f);
672 amdgpu_ib_free(adev, &ib); 672 amdgpu_ib_free(adev, &ib, NULL);
673 fence_put(f);
673err0: 674err0:
674 amdgpu_wb_free(adev, index); 675 amdgpu_wb_free(adev, index);
675 return r; 676 return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a06045f040f1..bb8709066fd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2163,7 +2163,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
2163 2163
2164err2: 2164err2:
2165 fence_put(f); 2165 fence_put(f);
2166 amdgpu_ib_free(adev, &ib); 2166 amdgpu_ib_free(adev, &ib, NULL);
2167 fence_put(f);
2167err1: 2168err1:
2168 amdgpu_gfx_scratch_free(adev, scratch); 2169 amdgpu_gfx_scratch_free(adev, scratch);
2169 return r; 2170 return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 509d0baaeaae..f0c7b3596480 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -732,7 +732,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
732 } 732 }
733err2: 733err2:
734 fence_put(f); 734 fence_put(f);
735 amdgpu_ib_free(adev, &ib); 735 amdgpu_ib_free(adev, &ib, NULL);
736 fence_put(f);
736err1: 737err1:
737 amdgpu_gfx_scratch_free(adev, scratch); 738 amdgpu_gfx_scratch_free(adev, scratch);
738 return r; 739 return r;
@@ -1289,7 +1290,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1289 1290
1290fail: 1291fail:
1291 fence_put(f); 1292 fence_put(f);
1292 amdgpu_ib_free(adev, &ib); 1293 amdgpu_ib_free(adev, &ib, NULL);
1294 fence_put(f);
1293 1295
1294 return r; 1296 return r;
1295} 1297}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index ab9ff89a3096..6e0a86a563f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -32,8 +32,8 @@
32#include "oss/oss_2_4_d.h" 32#include "oss/oss_2_4_d.h"
33#include "oss/oss_2_4_sh_mask.h" 33#include "oss/oss_2_4_sh_mask.h"
34 34
35#include "gmc/gmc_8_1_d.h" 35#include "gmc/gmc_7_1_d.h"
36#include "gmc/gmc_8_1_sh_mask.h" 36#include "gmc/gmc_7_1_sh_mask.h"
37 37
38#include "gca/gfx_8_0_d.h" 38#include "gca/gfx_8_0_d.h"
39#include "gca/gfx_8_0_enum.h" 39#include "gca/gfx_8_0_enum.h"
@@ -727,7 +727,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
727 727
728err1: 728err1:
729 fence_put(f); 729 fence_put(f);
730 amdgpu_ib_free(adev, &ib); 730 amdgpu_ib_free(adev, &ib, NULL);
731 fence_put(f);
731err0: 732err0:
732 amdgpu_wb_free(adev, index); 733 amdgpu_wb_free(adev, index);
733 return r; 734 return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 4c24c371fec7..8c8ca98dd129 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -878,7 +878,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
878 } 878 }
879err1: 879err1:
880 fence_put(f); 880 fence_put(f);
881 amdgpu_ib_free(adev, &ib); 881 amdgpu_ib_free(adev, &ib, NULL);
882 fence_put(f);
882err0: 883err0:
883 amdgpu_wb_free(adev, index); 884 amdgpu_wb_free(adev, index);
884 return r; 885 return r;