-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu.h	 2
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c	97
2 files changed, 31 insertions, 68 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2fea597f8b6c..c91156c7bda1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -392,8 +392,8 @@ struct amdgpu_fence_driver {
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
 	struct timer_list		fallback_timer;
-	wait_queue_head_t		fence_queue;
 	unsigned			num_fences_mask;
+	spinlock_t			lock;
 	struct fence			**fences;
 };
 
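[note] The hunk above replaces the per-ring wait queue with a plain spinlock whose only job is to be handed to fence_init() in amdgpu_fence_emit() below. A sketch of the resulting layout, reconstructed from this hunk (fields outside the hunk are elided; the comments are annotations, not from the source):

	struct amdgpu_fence_driver {
		/* ... fields before the hunk elided ... */
		struct amdgpu_irq_src	*irq_src;
		unsigned		irq_type;
		struct timer_list	fallback_timer;
		unsigned		num_fences_mask; /* num_hw_submission - 1; only a
							  * valid mask if num_hw_submission
							  * is a power of two */
		spinlock_t		lock;		 /* passed to fence_init() as the
							  * fence lock for every fence
							  * emitted on this ring */
		struct fence		**fences;	 /* one slot per possible in-flight
							  * hardware submission */
	};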
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d5bdd9633c85..c5980c4133a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -53,8 +53,6 @@ struct amdgpu_fence {
 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
 	uint64_t			seq;
-
-	wait_queue_t			fence_wake;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -124,7 +122,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
-	struct fence *old, **ptr;
+	struct fence **ptr;
 	unsigned idx;
 
 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
@@ -134,7 +132,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	fence->seq = ++ring->fence_drv.sync_seq;
 	fence->ring = ring;
 	fence_init(&fence->base, &amdgpu_fence_ops,
-		   &ring->fence_drv.fence_queue.lock,
+		   &ring->fence_drv.lock,
 		   adev->fence_context + ring->idx,
 		   fence->seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
@@ -145,13 +143,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	/* This function can't be called concurrently anyway, otherwise
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
-	old = rcu_dereference_protected(*ptr, 1);
+	BUG_ON(rcu_dereference_protected(*ptr, 1));
 
 	rcu_assign_pointer(*ptr, fence_get(&fence->base));
 
-	BUG_ON(old && !fence_is_signaled(old));
-	fence_put(old);
-
 	*f = &fence->base;
 
 	return 0;
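[note] With the old/fence_put dance removed, the emit path now asserts that the slot it is about to occupy is empty: by the time a sequence number wraps back to the same slot, amdgpu_fence_process() must already have signaled the previous occupant and set the slot to NULL. The assignment of ptr is unchanged context outside this hunk, so the following is an assumption about its form, shown only to make the slot arithmetic concrete:

	/* assumed shape of the slot lookup (not part of this hunk) */
	ptr = &ring->fence_drv.fences[fence->seq & ring->fence_drv.num_fences_mask];

	/* e.g. with num_hw_submission = 4 (mask = 3):
	 *   seq 5 -> slot 1, seq 9 -> slot 1 again; reuse is only safe
	 *   because seq 5 was signaled and its slot cleared first,
	 *   which is exactly what the new BUG_ON checks */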
@@ -181,11 +176,12 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
  */
 void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
 	uint64_t seq, last_seq, last_emitted;
-	bool wake = false;
+	int r;
 
-	last_seq = atomic64_read(&ring->fence_drv.last_seq);
 	do {
+		last_seq = atomic64_read(&ring->fence_drv.last_seq);
 		last_emitted = ring->fence_drv.sync_seq;
 		seq = amdgpu_fence_read(ring);
 		seq |= last_seq & 0xffffffff00000000LL;
@@ -195,22 +191,32 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 		}
 
 		if (seq <= last_seq || seq > last_emitted)
-			break;
+			return;
 
-		/* If we loop over we don't want to return without
-		 * checking if a fence is signaled as it means that the
-		 * seq we just read is different from the previous on.
-		 */
-		wake = true;
-		last_seq = seq;
-
-	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+	} while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 
 	if (seq < last_emitted)
 		amdgpu_fence_schedule_fallback(ring);
 
-	if (wake)
-		wake_up_all(&ring->fence_drv.fence_queue);
+	while (last_seq != seq) {
+		struct fence *fence, **ptr;
+
+		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+		/* There is always exactly one thread signaling this fence slot */
+		fence = rcu_dereference_protected(*ptr, 1);
+		rcu_assign_pointer(*ptr, NULL);
+
+		BUG_ON(!fence);
+
+		r = fence_signal(fence);
+		if (!r)
+			FENCE_TRACE(fence, "signaled from irq context\n");
+		else
+			BUG();
+
+		fence_put(fence);
+	}
 }
 
 /**
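[note] The rewritten amdgpu_fence_process() is the heart of the patch: a lock-free advance of last_seq, followed by exclusive signaling of the slots that advance claimed. The cmpxchg guarantees that for any range (last_seq, seq] exactly one caller wins and runs the signaling loop; a concurrent caller either retries with the updated last_seq or returns with nothing to do. A distilled sketch of the pattern with hypothetical names (not the driver code; the slot clear is simplified to a plain store where the driver uses rcu_assign_pointer):

	static void signal_completed(atomic64_t *last, uint64_t hw_seq,
				     struct fence **slots, unsigned mask)
	{
		uint64_t old;

		do {
			old = atomic64_read(last);
			if (hw_seq <= old)
				return;	/* another thread already caught up */
		} while (atomic64_cmpxchg(last, old, hw_seq) != old);

		/* only the cmpxchg winner gets here for (old, hw_seq];
		 * each slot is therefore signaled by exactly one thread */
		while (old != hw_seq) {
			struct fence *f = slots[++old & mask];

			slots[old & mask] = NULL;
			fence_signal(f);	/* no fence lock needed here */
			fence_put(f);		/* drop the reference taken at emit */
		}
	}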
@@ -356,8 +362,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
 		    (unsigned long)ring);
 
-	init_waitqueue_head(&ring->fence_drv.fence_queue);
 	ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+	spin_lock_init(&ring->fence_drv.lock);
 	ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
 					 GFP_KERNEL);
 	if (!ring->fence_drv.fences)
@@ -436,7 +442,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 			/* no need to trigger GPU reset as we are unloading */
 			amdgpu_fence_driver_force_completion(adev);
 		}
-		wake_up_all(&ring->fence_drv.fence_queue);
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 		amd_sched_fini(&ring->sched);
@@ -569,42 +574,6 @@ static bool amdgpu_fence_is_signaled(struct fence *f)
 }
 
 /**
- * amdgpu_fence_check_signaled - callback from fence_queue
- *
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
- */
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
-{
-	struct amdgpu_fence *fence;
-	struct amdgpu_device *adev;
-	u64 seq;
-	int ret;
-
-	fence = container_of(wait, struct amdgpu_fence, fence_wake);
-	adev = fence->ring->adev;
-
-	/*
-	 * We cannot use amdgpu_fence_process here because we're already
-	 * in the waitqueue, in a call from wake_up_all.
-	 */
-	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
-	if (seq >= fence->seq) {
-		ret = fence_signal_locked(&fence->base);
-		if (!ret)
-			FENCE_TRACE(&fence->base, "signaled from irq context\n");
-		else
-			FENCE_TRACE(&fence->base, "was already signaled\n");
-
-		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
-		fence_put(&fence->base);
-	} else
-		FENCE_TRACE(&fence->base, "pending\n");
-	return 0;
-}
-
-/**
  * amdgpu_fence_enable_signaling - enable signalling on fence
  * @fence: fence
  *
@@ -617,17 +586,11 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_ring *ring = fence->ring;
 
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return false;
-
-	fence->fence_wake.flags = 0;
-	fence->fence_wake.private = NULL;
-	fence->fence_wake.func = amdgpu_fence_check_signaled;
-	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
-	fence_get(f);
 	if (!timer_pending(&ring->fence_drv.fallback_timer))
 		amdgpu_fence_schedule_fallback(ring);
+
 	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
+
 	return true;
 }
 
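[note] With the wait-queue callback removed, enable_signaling no longer registers anything per fence: every completed fence is signaled unconditionally from the interrupt path above, so the callback only has to make sure the fallback timer is armed and can always return true. Returning false would tell the fence core the fence is already signaled. A rough sketch of the contract the core relies on (hypothetical, simplified; not the actual drivers/dma-buf code):

	static bool arm_fence(struct fence *f)
	{
		if (f->ops->enable_signaling(f))
			return true;	/* driver will call fence_signal() later,
					 * here from amdgpu_fence_process() or
					 * the fallback timer */

		/* false means "already signaled, nothing to wait for" */
		fence_signal(f);
		return false;
	}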