diff options
author | Christian König <christian.koenig@amd.com> | 2016-03-14 10:46:06 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2016-03-16 18:00:10 -0400 |
commit | 742c085fa86345ae9de259c7f15c652322da4e83 (patch) | |
tree | c76c88963dc7c00ed38d23ae01d02fd4dc9c3708 /drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | |
parent | 480d0bf07ecf27da0a5f12774d9642072c364fa9 (diff) |
drm/amdgpu: switch back to 32bit hw fences v2
We don't need to extend them to 64bits any more, so avoid the extra overhead.
v2: update commit message.
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 49 |
1 file changed, 19 insertions, 30 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index da9a155a622c..4303b447efe8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | |||
@@ -52,7 +52,6 @@ struct amdgpu_fence { | |||
52 | 52 | ||
53 | /* RB, DMA, etc. */ | 53 | /* RB, DMA, etc. */ |
54 | struct amdgpu_ring *ring; | 54 | struct amdgpu_ring *ring; |
55 | uint64_t seq; | ||
56 | }; | 55 | }; |
57 | 56 | ||
58 | static struct kmem_cache *amdgpu_fence_slab; | 57 | static struct kmem_cache *amdgpu_fence_slab; |
@@ -104,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) | |||
104 | if (drv->cpu_addr) | 103 | if (drv->cpu_addr) |
105 | seq = le32_to_cpu(*drv->cpu_addr); | 104 | seq = le32_to_cpu(*drv->cpu_addr); |
106 | else | 105 | else |
107 | seq = lower_32_bits(atomic64_read(&drv->last_seq)); | 106 | seq = atomic_read(&drv->last_seq); |
108 | 107 | ||
109 | return seq; | 108 | return seq; |
110 | } | 109 | } |
@@ -123,23 +122,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) | |||
123 | struct amdgpu_device *adev = ring->adev; | 122 | struct amdgpu_device *adev = ring->adev; |
124 | struct amdgpu_fence *fence; | 123 | struct amdgpu_fence *fence; |
125 | struct fence **ptr; | 124 | struct fence **ptr; |
126 | unsigned idx; | 125 | uint32_t seq; |
127 | 126 | ||
128 | fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); | 127 | fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); |
129 | if (fence == NULL) | 128 | if (fence == NULL) |
130 | return -ENOMEM; | 129 | return -ENOMEM; |
131 | 130 | ||
132 | fence->seq = ++ring->fence_drv.sync_seq; | 131 | seq = ++ring->fence_drv.sync_seq; |
133 | fence->ring = ring; | 132 | fence->ring = ring; |
134 | fence_init(&fence->base, &amdgpu_fence_ops, | 133 | fence_init(&fence->base, &amdgpu_fence_ops, |
135 | &ring->fence_drv.lock, | 134 | &ring->fence_drv.lock, |
136 | adev->fence_context + ring->idx, | 135 | adev->fence_context + ring->idx, |
137 | fence->seq); | 136 | seq); |
138 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, | 137 | amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, |
139 | fence->seq, AMDGPU_FENCE_FLAG_INT); | 138 | seq, AMDGPU_FENCE_FLAG_INT); |
140 | 139 | ||
141 | idx = fence->seq & ring->fence_drv.num_fences_mask; | 140 | ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; |
142 | ptr = &ring->fence_drv.fences[idx]; | ||
143 | /* This function can't be called concurrently anyway, otherwise | 141 | /* This function can't be called concurrently anyway, otherwise |
144 | * emitting the fence would mess up the hardware ring buffer. | 142 | * emitting the fence would mess up the hardware ring buffer. |
145 | */ | 143 | */ |
@@ -177,22 +175,16 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) | |||
177 | void amdgpu_fence_process(struct amdgpu_ring *ring) | 175 | void amdgpu_fence_process(struct amdgpu_ring *ring) |
178 | { | 176 | { |
179 | struct amdgpu_fence_driver *drv = &ring->fence_drv; | 177 | struct amdgpu_fence_driver *drv = &ring->fence_drv; |
180 | uint64_t seq, last_seq, last_emitted; | 178 | uint32_t seq, last_seq; |
181 | int r; | 179 | int r; |
182 | 180 | ||
183 | do { | 181 | do { |
184 | last_seq = atomic64_read(&ring->fence_drv.last_seq); | 182 | last_seq = atomic_read(&ring->fence_drv.last_seq); |
185 | last_emitted = ring->fence_drv.sync_seq; | ||
186 | seq = amdgpu_fence_read(ring); | 183 | seq = amdgpu_fence_read(ring); |
187 | seq |= last_seq & 0xffffffff00000000LL; | ||
188 | if (seq < last_seq) { | ||
189 | seq &= 0xffffffff; | ||
190 | seq |= last_emitted & 0xffffffff00000000LL; | ||
191 | } | ||
192 | 184 | ||
193 | } while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); | 185 | } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); |
194 | 186 | ||
195 | if (seq < last_emitted) | 187 | if (seq != ring->fence_drv.sync_seq) |
196 | amdgpu_fence_schedule_fallback(ring); | 188 | amdgpu_fence_schedule_fallback(ring); |
197 | 189 | ||
198 | while (last_seq != seq) { | 190 | while (last_seq != seq) { |
@@ -279,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) | |||
279 | * but it's ok to report slightly wrong fence count here. | 271 | * but it's ok to report slightly wrong fence count here. |
280 | */ | 272 | */ |
281 | amdgpu_fence_process(ring); | 273 | amdgpu_fence_process(ring); |
282 | emitted = ring->fence_drv.sync_seq | 274 | emitted = 0x100000000ull; |
283 | - atomic64_read(&ring->fence_drv.last_seq); | 275 | emitted -= atomic_read(&ring->fence_drv.last_seq); |
284 | /* to avoid 32bits warp around */ | 276 | emitted += ACCESS_ONCE(ring->fence_drv.sync_seq); |
285 | if (emitted > 0x10000000) | 277 | return lower_32_bits(emitted); |
286 | emitted = 0x10000000; | ||
287 | |||
288 | return (unsigned)emitted; | ||
289 | } | 278 | } |
290 | 279 | ||
291 | /** | 280 | /** |
@@ -317,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, | |||
317 | ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; | 306 | ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; |
318 | ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; | 307 | ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; |
319 | } | 308 | } |
320 | amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq)); | 309 | amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); |
321 | amdgpu_irq_get(adev, irq_src, irq_type); | 310 | amdgpu_irq_get(adev, irq_src, irq_type); |
322 | 311 | ||
323 | ring->fence_drv.irq_src = irq_src; | 312 | ring->fence_drv.irq_src = irq_src; |
@@ -353,7 +342,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, | |||
353 | ring->fence_drv.cpu_addr = NULL; | 342 | ring->fence_drv.cpu_addr = NULL; |
354 | ring->fence_drv.gpu_addr = 0; | 343 | ring->fence_drv.gpu_addr = 0; |
355 | ring->fence_drv.sync_seq = 0; | 344 | ring->fence_drv.sync_seq = 0; |
356 | atomic64_set(&ring->fence_drv.last_seq, 0); | 345 | atomic_set(&ring->fence_drv.last_seq, 0); |
357 | ring->fence_drv.initialized = false; | 346 | ring->fence_drv.initialized = false; |
358 | 347 | ||
359 | setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, | 348 | setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, |
@@ -621,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) | |||
621 | amdgpu_fence_process(ring); | 610 | amdgpu_fence_process(ring); |
622 | 611 | ||
623 | seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); | 612 | seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); |
624 | seq_printf(m, "Last signaled fence 0x%016llx\n", | 613 | seq_printf(m, "Last signaled fence 0x%08x\n", |
625 | (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); | 614 | atomic_read(&ring->fence_drv.last_seq)); |
626 | seq_printf(m, "Last emitted 0x%016llx\n", | 615 | seq_printf(m, "Last emitted 0x%08x\n", |
627 | ring->fence_drv.sync_seq); | 616 | ring->fence_drv.sync_seq); |
628 | } | 617 | } |
629 | return 0; | 618 | return 0; |