about summary refs log tree commit diff stats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2016-03-14 10:46:06 -0400
committerAlex Deucher <alexander.deucher@amd.com>2016-03-16 18:00:10 -0400
commit742c085fa86345ae9de259c7f15c652322da4e83 (patch)
treec76c88963dc7c00ed38d23ae01d02fd4dc9c3708 /drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
parent480d0bf07ecf27da0a5f12774d9642072c364fa9 (diff)
drm/amdgpu: switch back to 32bit hw fences v2
We don't need to extend them to 64bits any more, so avoid the extra overhead. v2: update commit message. Signed-off-by: Christian König <christian.koenig@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c49
1 file changed, 19 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index da9a155a622c..4303b447efe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -52,7 +52,6 @@ struct amdgpu_fence {
52 52
53 /* RB, DMA, etc. */ 53 /* RB, DMA, etc. */
54 struct amdgpu_ring *ring; 54 struct amdgpu_ring *ring;
55 uint64_t seq;
56}; 55};
57 56
58static struct kmem_cache *amdgpu_fence_slab; 57static struct kmem_cache *amdgpu_fence_slab;
@@ -104,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
104 if (drv->cpu_addr) 103 if (drv->cpu_addr)
105 seq = le32_to_cpu(*drv->cpu_addr); 104 seq = le32_to_cpu(*drv->cpu_addr);
106 else 105 else
107 seq = lower_32_bits(atomic64_read(&drv->last_seq)); 106 seq = atomic_read(&drv->last_seq);
108 107
109 return seq; 108 return seq;
110} 109}
@@ -123,23 +122,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
123 struct amdgpu_device *adev = ring->adev; 122 struct amdgpu_device *adev = ring->adev;
124 struct amdgpu_fence *fence; 123 struct amdgpu_fence *fence;
125 struct fence **ptr; 124 struct fence **ptr;
126 unsigned idx; 125 uint32_t seq;
127 126
128 fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); 127 fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
129 if (fence == NULL) 128 if (fence == NULL)
130 return -ENOMEM; 129 return -ENOMEM;
131 130
132 fence->seq = ++ring->fence_drv.sync_seq; 131 seq = ++ring->fence_drv.sync_seq;
133 fence->ring = ring; 132 fence->ring = ring;
134 fence_init(&fence->base, &amdgpu_fence_ops, 133 fence_init(&fence->base, &amdgpu_fence_ops,
135 &ring->fence_drv.lock, 134 &ring->fence_drv.lock,
136 adev->fence_context + ring->idx, 135 adev->fence_context + ring->idx,
137 fence->seq); 136 seq);
138 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 137 amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
139 fence->seq, AMDGPU_FENCE_FLAG_INT); 138 seq, AMDGPU_FENCE_FLAG_INT);
140 139
141 idx = fence->seq & ring->fence_drv.num_fences_mask; 140 ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
142 ptr = &ring->fence_drv.fences[idx];
143 /* This function can't be called concurrently anyway, otherwise 141 /* This function can't be called concurrently anyway, otherwise
144 * emitting the fence would mess up the hardware ring buffer. 142 * emitting the fence would mess up the hardware ring buffer.
145 */ 143 */
@@ -177,22 +175,16 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
177void amdgpu_fence_process(struct amdgpu_ring *ring) 175void amdgpu_fence_process(struct amdgpu_ring *ring)
178{ 176{
179 struct amdgpu_fence_driver *drv = &ring->fence_drv; 177 struct amdgpu_fence_driver *drv = &ring->fence_drv;
180 uint64_t seq, last_seq, last_emitted; 178 uint32_t seq, last_seq;
181 int r; 179 int r;
182 180
183 do { 181 do {
184 last_seq = atomic64_read(&ring->fence_drv.last_seq); 182 last_seq = atomic_read(&ring->fence_drv.last_seq);
185 last_emitted = ring->fence_drv.sync_seq;
186 seq = amdgpu_fence_read(ring); 183 seq = amdgpu_fence_read(ring);
187 seq |= last_seq & 0xffffffff00000000LL;
188 if (seq < last_seq) {
189 seq &= 0xffffffff;
190 seq |= last_emitted & 0xffffffff00000000LL;
191 }
192 184
193 } while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); 185 } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
194 186
195 if (seq < last_emitted) 187 if (seq != ring->fence_drv.sync_seq)
196 amdgpu_fence_schedule_fallback(ring); 188 amdgpu_fence_schedule_fallback(ring);
197 189
198 while (last_seq != seq) { 190 while (last_seq != seq) {
@@ -279,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
279 * but it's ok to report slightly wrong fence count here. 271 * but it's ok to report slightly wrong fence count here.
280 */ 272 */
281 amdgpu_fence_process(ring); 273 amdgpu_fence_process(ring);
282 emitted = ring->fence_drv.sync_seq 274 emitted = 0x100000000ull;
283 - atomic64_read(&ring->fence_drv.last_seq); 275 emitted -= atomic_read(&ring->fence_drv.last_seq);
284 /* to avoid 32bits warp around */ 276 emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
285 if (emitted > 0x10000000) 277 return lower_32_bits(emitted);
286 emitted = 0x10000000;
287
288 return (unsigned)emitted;
289} 278}
290 279
291/** 280/**
@@ -317,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
317 ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; 306 ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
318 ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; 307 ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
319 } 308 }
320 amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq)); 309 amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
321 amdgpu_irq_get(adev, irq_src, irq_type); 310 amdgpu_irq_get(adev, irq_src, irq_type);
322 311
323 ring->fence_drv.irq_src = irq_src; 312 ring->fence_drv.irq_src = irq_src;
@@ -353,7 +342,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
353 ring->fence_drv.cpu_addr = NULL; 342 ring->fence_drv.cpu_addr = NULL;
354 ring->fence_drv.gpu_addr = 0; 343 ring->fence_drv.gpu_addr = 0;
355 ring->fence_drv.sync_seq = 0; 344 ring->fence_drv.sync_seq = 0;
356 atomic64_set(&ring->fence_drv.last_seq, 0); 345 atomic_set(&ring->fence_drv.last_seq, 0);
357 ring->fence_drv.initialized = false; 346 ring->fence_drv.initialized = false;
358 347
359 setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 348 setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
@@ -621,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
621 amdgpu_fence_process(ring); 610 amdgpu_fence_process(ring);
622 611
623 seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); 612 seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
624 seq_printf(m, "Last signaled fence 0x%016llx\n", 613 seq_printf(m, "Last signaled fence 0x%08x\n",
625 (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); 614 atomic_read(&ring->fence_drv.last_seq));
626 seq_printf(m, "Last emitted 0x%016llx\n", 615 seq_printf(m, "Last emitted 0x%08x\n",
627 ring->fence_drv.sync_seq); 616 ring->fence_drv.sync_seq);
628 } 617 }
629 return 0; 618 return 0;