aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2010-10-02 11:04:46 -0400
committer: Ben Skeggs <bskeggs@redhat.com> 2010-10-04 19:59:28 -0400
commit: 8af29ccd7917ab448ea7b5cf581fa7b2b4ea3cba (patch)
tree: 1b572625051c09fccca446db2966f6b07fa837f8 /drivers/gpu
parent: 647988175234a733cc0d4cf968949344803a77a7 (diff)
drm/nv50: Fix large 3D performance regression caused by the interchannel sync patches.
Reported-by: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Tested-by: Maarten Maathuis <madman2003@gmail.com>
Tested-by: Xavier Chantry <chantry.xavier@gmail.com>
Tested-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/nouveau/nouveau_drv.h | 1
-rw-r--r-- drivers/gpu/drm/nouveau/nouveau_fence.c | 46
2 files changed, 40 insertions, 7 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index a308c132c19..3a07e580d27 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -1473,6 +1473,7 @@ nv_match_device(struct drm_device *dev, unsigned device,
1473#define NV_SW_SEMAPHORE_OFFSET 0x00000064 1473#define NV_SW_SEMAPHORE_OFFSET 0x00000064
1474#define NV_SW_SEMAPHORE_ACQUIRE 0x00000068 1474#define NV_SW_SEMAPHORE_ACQUIRE 0x00000068
1475#define NV_SW_SEMAPHORE_RELEASE 0x0000006c 1475#define NV_SW_SEMAPHORE_RELEASE 0x0000006c
1476#define NV_SW_YIELD 0x00000080
1476#define NV_SW_DMA_VBLSEM 0x0000018c 1477#define NV_SW_DMA_VBLSEM 0x0000018c
1477#define NV_SW_VBLSEM_OFFSET 0x00000400 1478#define NV_SW_VBLSEM_OFFSET 0x00000400
1478#define NV_SW_VBLSEM_RELEASE_VALUE 0x00000404 1479#define NV_SW_VBLSEM_RELEASE_VALUE 0x00000404
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index f42675cc9d1..441b12420bb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -308,21 +308,54 @@ emit_semaphore(struct nouveau_channel *chan, int method,
308{ 308{
309 struct drm_nouveau_private *dev_priv = sema->dev->dev_private; 309 struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
310 struct nouveau_fence *fence; 310 struct nouveau_fence *fence;
311 bool smart = (dev_priv->card_type >= NV_50);
311 int ret; 312 int ret;
312 313
313 ret = RING_SPACE(chan, dev_priv->card_type >= NV_50 ? 6 : 4); 314 ret = RING_SPACE(chan, smart ? 8 : 4);
314 if (ret) 315 if (ret)
315 return ret; 316 return ret;
316 317
317 if (dev_priv->card_type >= NV_50) { 318 if (smart) {
318 BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); 319 BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
319 OUT_RING(chan, NvSema); 320 OUT_RING(chan, NvSema);
320 } 321 }
321 BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); 322 BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
322 OUT_RING(chan, sema->mem->start); 323 OUT_RING(chan, sema->mem->start);
324
325 if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
326 /*
327 * NV50 tries to be too smart and context-switch
328 * between semaphores instead of doing a "first come,
329 * first served" strategy like previous cards
330 * do.
331 *
332 * That's bad because the ACQUIRE latency can get as
333 * large as the PFIFO context time slice in the
334 * typical DRI2 case where you have several
335 * outstanding semaphores at the same moment.
336 *
337 * If we're going to ACQUIRE, force the card to
338 * context switch before, just in case the matching
339 * RELEASE is already scheduled to be executed in
340 * another channel.
341 */
342 BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
343 OUT_RING(chan, 0);
344 }
345
323 BEGIN_RING(chan, NvSubSw, method, 1); 346 BEGIN_RING(chan, NvSubSw, method, 1);
324 OUT_RING(chan, 1); 347 OUT_RING(chan, 1);
325 348
349 if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
350 /*
351 * Force the card to context switch, there may be
352 * another channel waiting for the semaphore we just
353 * released.
354 */
355 BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
356 OUT_RING(chan, 0);
357 }
358
326 /* Delay semaphore destruction until its work is done */ 359 /* Delay semaphore destruction until its work is done */
327 ret = nouveau_fence_new(chan, &fence, true); 360 ret = nouveau_fence_new(chan, &fence, true);
328 if (ret) 361 if (ret)
@@ -355,14 +388,13 @@ nouveau_fence_sync(struct nouveau_fence *fence,
355 return nouveau_fence_wait(fence, NULL, false, false); 388 return nouveau_fence_wait(fence, NULL, false, false);
356 } 389 }
357 390
358 /* Signal the semaphore from chan */
359 ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
360 if (ret)
361 goto out;
362
363 /* Make wchan wait until it gets signalled */ 391 /* Make wchan wait until it gets signalled */
364 ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema); 392 ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
393 if (ret)
394 goto out;
365 395
396 /* Signal the semaphore from chan */
397 ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
366out: 398out:
367 kref_put(&sema->ref, free_semaphore); 399 kref_put(&sema->ref, free_semaphore);
368 return ret; 400 return ret;