diff options
author | Francisco Jerez <currojerez@riseup.net> | 2010-10-02 11:04:46 -0400 |
---|---|---|
committer | Ben Skeggs <bskeggs@redhat.com> | 2010-10-04 19:59:28 -0400 |
commit | 8af29ccd7917ab448ea7b5cf581fa7b2b4ea3cba (patch) | |
tree | 1b572625051c09fccca446db2966f6b07fa837f8 /drivers/gpu | |
parent | 647988175234a733cc0d4cf968949344803a77a7 (diff) |
drm/nv50: Fix large 3D performance regression caused by the interchannel sync patches.
Reported-by: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Tested-by: Maarten Maathuis <madman2003@gmail.com>
Tested-by: Xavier Chantry <chantry.xavier@gmail.com>
Tested-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_drv.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/nouveau/nouveau_fence.c | 46 |
2 files changed, 40 insertions, 7 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index a308c132c19..3a07e580d27 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h | |||
@@ -1473,6 +1473,7 @@ nv_match_device(struct drm_device *dev, unsigned device, | |||
1473 | #define NV_SW_SEMAPHORE_OFFSET 0x00000064 | 1473 | #define NV_SW_SEMAPHORE_OFFSET 0x00000064 |
1474 | #define NV_SW_SEMAPHORE_ACQUIRE 0x00000068 | 1474 | #define NV_SW_SEMAPHORE_ACQUIRE 0x00000068 |
1475 | #define NV_SW_SEMAPHORE_RELEASE 0x0000006c | 1475 | #define NV_SW_SEMAPHORE_RELEASE 0x0000006c |
1476 | #define NV_SW_YIELD 0x00000080 | ||
1476 | #define NV_SW_DMA_VBLSEM 0x0000018c | 1477 | #define NV_SW_DMA_VBLSEM 0x0000018c |
1477 | #define NV_SW_VBLSEM_OFFSET 0x00000400 | 1478 | #define NV_SW_VBLSEM_OFFSET 0x00000400 |
1478 | #define NV_SW_VBLSEM_RELEASE_VALUE 0x00000404 | 1479 | #define NV_SW_VBLSEM_RELEASE_VALUE 0x00000404 |
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index f42675cc9d1..441b12420bb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c | |||
@@ -308,21 +308,54 @@ emit_semaphore(struct nouveau_channel *chan, int method, | |||
308 | { | 308 | { |
309 | struct drm_nouveau_private *dev_priv = sema->dev->dev_private; | 309 | struct drm_nouveau_private *dev_priv = sema->dev->dev_private; |
310 | struct nouveau_fence *fence; | 310 | struct nouveau_fence *fence; |
311 | bool smart = (dev_priv->card_type >= NV_50); | ||
311 | int ret; | 312 | int ret; |
312 | 313 | ||
313 | ret = RING_SPACE(chan, dev_priv->card_type >= NV_50 ? 6 : 4); | 314 | ret = RING_SPACE(chan, smart ? 8 : 4); |
314 | if (ret) | 315 | if (ret) |
315 | return ret; | 316 | return ret; |
316 | 317 | ||
317 | if (dev_priv->card_type >= NV_50) { | 318 | if (smart) { |
318 | BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); | 319 | BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); |
319 | OUT_RING(chan, NvSema); | 320 | OUT_RING(chan, NvSema); |
320 | } | 321 | } |
321 | BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); | 322 | BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); |
322 | OUT_RING(chan, sema->mem->start); | 323 | OUT_RING(chan, sema->mem->start); |
324 | |||
325 | if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) { | ||
326 | /* | ||
327 | * NV50 tries to be too smart and context-switch | ||
328 | * between semaphores instead of doing a "first come, | ||
329 | * first served" strategy like previous cards | ||
330 | * do. | ||
331 | * | ||
332 | * That's bad because the ACQUIRE latency can get as | ||
333 | * large as the PFIFO context time slice in the | ||
334 | * typical DRI2 case where you have several | ||
335 | * outstanding semaphores at the same moment. | ||
336 | * | ||
337 | * If we're going to ACQUIRE, force the card to | ||
338 | * context switch before, just in case the matching | ||
339 | * RELEASE is already scheduled to be executed in | ||
340 | * another channel. | ||
341 | */ | ||
342 | BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); | ||
343 | OUT_RING(chan, 0); | ||
344 | } | ||
345 | |||
323 | BEGIN_RING(chan, NvSubSw, method, 1); | 346 | BEGIN_RING(chan, NvSubSw, method, 1); |
324 | OUT_RING(chan, 1); | 347 | OUT_RING(chan, 1); |
325 | 348 | ||
349 | if (smart && method == NV_SW_SEMAPHORE_RELEASE) { | ||
350 | /* | ||
351 | * Force the card to context switch, there may be | ||
352 | * another channel waiting for the semaphore we just | ||
353 | * released. | ||
354 | */ | ||
355 | BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); | ||
356 | OUT_RING(chan, 0); | ||
357 | } | ||
358 | |||
326 | /* Delay semaphore destruction until its work is done */ | 359 | /* Delay semaphore destruction until its work is done */ |
327 | ret = nouveau_fence_new(chan, &fence, true); | 360 | ret = nouveau_fence_new(chan, &fence, true); |
328 | if (ret) | 361 | if (ret) |
@@ -355,14 +388,13 @@ nouveau_fence_sync(struct nouveau_fence *fence, | |||
355 | return nouveau_fence_wait(fence, NULL, false, false); | 388 | return nouveau_fence_wait(fence, NULL, false, false); |
356 | } | 389 | } |
357 | 390 | ||
358 | /* Signal the semaphore from chan */ | ||
359 | ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema); | ||
360 | if (ret) | ||
361 | goto out; | ||
362 | |||
363 | /* Make wchan wait until it gets signalled */ | 391 | /* Make wchan wait until it gets signalled */ |
364 | ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema); | 392 | ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema); |
393 | if (ret) | ||
394 | goto out; | ||
365 | 395 | ||
396 | /* Signal the semaphore from chan */ | ||
397 | ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema); | ||
366 | out: | 398 | out: |
367 | kref_put(&sema->ref, free_semaphore); | 399 | kref_put(&sema->ref, free_semaphore); |
368 | return ret; | 400 | return ret; |