aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2009-06-03 03:27:35 -0400
committer Eric Anholt <eric@anholt.net> 2009-06-04 07:44:22 -0400
commit b962442e46a9340bdbc6711982c59ff0cc2b5afb (patch)
tree 12cfe74d0061728951b004c5519e961bd9e0b33c /drivers/gpu
parent 1fd1c624362819ecc36db2458c6a972c48ae92d6 (diff)
drm/i915: Change GEM throttling to be 20ms like the comment says.
keithp didn't like the original 20ms plan because a cooperative client could be starved by an uncooperative client. There may even have been problems with cooperative clients versus cooperative clients. So keithp changed throttle to just wait for the second to last seqno emitted by that client. It worked well, until we started getting more round-trips to the server due to DRI2 -- the server throttles in BlockHandler, and so if you did more than one round trip after finishing your frame, you'd end up unintentionally syncing to the swap. Fix this by keeping track of the client's requests, so the client can wait when it has an outstanding request over 20ms old. This should have non-starving behavior, good behavior in the presence of restarts, and less waiting. Improves high-settings openarena performance on my GM45 by 50%. Signed-off-by: Eric Anholt <eric@anholt.net> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/i915/i915_dma.c 4
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 7
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 69
3 files changed, 61 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 53d544552625..0c222c28b8c1 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1273,8 +1273,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
1273 1273
1274 file_priv->driver_priv = i915_file_priv; 1274 file_priv->driver_priv = i915_file_priv;
1275 1275
1276 i915_file_priv->mm.last_gem_seqno = 0; 1276 INIT_LIST_HEAD(&i915_file_priv->mm.request_list);
1277 i915_file_priv->mm.last_gem_throttle_seqno = 0;
1278 1277
1279 return 0; 1278 return 0;
1280} 1279}
@@ -1311,6 +1310,7 @@ void i915_driver_lastclose(struct drm_device * dev)
1311void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv) 1310void i915_driver_preclose(struct drm_device * dev, struct drm_file *file_priv)
1312{ 1311{
1313 drm_i915_private_t *dev_priv = dev->dev_private; 1312 drm_i915_private_t *dev_priv = dev->dev_private;
1313 i915_gem_release(dev, file_priv);
1314 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 1314 if (!drm_core_check_feature(dev, DRIVER_MODESET))
1315 i915_mem_release(dev, file_priv, dev_priv->agp_heap); 1315 i915_mem_release(dev, file_priv, dev_priv->agp_heap);
1316} 1316}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fcaa5444daa0..e0fac5f62c69 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -498,13 +498,16 @@ struct drm_i915_gem_request {
498 /** Time at which this request was emitted, in jiffies. */ 498 /** Time at which this request was emitted, in jiffies. */
499 unsigned long emitted_jiffies; 499 unsigned long emitted_jiffies;
500 500
501 /** global list entry for this request */
501 struct list_head list; 502 struct list_head list;
503
504 /** file_priv list entry for this request */
505 struct list_head client_list;
502}; 506};
503 507
504struct drm_i915_file_private { 508struct drm_i915_file_private {
505 struct { 509 struct {
506 uint32_t last_gem_seqno; 510 struct list_head request_list;
507 uint32_t last_gem_throttle_seqno;
508 } mm; 511 } mm;
509}; 512};
510 513
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 39f5c658ef5e..3fbd8a0c40d1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1481,14 +1481,19 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1481 * Returned sequence numbers are nonzero on success. 1481 * Returned sequence numbers are nonzero on success.
1482 */ 1482 */
1483static uint32_t 1483static uint32_t
1484i915_add_request(struct drm_device *dev, uint32_t flush_domains) 1484i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1485 uint32_t flush_domains)
1485{ 1486{
1486 drm_i915_private_t *dev_priv = dev->dev_private; 1487 drm_i915_private_t *dev_priv = dev->dev_private;
1488 struct drm_i915_file_private *i915_file_priv = NULL;
1487 struct drm_i915_gem_request *request; 1489 struct drm_i915_gem_request *request;
1488 uint32_t seqno; 1490 uint32_t seqno;
1489 int was_empty; 1491 int was_empty;
1490 RING_LOCALS; 1492 RING_LOCALS;
1491 1493
1494 if (file_priv != NULL)
1495 i915_file_priv = file_priv->driver_priv;
1496
1492 request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER); 1497 request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
1493 if (request == NULL) 1498 if (request == NULL)
1494 return 0; 1499 return 0;
@@ -1515,6 +1520,12 @@ i915_add_request(struct drm_device *dev, uint32_t flush_domains)
1515 request->emitted_jiffies = jiffies; 1520 request->emitted_jiffies = jiffies;
1516 was_empty = list_empty(&dev_priv->mm.request_list); 1521 was_empty = list_empty(&dev_priv->mm.request_list);
1517 list_add_tail(&request->list, &dev_priv->mm.request_list); 1522 list_add_tail(&request->list, &dev_priv->mm.request_list);
1523 if (i915_file_priv) {
1524 list_add_tail(&request->client_list,
1525 &i915_file_priv->mm.request_list);
1526 } else {
1527 INIT_LIST_HEAD(&request->client_list);
1528 }
1518 1529
1519 /* Associate any objects on the flushing list matching the write 1530 /* Associate any objects on the flushing list matching the write
1520 * domain we're flushing with our flush. 1531 * domain we're flushing with our flush.
@@ -1664,6 +1675,7 @@ i915_gem_retire_requests(struct drm_device *dev)
1664 i915_gem_retire_request(dev, request); 1675 i915_gem_retire_request(dev, request);
1665 1676
1666 list_del(&request->list); 1677 list_del(&request->list);
1678 list_del(&request->client_list);
1667 drm_free(request, sizeof(*request), DRM_MEM_DRIVER); 1679 drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
1668 } else 1680 } else
1669 break; 1681 break;
@@ -1977,7 +1989,7 @@ i915_gem_evict_something(struct drm_device *dev)
1977 i915_gem_flush(dev, 1989 i915_gem_flush(dev,
1978 obj->write_domain, 1990 obj->write_domain,
1979 obj->write_domain); 1991 obj->write_domain);
1980 i915_add_request(dev, obj->write_domain); 1992 i915_add_request(dev, NULL, obj->write_domain);
1981 1993
1982 obj = NULL; 1994 obj = NULL;
1983 continue; 1995 continue;
@@ -2248,7 +2260,7 @@ try_again:
2248 i915_gem_flush(dev, 2260 i915_gem_flush(dev,
2249 I915_GEM_GPU_DOMAINS, 2261 I915_GEM_GPU_DOMAINS,
2250 I915_GEM_GPU_DOMAINS); 2262 I915_GEM_GPU_DOMAINS);
2251 seqno = i915_add_request(dev, 2263 seqno = i915_add_request(dev, NULL,
2252 I915_GEM_GPU_DOMAINS); 2264 I915_GEM_GPU_DOMAINS);
2253 if (seqno == 0) 2265 if (seqno == 0)
2254 return -ENOMEM; 2266 return -ENOMEM;
@@ -2452,7 +2464,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2452 2464
2453 /* Queue the GPU write cache flushing we need. */ 2465 /* Queue the GPU write cache flushing we need. */
2454 i915_gem_flush(dev, 0, obj->write_domain); 2466 i915_gem_flush(dev, 0, obj->write_domain);
2455 seqno = i915_add_request(dev, obj->write_domain); 2467 seqno = i915_add_request(dev, NULL, obj->write_domain);
2456 obj->write_domain = 0; 2468 obj->write_domain = 0;
2457 i915_gem_object_move_to_active(obj, seqno); 2469 i915_gem_object_move_to_active(obj, seqno);
2458} 2470}
@@ -3089,6 +3101,10 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
3089/* Throttle our rendering by waiting until the ring has completed our requests 3101/* Throttle our rendering by waiting until the ring has completed our requests
3090 * emitted over 20 msec ago. 3102 * emitted over 20 msec ago.
3091 * 3103 *
3104 * Note that if we were to use the current jiffies each time around the loop,
3105 * we wouldn't escape the function with any frames outstanding if the time to
3106 * render a frame was over 20ms.
3107 *
3092 * This should get us reasonable parallelism between CPU and GPU but also 3108 * This should get us reasonable parallelism between CPU and GPU but also
3093 * relatively low latency when blocking on a particular request to finish. 3109 * relatively low latency when blocking on a particular request to finish.
3094 */ 3110 */
@@ -3097,15 +3113,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3097{ 3113{
3098 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3114 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3099 int ret = 0; 3115 int ret = 0;
3100 uint32_t seqno; 3116 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3101 3117
3102 mutex_lock(&dev->struct_mutex); 3118 mutex_lock(&dev->struct_mutex);
3103 seqno = i915_file_priv->mm.last_gem_throttle_seqno; 3119 while (!list_empty(&i915_file_priv->mm.request_list)) {
3104 i915_file_priv->mm.last_gem_throttle_seqno = 3120 struct drm_i915_gem_request *request;
3105 i915_file_priv->mm.last_gem_seqno; 3121
3106 if (seqno) 3122 request = list_first_entry(&i915_file_priv->mm.request_list,
3107 ret = i915_wait_request(dev, seqno); 3123 struct drm_i915_gem_request,
3124 client_list);
3125
3126 if (time_after_eq(request->emitted_jiffies, recent_enough))
3127 break;
3128
3129 ret = i915_wait_request(dev, request->seqno);
3130 if (ret != 0)
3131 break;
3132 }
3108 mutex_unlock(&dev->struct_mutex); 3133 mutex_unlock(&dev->struct_mutex);
3134
3109 return ret; 3135 return ret;
3110} 3136}
3111 3137
@@ -3187,7 +3213,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
3187 struct drm_file *file_priv) 3213 struct drm_file *file_priv)
3188{ 3214{
3189 drm_i915_private_t *dev_priv = dev->dev_private; 3215 drm_i915_private_t *dev_priv = dev->dev_private;
3190 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3191 struct drm_i915_gem_execbuffer *args = data; 3216 struct drm_i915_gem_execbuffer *args = data;
3192 struct drm_i915_gem_exec_object *exec_list = NULL; 3217 struct drm_i915_gem_exec_object *exec_list = NULL;
3193 struct drm_gem_object **object_list = NULL; 3218 struct drm_gem_object **object_list = NULL;
@@ -3363,7 +3388,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
3363 dev->invalidate_domains, 3388 dev->invalidate_domains,
3364 dev->flush_domains); 3389 dev->flush_domains);
3365 if (dev->flush_domains) 3390 if (dev->flush_domains)
3366 (void)i915_add_request(dev, dev->flush_domains); 3391 (void)i915_add_request(dev, file_priv,
3392 dev->flush_domains);
3367 } 3393 }
3368 3394
3369 for (i = 0; i < args->buffer_count; i++) { 3395 for (i = 0; i < args->buffer_count; i++) {
@@ -3412,9 +3438,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
3412 * *some* interrupts representing completion of buffers that we can 3438 * *some* interrupts representing completion of buffers that we can
3413 * wait on when trying to clear up gtt space). 3439 * wait on when trying to clear up gtt space).
3414 */ 3440 */
3415 seqno = i915_add_request(dev, flush_domains); 3441 seqno = i915_add_request(dev, file_priv, flush_domains);
3416 BUG_ON(seqno == 0); 3442 BUG_ON(seqno == 0);
3417 i915_file_priv->mm.last_gem_seqno = seqno;
3418 for (i = 0; i < args->buffer_count; i++) { 3443 for (i = 0; i < args->buffer_count; i++) {
3419 struct drm_gem_object *obj = object_list[i]; 3444 struct drm_gem_object *obj = object_list[i];
3420 3445
@@ -3802,7 +3827,7 @@ i915_gem_idle(struct drm_device *dev)
3802 */ 3827 */
3803 i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), 3828 i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
3804 ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 3829 ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
3805 seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU); 3830 seqno = i915_add_request(dev, NULL, ~I915_GEM_DOMAIN_CPU);
3806 3831
3807 if (seqno == 0) { 3832 if (seqno == 0) {
3808 mutex_unlock(&dev->struct_mutex); 3833 mutex_unlock(&dev->struct_mutex);
@@ -4352,3 +4377,17 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4352 drm_agp_chipset_flush(dev); 4377 drm_agp_chipset_flush(dev);
4353 return 0; 4378 return 0;
4354} 4379}
4380
4381void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4382{
4383 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4384
4385 /* Clean up our request list when the client is going away, so that
4386 * later retire_requests won't dereference our soon-to-be-gone
4387 * file_priv.
4388 */
4389 mutex_lock(&dev->struct_mutex);
4390 while (!list_empty(&i915_file_priv->mm.request_list))
4391 list_del_init(i915_file_priv->mm.request_list.next);
4392 mutex_unlock(&dev->struct_mutex);
4393}