author	Chris Wilson <chris@chris-wilson.co.uk>	2013-09-25 12:34:56 -0400
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2013-10-03 14:01:31 -0400
commit	b29c19b645287f7062e17d70fa4e9781a01a5d88 (patch)
tree	e5a6fb455c9b3d16579102004d3cb600dfc2b133 /drivers/gpu/drm/i915/i915_gem.c
parent	094f9a54e35500739da185cdb78f2e92fc379458 (diff)
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results from the GPU, give the GPU a kick to boost its frequency. This should work to reduce user interface stalls and to quickly promote mesa to high frequencies - but the cost is that our requested frequency stalls high (as we do not idle for long enough before rc6 to start reducing frequencies, nor are we aggressive at down clocking an underused GPU). However, this should be mitigated by rc6 itself powering off the GPU when idle, and that energy use is dependent upon the workload of the GPU in addition to its frequency (e.g. the math or sampler functions only consume power when used). Still, this is likely to adversely affect light workloads.

In particular, this nearly eliminates the highly noticeable wake-up lag in animations from idle - for example, expose or workspace transitions. (However, given the situation where we fail to downclock, our requested frequency is almost always the maximum, except for Baytrail where we manually downclock upon idling. This often masks the latency of upclocking after being idle, so animations are typically smooth - at the cost of increased power consumption.)

Stéphane raised the concern that this will punish good applications and reward bad applications - but due to the nature of how mesa performs its client throttling, I believe all mesa applications will be roughly equally affected. To address this concern, and to prevent applications like compositors from permanently boosting the RPS state, we ratelimit the frequency of the wait-boosts each client receives.

Unfortunately, this technique is ineffective with Ironlake - which also has dynamic render power states and suffers just as dramatically. For Ironlake, the thermal/power headroom is shared with the CPU through Intelligent Power Sharing and the intel-ips module. This leaves us with no GPU boost frequencies available when coming out of idle, and due to hardware limitations we cannot change the arbitration between the CPU and GPU quickly enough to be effective.

v2: Limit each client to receiving a single boost for each active period. Tested by QA to only marginally increase power, and to demonstrably increase throughput in games. No latency measurements yet.

v3: Cater for front-buffer rendering with manual throttling.

v4: Tidy up.

v5: Sadly the compositor needs frequent boosts as it may never idle, but due to its picking mechanism (using ReadPixels) may require frequent waits. Those waits, along with the waits for the vrefresh swap, conspire to keep the GPU at low frequencies despite the interactive latency. To overcome this we ditch the one-boost-per-active-period and just ratelimit the number of wait-boosts each client can receive.

Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
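[Editorial illustration] The ratelimiting described above reduces to a one-shot flag per client: an atomic exchange lets only the first blocking wait since the flag was last cleared request an RPS boost, and the flag is re-armed once the client has been idle for a while (the delayed idle work in the patch). Below is a minimal, self-contained userspace C model of that gate, not the kernel code itself; the names struct client, client_can_boost() and client_idle() are hypothetical stand-ins for drm_i915_file_private, can_wait_boost() and the file idle-work handler, and C11 atomic_exchange() stands in for the kernel's atomic_xchg().

/* Userspace sketch of the per-client wait-boost gate (hypothetical names). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct client {
	atomic_bool rps_wait_boost;	/* set once a boost has been granted */
};

/* Returns true if this blocking wait may request a GPU frequency boost. */
static bool client_can_boost(struct client *c)
{
	if (c == NULL)			/* kernel-internal waits always boost */
		return true;
	return !atomic_exchange(&c->rps_wait_boost, true);
}

/* Re-arms the boost once the client has been idle long enough
 * (played by the delayed mm.idle_work in the patch). */
static void client_idle(struct client *c)
{
	atomic_store(&c->rps_wait_boost, false);
}

int main(void)
{
	struct client mesa = { .rps_wait_boost = false };

	printf("wait 1 boosts: %d\n", client_can_boost(&mesa));	/* 1 */
	printf("wait 2 boosts: %d\n", client_can_boost(&mesa));	/* 0: ratelimited */
	client_idle(&mesa);					/* ~100ms of idle */
	printf("wait 3 boosts: %d\n", client_can_boost(&mesa));	/* 1 again */
	return 0;
}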
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem.c	135
1 file changed, 93 insertions, 42 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53e315131700..19ecfa8ad2e1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -982,6 +982,14 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
+static bool can_wait_boost(struct drm_i915_file_private *file_priv)
+{
+	if (file_priv == NULL)
+		return true;
+
+	return !atomic_xchg(&file_priv->rps_wait_boost, true);
+}
+
 /**
  * __wait_seqno - wait until execution of seqno has finished
  * @ring: the ring expected to report seqno
@@ -1002,7 +1010,9 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
  */
 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			unsigned reset_counter,
-			bool interruptible, struct timespec *timeout)
+			bool interruptible,
+			struct timespec *timeout,
+			struct drm_i915_file_private *file_priv)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct timespec before, now;
@@ -1017,6 +1027,14 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 
 	timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 1;
 
+	if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) {
+		gen6_rps_boost(dev_priv);
+		if (file_priv)
+			mod_delayed_work(dev_priv->wq,
+					 &file_priv->mm.idle_work,
+					 msecs_to_jiffies(100));
+	}
+
 	if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)) &&
 	    WARN_ON(!ring->irq_get(ring)))
 		return -ENODEV;
@@ -1116,7 +1134,7 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
 
 	return __wait_seqno(ring, seqno,
 			    atomic_read(&dev_priv->gpu_error.reset_counter),
-			    interruptible, NULL);
+			    interruptible, NULL, NULL);
 }
 
 static int
@@ -1166,6 +1184,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
  */
 static __must_check int
 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
+					    struct drm_file *file,
 					    bool readonly)
 {
 	struct drm_device *dev = obj->base.dev;
@@ -1192,7 +1211,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
+	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file->driver_priv);
 	mutex_lock(&dev->struct_mutex);
 	if (ret)
 		return ret;
@@ -1241,7 +1260,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 * We will repeat the flush holding the lock in the normal manner
 	 * to catch cases where we are gazumped.
 	 */
-	ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
+	ret = i915_gem_object_wait_rendering__nonblocking(obj, file, !write_domain);
 	if (ret)
 		goto unref;
 
@@ -2162,6 +2181,7 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	i915_queue_hangcheck(ring->dev);
 
 	if (was_empty) {
+		cancel_delayed_work_sync(&dev_priv->mm.idle_work);
 		queue_delayed_work(dev_priv->wq,
 				   &dev_priv->mm.retire_work,
 				   round_jiffies_up_relative(HZ));
@@ -2183,10 +2203,8 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 		return;
 
 	spin_lock(&file_priv->mm.lock);
-	if (request->file_priv) {
-		list_del(&request->client_list);
-		request->file_priv = NULL;
-	}
+	list_del(&request->client_list);
+	request->file_priv = NULL;
 	spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2450,57 +2468,53 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 	WARN_ON(i915_verify_lists(ring->dev));
 }
 
-void
+bool
 i915_gem_retire_requests(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring;
+	bool idle = true;
 	int i;
 
-	for_each_ring(ring, dev_priv, i)
+	for_each_ring(ring, dev_priv, i) {
 		i915_gem_retire_requests_ring(ring);
+		idle &= list_empty(&ring->request_list);
+	}
+
+	if (idle)
+		mod_delayed_work(dev_priv->wq,
+				 &dev_priv->mm.idle_work,
+				 msecs_to_jiffies(100));
+
+	return idle;
 }
 
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
-	drm_i915_private_t *dev_priv;
-	struct drm_device *dev;
-	struct intel_ring_buffer *ring;
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), mm.retire_work.work);
+	struct drm_device *dev = dev_priv->dev;
 	bool idle;
-	int i;
-
-	dev_priv = container_of(work, drm_i915_private_t,
-				mm.retire_work.work);
-	dev = dev_priv->dev;
 
 	/* Come back later if the device is busy... */
-	if (!mutex_trylock(&dev->struct_mutex)) {
-		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
-				   round_jiffies_up_relative(HZ));
-		return;
-	}
-
-	i915_gem_retire_requests(dev);
-
-	/* Send a periodic flush down the ring so we don't hold onto GEM
-	 * objects indefinitely.
-	 */
-	idle = true;
-	for_each_ring(ring, dev_priv, i) {
-		if (ring->gpu_caches_dirty)
-			i915_add_request(ring, NULL);
-
-		idle &= list_empty(&ring->request_list);
+	idle = false;
+	if (mutex_trylock(&dev->struct_mutex)) {
+		idle = i915_gem_retire_requests(dev);
+		mutex_unlock(&dev->struct_mutex);
 	}
-
-	if (!dev_priv->ums.mm_suspended && !idle)
+	if (!idle)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
 				   round_jiffies_up_relative(HZ));
-	if (idle)
-		intel_mark_idle(dev);
+}
 
-	mutex_unlock(&dev->struct_mutex);
+static void
+i915_gem_idle_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), mm.idle_work.work);
+
+	intel_mark_idle(dev_priv->dev);
 }
 
 /**
@@ -2598,7 +2612,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout);
+	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
 	if (timeout)
 		args->timeout_ns = timespec_to_ns(timeout);
 	return ret;
@@ -3809,7 +3823,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	if (seqno == 0)
 		return 0;
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
+	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
 	if (ret == 0)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
@@ -4279,6 +4293,7 @@ i915_gem_idle(struct drm_device *dev)
 
 	/* Cancel the retire work handler, which should be idle now. */
 	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
+	cancel_delayed_work_sync(&dev_priv->mm.idle_work);
 
 	return 0;
 }
@@ -4612,6 +4627,8 @@ i915_gem_load(struct drm_device *dev)
 		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
 			  i915_gem_retire_work_handler);
+	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
+			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 
 	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
@@ -4836,6 +4853,8 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
+	cancel_delayed_work_sync(&file_priv->mm.idle_work);
+
 	/* Clean up our request list when the client is going away, so that
 	 * later retire_requests won't dereference our soon-to-be-gone
 	 * file_priv.
@@ -4853,6 +4872,38 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static void
+i915_gem_file_idle_work_handler(struct work_struct *work)
+{
+	struct drm_i915_file_private *file_priv =
+		container_of(work, typeof(*file_priv), mm.idle_work.work);
+
+	atomic_set(&file_priv->rps_wait_boost, false);
+}
+
+int i915_gem_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
+	if (!file_priv)
+		return -ENOMEM;
+
+	file->driver_priv = file_priv;
+	file_priv->dev_priv = dev->dev_private;
+
+	spin_lock_init(&file_priv->mm.lock);
+	INIT_LIST_HEAD(&file_priv->mm.request_list);
+	INIT_DELAYED_WORK(&file_priv->mm.idle_work,
+			  i915_gem_file_idle_work_handler);
+
+	idr_init(&file_priv->context_idr);
+
+	return 0;
+}
+
 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 {
 	if (!mutex_is_locked(mutex))