author | Chris Wilson <chris@chris-wilson.co.uk> | 2013-09-25 12:34:56 -0400
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-10-03 14:01:31 -0400
commit | b29c19b645287f7062e17d70fa4e9781a01a5d88
tree | e5a6fb455c9b3d16579102004d3cb600dfc2b133 /drivers/gpu/drm/i915/i915_gem.c
parent | 094f9a54e35500739da185cdb78f2e92fc379458
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stays high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at downclocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and by the fact that energy use depends upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
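Concretely, the wait-side change reduces to the following shape (a simplified excerpt of the __wait_seqno hunk in the diff below, with explanatory comments that are not part of the patch):

    if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) {
            /* Ask for the maximum RPS frequency before we go to sleep, so
             * the work this client is blocked on completes sooner. */
            gen6_rps_boost(dev_priv);

            /* Re-arm the per-file timer that clears rps_wait_boost once the
             * client has gone ~100ms without waiting, allowing it to earn
             * another boost later. */
            if (file_priv)
                    mod_delayed_work(dev_priv->wq,
                                     &file_priv->mm.idle_work,
                                     msecs_to_jiffies(100));
    }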
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle - for example, exposé or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
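The ratelimit itself is just a per-file atomic flag: the first wait in a window consumes it (can_wait_boost() in the diff uses atomic_xchg()), and a delayed work item re-arms it once the client has stopped waiting for ~100ms. The following is a standalone userspace sketch of the same pattern, using a timestamp in place of the kernel's delayed work; the window length and all names here are illustrative, not taken from the driver:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    #define BOOST_WINDOW_MS 100     /* assumed window, mirrors the patch */

    struct client {
            atomic_bool wait_boosted;       /* one boost per window */
            struct timespec last_wait;      /* when this client last waited */
    };

    static long ms_since(const struct timespec *t)
    {
            struct timespec now;
            clock_gettime(CLOCK_MONOTONIC, &now);
            return (now.tv_sec - t->tv_sec) * 1000 +
                   (now.tv_nsec - t->tv_nsec) / 1000000;
    }

    /* Called whenever the client blocks waiting on the GPU; returns true if
     * this wait is allowed to boost the GPU frequency. */
    static bool client_wait_boost(struct client *c)
    {
            /* Re-arm once the client has been idle long enough; the kernel
             * does this from a delayed work item instead of polling time. */
            if (ms_since(&c->last_wait) > BOOST_WINDOW_MS)
                    atomic_store(&c->wait_boosted, false);

            clock_gettime(CLOCK_MONOTONIC, &c->last_wait);

            /* The first wait in the window wins the boost; later waits in
             * the same window are ratelimited. */
            return !atomic_exchange(&c->wait_boosted, true);
    }

    int main(void)
    {
            struct client c = { .wait_boosted = false };

            clock_gettime(CLOCK_MONOTONIC, &c.last_wait);
            for (int i = 0; i < 3; i++)
                    printf("wait %d: boost=%d\n", i, client_wait_boost(&c));
            return 0;
    }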
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) it may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 135
1 file changed, 93 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53e315131700..19ecfa8ad2e1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -982,6 +982,14 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
         return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
+static bool can_wait_boost(struct drm_i915_file_private *file_priv)
+{
+        if (file_priv == NULL)
+                return true;
+
+        return !atomic_xchg(&file_priv->rps_wait_boost, true);
+}
+
 /**
  * __wait_seqno - wait until execution of seqno has finished
  * @ring: the ring expected to report seqno
@@ -1002,7 +1010,9 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
  */
 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
                         unsigned reset_counter,
-                        bool interruptible, struct timespec *timeout)
+                        bool interruptible,
+                        struct timespec *timeout,
+                        struct drm_i915_file_private *file_priv)
 {
         drm_i915_private_t *dev_priv = ring->dev->dev_private;
         struct timespec before, now;
@@ -1017,6 +1027,14 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 
         timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 1;
 
+        if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) {
+                gen6_rps_boost(dev_priv);
+                if (file_priv)
+                        mod_delayed_work(dev_priv->wq,
+                                         &file_priv->mm.idle_work,
+                                         msecs_to_jiffies(100));
+        }
+
         if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)) &&
             WARN_ON(!ring->irq_get(ring)))
                 return -ENODEV;
@@ -1116,7 +1134,7 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
 
         return __wait_seqno(ring, seqno,
                             atomic_read(&dev_priv->gpu_error.reset_counter),
-                            interruptible, NULL);
+                            interruptible, NULL, NULL);
 }
 
 static int
@@ -1166,6 +1184,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
  */
 static __must_check int
 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
+                                            struct drm_file *file,
                                             bool readonly)
 {
         struct drm_device *dev = obj->base.dev;
@@ -1192,7 +1211,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 
         reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
         mutex_unlock(&dev->struct_mutex);
-        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
+        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file->driver_priv);
         mutex_lock(&dev->struct_mutex);
         if (ret)
                 return ret;
@@ -1241,7 +1260,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
          * We will repeat the flush holding the lock in the normal manner
          * to catch cases where we are gazumped.
          */
-        ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
+        ret = i915_gem_object_wait_rendering__nonblocking(obj, file, !write_domain);
         if (ret)
                 goto unref;
 
@@ -2162,6 +2181,7 @@ int __i915_add_request(struct intel_ring_buffer *ring,
         i915_queue_hangcheck(ring->dev);
 
         if (was_empty) {
+                cancel_delayed_work_sync(&dev_priv->mm.idle_work);
                 queue_delayed_work(dev_priv->wq,
                                    &dev_priv->mm.retire_work,
                                    round_jiffies_up_relative(HZ));
@@ -2183,10 +2203,8 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
                 return;
 
         spin_lock(&file_priv->mm.lock);
-        if (request->file_priv) {
-                list_del(&request->client_list);
-                request->file_priv = NULL;
-        }
+        list_del(&request->client_list);
+        request->file_priv = NULL;
         spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2450,57 +2468,53 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
         WARN_ON(i915_verify_lists(ring->dev));
 }
 
-void
+bool
 i915_gem_retire_requests(struct drm_device *dev)
 {
         drm_i915_private_t *dev_priv = dev->dev_private;
         struct intel_ring_buffer *ring;
+        bool idle = true;
         int i;
 
-        for_each_ring(ring, dev_priv, i)
+        for_each_ring(ring, dev_priv, i) {
                 i915_gem_retire_requests_ring(ring);
+                idle &= list_empty(&ring->request_list);
+        }
+
+        if (idle)
+                mod_delayed_work(dev_priv->wq,
+                                 &dev_priv->mm.idle_work,
+                                 msecs_to_jiffies(100));
+
+        return idle;
 }
 
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
-        drm_i915_private_t *dev_priv;
-        struct drm_device *dev;
-        struct intel_ring_buffer *ring;
+        struct drm_i915_private *dev_priv =
+                container_of(work, typeof(*dev_priv), mm.retire_work.work);
+        struct drm_device *dev = dev_priv->dev;
         bool idle;
-        int i;
-
-        dev_priv = container_of(work, drm_i915_private_t,
-                                mm.retire_work.work);
-        dev = dev_priv->dev;
 
         /* Come back later if the device is busy... */
-        if (!mutex_trylock(&dev->struct_mutex)) {
-                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
-                                   round_jiffies_up_relative(HZ));
-                return;
-        }
-
-        i915_gem_retire_requests(dev);
-
-        /* Send a periodic flush down the ring so we don't hold onto GEM
-         * objects indefinitely.
-         */
-        idle = true;
-        for_each_ring(ring, dev_priv, i) {
-                if (ring->gpu_caches_dirty)
-                        i915_add_request(ring, NULL);
-
-                idle &= list_empty(&ring->request_list);
+        idle = false;
+        if (mutex_trylock(&dev->struct_mutex)) {
+                idle = i915_gem_retire_requests(dev);
+                mutex_unlock(&dev->struct_mutex);
         }
-
-        if (!dev_priv->ums.mm_suspended && !idle)
+        if (!idle)
                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
                                    round_jiffies_up_relative(HZ));
-        if (idle)
-                intel_mark_idle(dev);
+}
 
-        mutex_unlock(&dev->struct_mutex);
+static void
+i915_gem_idle_work_handler(struct work_struct *work)
+{
+        struct drm_i915_private *dev_priv =
+                container_of(work, typeof(*dev_priv), mm.idle_work.work);
+
+        intel_mark_idle(dev_priv->dev);
 }
 
 /**
@@ -2598,7 +2612,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
         reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
         mutex_unlock(&dev->struct_mutex);
 
-        ret = __wait_seqno(ring, seqno, reset_counter, true, timeout);
+        ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
         if (timeout)
                 args->timeout_ns = timespec_to_ns(timeout);
         return ret;
@@ -3809,7 +3823,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
         if (seqno == 0)
                 return 0;
 
-        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
+        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
         if (ret == 0)
                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
@@ -4279,6 +4293,7 @@ i915_gem_idle(struct drm_device *dev)
 
         /* Cancel the retire work handler, which should be idle now. */
         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
+        cancel_delayed_work_sync(&dev_priv->mm.idle_work);
 
         return 0;
 }
@@ -4612,6 +4627,8 @@ i915_gem_load(struct drm_device *dev)
                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
                           i915_gem_retire_work_handler);
+        INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
+                          i915_gem_idle_work_handler);
         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 
         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
@@ -4836,6 +4853,8 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
         struct drm_i915_file_private *file_priv = file->driver_priv;
 
+        cancel_delayed_work_sync(&file_priv->mm.idle_work);
+
         /* Clean up our request list when the client is going away, so that
          * later retire_requests won't dereference our soon-to-be-gone
          * file_priv.
@@ -4853,6 +4872,38 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
         spin_unlock(&file_priv->mm.lock);
 }
 
+static void
+i915_gem_file_idle_work_handler(struct work_struct *work)
+{
+        struct drm_i915_file_private *file_priv =
+                container_of(work, typeof(*file_priv), mm.idle_work.work);
+
+        atomic_set(&file_priv->rps_wait_boost, false);
+}
+
+int i915_gem_open(struct drm_device *dev, struct drm_file *file)
+{
+        struct drm_i915_file_private *file_priv;
+
+        DRM_DEBUG_DRIVER("\n");
+
+        file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
+        if (!file_priv)
+                return -ENOMEM;
+
+        file->driver_priv = file_priv;
+        file_priv->dev_priv = dev->dev_private;
+
+        spin_lock_init(&file_priv->mm.lock);
+        INIT_LIST_HEAD(&file_priv->mm.request_list);
+        INIT_DELAYED_WORK(&file_priv->mm.idle_work,
+                          i915_gem_file_idle_work_handler);
+
+        idr_init(&file_priv->context_idr);
+
+        return 0;
+}
+
 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 {
         if (!mutex_is_locked(mutex))