author | Chris Wilson <chris@chris-wilson.co.uk> | 2013-09-25 12:34:56 -0400
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-10-03 14:01:31 -0400
commit | b29c19b645287f7062e17d70fa4e9781a01a5d88
tree | e5a6fb455c9b3d16579102004d3cb600dfc2b133 /drivers/gpu/drm/i915/i915_gem.c
parent | 094f9a54e35500739da185cdb78f2e92fc379458
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stays high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at downclocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and by the fact that energy use depends upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
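Concretely, the wait-side change reduces to the following shape (a simplified excerpt of the __wait_seqno hunk in the diff below, with explanatory comments that are not part of the patch):

    if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) {
            /* Ask for the maximum RPS frequency before we go to sleep, so
             * the work this client is blocked on completes sooner. */
            gen6_rps_boost(dev_priv);

            /* Re-arm the per-file timer that clears rps_wait_boost once the
             * client has gone ~100ms without waiting, allowing it to earn
             * another boost later. */
            if (file_priv)
                    mod_delayed_work(dev_priv->wq,
                                     &file_priv->mm.idle_work,
                                     msecs_to_jiffies(100));
    }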
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle - for example, exposé or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
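The ratelimit itself is just a per-file atomic flag: the first wait in a window consumes it (can_wait_boost() in the diff uses atomic_xchg()), and a delayed work item re-arms it once the client has stopped waiting for ~100ms. The following is a standalone userspace sketch of the same pattern, using a timestamp in place of the kernel's delayed work; the window length and all names here are illustrative, not taken from the driver:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    #define BOOST_WINDOW_MS 100     /* assumed window, mirrors the patch */

    struct client {
            atomic_bool wait_boosted;       /* one boost per window */
            struct timespec last_wait;      /* when this client last waited */
    };

    static long ms_since(const struct timespec *t)
    {
            struct timespec now;
            clock_gettime(CLOCK_MONOTONIC, &now);
            return (now.tv_sec - t->tv_sec) * 1000 +
                   (now.tv_nsec - t->tv_nsec) / 1000000;
    }

    /* Called whenever the client blocks waiting on the GPU; returns true if
     * this wait is allowed to boost the GPU frequency. */
    static bool client_wait_boost(struct client *c)
    {
            /* Re-arm once the client has been idle long enough; the kernel
             * does this from a delayed work item instead of polling time. */
            if (ms_since(&c->last_wait) > BOOST_WINDOW_MS)
                    atomic_store(&c->wait_boosted, false);

            clock_gettime(CLOCK_MONOTONIC, &c->last_wait);

            /* The first wait in the window wins the boost; later waits in
             * the same window are ratelimited. */
            return !atomic_exchange(&c->wait_boosted, true);
    }

    int main(void)
    {
            struct client c = { .wait_boosted = false };

            clock_gettime(CLOCK_MONOTONIC, &c.last_wait);
            for (int i = 0; i < 3; i++)
                    printf("wait %d: boost=%d\n", i, client_wait_boost(&c));
            return 0;
    }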
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) it may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 135
1 file changed, 93 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53e315131700..19ecfa8ad2e1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -982,6 +982,14 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
         return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
+static bool can_wait_boost(struct drm_i915_file_private *file_priv)
+{
+        if (file_priv == NULL)
+                return true;
+
+        return !atomic_xchg(&file_priv->rps_wait_boost, true);
+}
+
 /**
  * __wait_seqno - wait until execution of seqno has finished
  * @ring: the ring expected to report seqno
@@ -1002,7 +1010,9 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
  */
 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
                         unsigned reset_counter,
-                        bool interruptible, struct timespec *timeout)
+                        bool interruptible,
+                        struct timespec *timeout,
+                        struct drm_i915_file_private *file_priv)
 {
         drm_i915_private_t *dev_priv = ring->dev->dev_private;
         struct timespec before, now;
@@ -1017,6 +1027,14 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 
         timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 1;
 
+        if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) {
+                gen6_rps_boost(dev_priv);
+                if (file_priv)
+                        mod_delayed_work(dev_priv->wq,
+                                         &file_priv->mm.idle_work,
+                                         msecs_to_jiffies(100));
+        }
+
         if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)) &&
             WARN_ON(!ring->irq_get(ring)))
                 return -ENODEV;
@@ -1116,7 +1134,7 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
 
         return __wait_seqno(ring, seqno,
                             atomic_read(&dev_priv->gpu_error.reset_counter),
-                            interruptible, NULL);
+                            interruptible, NULL, NULL);
 }
 
 static int
@@ -1166,6 +1184,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
  */
 static __must_check int
 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
+                                            struct drm_file *file,
                                             bool readonly)
 {
         struct drm_device *dev = obj->base.dev;
@@ -1192,7 +1211,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 
         reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
         mutex_unlock(&dev->struct_mutex);
-        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
+        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file->driver_priv);
         mutex_lock(&dev->struct_mutex);
         if (ret)
                 return ret;
@@ -1241,7 +1260,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
          * We will repeat the flush holding the lock in the normal manner
          * to catch cases where we are gazumped.
          */
-        ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
+        ret = i915_gem_object_wait_rendering__nonblocking(obj, file, !write_domain);
         if (ret)
                 goto unref;
 
@@ -2162,6 +2181,7 @@ int __i915_add_request(struct intel_ring_buffer *ring,
         i915_queue_hangcheck(ring->dev);
 
         if (was_empty) {
+                cancel_delayed_work_sync(&dev_priv->mm.idle_work);
                 queue_delayed_work(dev_priv->wq,
                                    &dev_priv->mm.retire_work,
                                    round_jiffies_up_relative(HZ));
@@ -2183,10 +2203,8 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
                 return;
 
         spin_lock(&file_priv->mm.lock);
-        if (request->file_priv) {
-                list_del(&request->client_list);
-                request->file_priv = NULL;
-        }
+        list_del(&request->client_list);
+        request->file_priv = NULL;
         spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2450,57 +2468,53 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
         WARN_ON(i915_verify_lists(ring->dev));
 }
 
-void
+bool
 i915_gem_retire_requests(struct drm_device *dev)
 {
         drm_i915_private_t *dev_priv = dev->dev_private;
         struct intel_ring_buffer *ring;
+        bool idle = true;
         int i;
 
-        for_each_ring(ring, dev_priv, i)
+        for_each_ring(ring, dev_priv, i) {
                 i915_gem_retire_requests_ring(ring);
+                idle &= list_empty(&ring->request_list);
+        }
+
+        if (idle)
+                mod_delayed_work(dev_priv->wq,
+                                 &dev_priv->mm.idle_work,
+                                 msecs_to_jiffies(100));
+
+        return idle;
 }
 
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
-        drm_i915_private_t *dev_priv;
-        struct drm_device *dev;
-        struct intel_ring_buffer *ring;
+        struct drm_i915_private *dev_priv =
+                container_of(work, typeof(*dev_priv), mm.retire_work.work);
+        struct drm_device *dev = dev_priv->dev;
         bool idle;
-        int i;
-
-        dev_priv = container_of(work, drm_i915_private_t,
-                                mm.retire_work.work);
-        dev = dev_priv->dev;
 
         /* Come back later if the device is busy... */
-        if (!mutex_trylock(&dev->struct_mutex)) {
-                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
-                                   round_jiffies_up_relative(HZ));
-                return;
-        }
-
-        i915_gem_retire_requests(dev);
-
-        /* Send a periodic flush down the ring so we don't hold onto GEM
-         * objects indefinitely.
-         */
-        idle = true;
-        for_each_ring(ring, dev_priv, i) {
-                if (ring->gpu_caches_dirty)
-                        i915_add_request(ring, NULL);
-
-                idle &= list_empty(&ring->request_list);
+        idle = false;
+        if (mutex_trylock(&dev->struct_mutex)) {
+                idle = i915_gem_retire_requests(dev);
+                mutex_unlock(&dev->struct_mutex);
         }
-
-        if (!dev_priv->ums.mm_suspended && !idle)
+        if (!idle)
                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
                                    round_jiffies_up_relative(HZ));
-        if (idle)
-                intel_mark_idle(dev);
+}
 
-        mutex_unlock(&dev->struct_mutex);
+static void
+i915_gem_idle_work_handler(struct work_struct *work)
+{
+        struct drm_i915_private *dev_priv =
+                container_of(work, typeof(*dev_priv), mm.idle_work.work);
+
+        intel_mark_idle(dev_priv->dev);
 }
 
 /**
@@ -2598,7 +2612,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
         reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
         mutex_unlock(&dev->struct_mutex);
 
-        ret = __wait_seqno(ring, seqno, reset_counter, true, timeout);
+        ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
         if (timeout)
                 args->timeout_ns = timespec_to_ns(timeout);
         return ret;
@@ -3809,7 +3823,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
         if (seqno == 0)
                 return 0;
 
-        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL);
+        ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
         if (ret == 0)
                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
@@ -4279,6 +4293,7 @@ i915_gem_idle(struct drm_device *dev)
 
         /* Cancel the retire work handler, which should be idle now. */
         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
+        cancel_delayed_work_sync(&dev_priv->mm.idle_work);
 
         return 0;
 }
@@ -4612,6 +4627,8 @@ i915_gem_load(struct drm_device *dev)
                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
                           i915_gem_retire_work_handler);
+        INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
+                          i915_gem_idle_work_handler);
         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 
         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
@@ -4836,6 +4853,8 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
         struct drm_i915_file_private *file_priv = file->driver_priv;
 
+        cancel_delayed_work_sync(&file_priv->mm.idle_work);
+
         /* Clean up our request list when the client is going away, so that
          * later retire_requests won't dereference our soon-to-be-gone
          * file_priv.
@@ -4853,6 +4872,38 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
         spin_unlock(&file_priv->mm.lock);
 }
 
+static void
+i915_gem_file_idle_work_handler(struct work_struct *work)
+{
+        struct drm_i915_file_private *file_priv =
+                container_of(work, typeof(*file_priv), mm.idle_work.work);
+
+        atomic_set(&file_priv->rps_wait_boost, false);
+}
+
+int i915_gem_open(struct drm_device *dev, struct drm_file *file)
+{
+        struct drm_i915_file_private *file_priv;
+
+        DRM_DEBUG_DRIVER("\n");
+
+        file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
+        if (!file_priv)
+                return -ENOMEM;
+
+        file->driver_priv = file_priv;
+        file_priv->dev_priv = dev->dev_private;
+
+        spin_lock_init(&file_priv->mm.lock);
+        INIT_LIST_HEAD(&file_priv->mm.request_list);
+        INIT_DELAYED_WORK(&file_priv->mm.idle_work,
+                          i915_gem_file_idle_work_handler);
+
+        idr_init(&file_priv->context_idr);
+
+        return 0;
+}
+
 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 {
         if (!mutex_is_locked(mutex))