aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2016-07-06 07:39:02 -0400
committerChris Wilson <chris@chris-wilson.co.uk>2016-07-06 07:47:39 -0400
commitaca34b6e1ca909ba15f6c1c1dc89bea8f455403f (patch)
treea2951a0be74a87f21e6170674dc39f4dbc301a2b
parent99fe4a5f7383559ee120492124365e708a587130 (diff)
drm/i915: Group the irq breadcrumb variables into the same cacheline
As we inspect both the tasklet (to check for an active bottom-half) and set the irq-posted flag at the same time (both in the interrupt handler and then in the bottom-half), group those two together into the same cacheline. (Not having total control over placement of the struct means we can't guarantee the cacheline boundary, we need to align the kmalloc and then each struct, but the grouping should help.) v2: Try a couple of different names for the state touched by the user interrupt handler. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1467805142-22219-3-git-send-email-chris@chris-wilson.co.uk
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c8
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h6
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c12
-rw-r--r--drivers/gpu/drm/i915/intel_breadcrumbs.c28
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h18
5 files changed, 37 insertions, 35 deletions
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a59e0caeda64..3d05cae0e92d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -793,8 +793,8 @@ static void i915_ring_seqno_info(struct seq_file *m,
793 793
794 seq_printf(m, "Current sequence (%s): %x\n", 794 seq_printf(m, "Current sequence (%s): %x\n",
795 engine->name, intel_engine_get_seqno(engine)); 795 engine->name, intel_engine_get_seqno(engine));
796 seq_printf(m, "Current user interrupts (%s): %x\n", 796 seq_printf(m, "Current user interrupts (%s): %lx\n",
797 engine->name, READ_ONCE(engine->user_interrupts)); 797 engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
798 798
799 spin_lock(&b->lock); 799 spin_lock(&b->lock);
800 for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { 800 for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -1442,9 +1442,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
1442 engine->last_submitted_seqno); 1442 engine->last_submitted_seqno);
1443 seq_printf(m, "\twaiters? %d\n", 1443 seq_printf(m, "\twaiters? %d\n",
1444 intel_engine_has_waiter(engine)); 1444 intel_engine_has_waiter(engine));
1445 seq_printf(m, "\tuser interrupts = %x [current %x]\n", 1445 seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
1446 engine->hangcheck.user_interrupts, 1446 engine->hangcheck.user_interrupts,
1447 READ_ONCE(engine->user_interrupts)); 1447 READ_ONCE(engine->breadcrumbs.irq_wakeups));
1448 seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", 1448 seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
1449 (long long)engine->hangcheck.acthd, 1449 (long long)engine->hangcheck.acthd,
1450 (long long)acthd[id]); 1450 (long long)acthd[id]);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 11e9769411e9..d2c60992a508 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3998,8 +3998,8 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
3998 * is woken. 3998 * is woken.
3999 */ 3999 */
4000 if (engine->irq_seqno_barrier && 4000 if (engine->irq_seqno_barrier &&
4001 READ_ONCE(engine->breadcrumbs.tasklet) == current && 4001 READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current &&
4002 cmpxchg_relaxed(&engine->irq_posted, 1, 0)) { 4002 cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
4003 struct task_struct *tsk; 4003 struct task_struct *tsk;
4004 4004
4005 /* The ordering of irq_posted versus applying the barrier 4005 /* The ordering of irq_posted versus applying the barrier
@@ -4023,7 +4023,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
4023 * irq_posted == false but we are still running). 4023 * irq_posted == false but we are still running).
4024 */ 4024 */
4025 rcu_read_lock(); 4025 rcu_read_lock();
4026 tsk = READ_ONCE(engine->breadcrumbs.tasklet); 4026 tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
4027 if (tsk && tsk != current) 4027 if (tsk && tsk != current)
4028 /* Note that if the bottom-half is changed as we 4028 /* Note that if the bottom-half is changed as we
4029 * are sending the wake-up, the new bottom-half will 4029 * are sending the wake-up, the new bottom-half will
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b77d808b71cd..a69a5fc75bd6 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -977,10 +977,10 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
977 977
978static void notify_ring(struct intel_engine_cs *engine) 978static void notify_ring(struct intel_engine_cs *engine)
979{ 979{
980 smp_store_mb(engine->irq_posted, true); 980 smp_store_mb(engine->breadcrumbs.irq_posted, true);
981 if (intel_engine_wakeup(engine)) { 981 if (intel_engine_wakeup(engine)) {
982 trace_i915_gem_request_notify(engine); 982 trace_i915_gem_request_notify(engine);
983 engine->user_interrupts++; 983 engine->breadcrumbs.irq_wakeups++;
984 } 984 }
985} 985}
986 986
@@ -3054,12 +3054,12 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd)
3054 return HANGCHECK_HUNG; 3054 return HANGCHECK_HUNG;
3055} 3055}
3056 3056
3057static unsigned kick_waiters(struct intel_engine_cs *engine) 3057static unsigned long kick_waiters(struct intel_engine_cs *engine)
3058{ 3058{
3059 struct drm_i915_private *i915 = engine->i915; 3059 struct drm_i915_private *i915 = engine->i915;
3060 unsigned user_interrupts = READ_ONCE(engine->user_interrupts); 3060 unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups);
3061 3061
3062 if (engine->hangcheck.user_interrupts == user_interrupts && 3062 if (engine->hangcheck.user_interrupts == irq_count &&
3063 !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { 3063 !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
3064 if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) 3064 if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
3065 DRM_ERROR("Hangcheck timer elapsed... %s idle\n", 3065 DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
@@ -3068,7 +3068,7 @@ static unsigned kick_waiters(struct intel_engine_cs *engine)
3068 intel_engine_enable_fake_irq(engine); 3068 intel_engine_enable_fake_irq(engine);
3069 } 3069 }
3070 3070
3071 return user_interrupts; 3071 return irq_count;
3072} 3072}
3073/* 3073/*
3074 * This is called when the chip hasn't reported back with completed 3074 * This is called when the chip hasn't reported back with completed
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 6fcbb52e50fb..d89b2c963618 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -49,7 +49,7 @@ static void irq_enable(struct intel_engine_cs *engine)
49 * we still need to force the barrier before reading the seqno, 49 * we still need to force the barrier before reading the seqno,
50 * just in case. 50 * just in case.
51 */ 51 */
52 engine->irq_posted = true; 52 engine->breadcrumbs.irq_posted = true;
53 53
54 spin_lock_irq(&engine->i915->irq_lock); 54 spin_lock_irq(&engine->i915->irq_lock);
55 engine->irq_enable(engine); 55 engine->irq_enable(engine);
@@ -62,7 +62,7 @@ static void irq_disable(struct intel_engine_cs *engine)
62 engine->irq_disable(engine); 62 engine->irq_disable(engine);
63 spin_unlock_irq(&engine->i915->irq_lock); 63 spin_unlock_irq(&engine->i915->irq_lock);
64 64
65 engine->irq_posted = false; 65 engine->breadcrumbs.irq_posted = false;
66} 66}
67 67
68static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) 68static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
@@ -195,7 +195,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
195 } 195 }
196 rb_link_node(&wait->node, parent, p); 196 rb_link_node(&wait->node, parent, p);
197 rb_insert_color(&wait->node, &b->waiters); 197 rb_insert_color(&wait->node, &b->waiters);
198 GEM_BUG_ON(!first && !b->tasklet); 198 GEM_BUG_ON(!first && !b->irq_seqno_bh);
199 199
200 if (completed) { 200 if (completed) {
201 struct rb_node *next = rb_next(completed); 201 struct rb_node *next = rb_next(completed);
@@ -204,7 +204,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
204 if (next && next != &wait->node) { 204 if (next && next != &wait->node) {
205 GEM_BUG_ON(first); 205 GEM_BUG_ON(first);
206 b->first_wait = to_wait(next); 206 b->first_wait = to_wait(next);
207 smp_store_mb(b->tasklet, b->first_wait->tsk); 207 smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
208 /* As there is a delay between reading the current 208 /* As there is a delay between reading the current
209 * seqno, processing the completed tasks and selecting 209 * seqno, processing the completed tasks and selecting
210 * the next waiter, we may have missed the interrupt 210 * the next waiter, we may have missed the interrupt
@@ -216,7 +216,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
216 * in case the seqno passed. 216 * in case the seqno passed.
217 */ 217 */
218 __intel_breadcrumbs_enable_irq(b); 218 __intel_breadcrumbs_enable_irq(b);
219 if (READ_ONCE(engine->irq_posted)) 219 if (READ_ONCE(b->irq_posted))
220 wake_up_process(to_wait(next)->tsk); 220 wake_up_process(to_wait(next)->tsk);
221 } 221 }
222 222
@@ -230,18 +230,18 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
230 if (first) { 230 if (first) {
231 GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); 231 GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
232 b->first_wait = wait; 232 b->first_wait = wait;
233 smp_store_mb(b->tasklet, wait->tsk); 233 smp_store_mb(b->irq_seqno_bh, wait->tsk);
234 /* After assigning ourselves as the new bottom-half, we must 234 /* After assigning ourselves as the new bottom-half, we must
235 * perform a cursory check to prevent a missed interrupt. 235 * perform a cursory check to prevent a missed interrupt.
236 * Either we miss the interrupt whilst programming the hardware, 236 * Either we miss the interrupt whilst programming the hardware,
237 * or if there was a previous waiter (for a later seqno) they 237 * or if there was a previous waiter (for a later seqno) they
238 * may be woken instead of us (due to the inherent race 238 * may be woken instead of us (due to the inherent race
239 * in the unlocked read of b->tasklet in the irq handler) and 239 * in the unlocked read of b->irq_seqno_bh in the irq handler)
240 * so we miss the wake up. 240 * and so we miss the wake up.
241 */ 241 */
242 __intel_breadcrumbs_enable_irq(b); 242 __intel_breadcrumbs_enable_irq(b);
243 } 243 }
244 GEM_BUG_ON(!b->tasklet); 244 GEM_BUG_ON(!b->irq_seqno_bh);
245 GEM_BUG_ON(!b->first_wait); 245 GEM_BUG_ON(!b->first_wait);
246 GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); 246 GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
247 247
@@ -301,7 +301,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
301 const int priority = wakeup_priority(b, wait->tsk); 301 const int priority = wakeup_priority(b, wait->tsk);
302 struct rb_node *next; 302 struct rb_node *next;
303 303
304 GEM_BUG_ON(b->tasklet != wait->tsk); 304 GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);
305 305
306 /* We are the current bottom-half. Find the next candidate, 306 /* We are the current bottom-half. Find the next candidate,
307 * the first waiter in the queue on the remaining oldest 307 * the first waiter in the queue on the remaining oldest
@@ -344,13 +344,13 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
344 * exception rather than a seqno completion. 344 * exception rather than a seqno completion.
345 */ 345 */
346 b->first_wait = to_wait(next); 346 b->first_wait = to_wait(next);
347 smp_store_mb(b->tasklet, b->first_wait->tsk); 347 smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
348 if (b->first_wait->seqno != wait->seqno) 348 if (b->first_wait->seqno != wait->seqno)
349 __intel_breadcrumbs_enable_irq(b); 349 __intel_breadcrumbs_enable_irq(b);
350 wake_up_process(b->tasklet); 350 wake_up_process(b->irq_seqno_bh);
351 } else { 351 } else {
352 b->first_wait = NULL; 352 b->first_wait = NULL;
353 WRITE_ONCE(b->tasklet, NULL); 353 WRITE_ONCE(b->irq_seqno_bh, NULL);
354 __intel_breadcrumbs_disable_irq(b); 354 __intel_breadcrumbs_disable_irq(b);
355 } 355 }
356 } else { 356 } else {
@@ -364,7 +364,7 @@ out_unlock:
364 GEM_BUG_ON(b->first_wait == wait); 364 GEM_BUG_ON(b->first_wait == wait);
365 GEM_BUG_ON(rb_first(&b->waiters) != 365 GEM_BUG_ON(rb_first(&b->waiters) !=
366 (b->first_wait ? &b->first_wait->node : NULL)); 366 (b->first_wait ? &b->first_wait->node : NULL));
367 GEM_BUG_ON(!b->tasklet ^ RB_EMPTY_ROOT(&b->waiters)); 367 GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
368 spin_unlock(&b->lock); 368 spin_unlock(&b->lock);
369} 369}
370 370
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 121294c602c3..12cb7ed90014 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -74,8 +74,8 @@ enum intel_ring_hangcheck_action {
74 74
75struct intel_ring_hangcheck { 75struct intel_ring_hangcheck {
76 u64 acthd; 76 u64 acthd;
77 unsigned long user_interrupts;
77 u32 seqno; 78 u32 seqno;
78 unsigned user_interrupts;
79 int score; 79 int score;
80 enum intel_ring_hangcheck_action action; 80 enum intel_ring_hangcheck_action action;
81 int deadlock; 81 int deadlock;
@@ -167,16 +167,20 @@ struct intel_engine_cs {
167 * the overhead of waking that client is much preferred. 167 * the overhead of waking that client is much preferred.
168 */ 168 */
169 struct intel_breadcrumbs { 169 struct intel_breadcrumbs {
170 struct task_struct *irq_seqno_bh; /* bh for user interrupts */
171 unsigned long irq_wakeups;
172 bool irq_posted;
173
170 spinlock_t lock; /* protects the lists of requests */ 174 spinlock_t lock; /* protects the lists of requests */
171 struct rb_root waiters; /* sorted by retirement, priority */ 175 struct rb_root waiters; /* sorted by retirement, priority */
172 struct rb_root signals; /* sorted by retirement */ 176 struct rb_root signals; /* sorted by retirement */
173 struct intel_wait *first_wait; /* oldest waiter by retirement */ 177 struct intel_wait *first_wait; /* oldest waiter by retirement */
174 struct task_struct *tasklet; /* bh for user interrupts */
175 struct task_struct *signaler; /* used for fence signalling */ 178 struct task_struct *signaler; /* used for fence signalling */
176 struct drm_i915_gem_request *first_signal; 179 struct drm_i915_gem_request *first_signal;
177 struct timer_list fake_irq; /* used after a missed interrupt */ 180 struct timer_list fake_irq; /* used after a missed interrupt */
178 bool irq_enabled; 181
179 bool rpm_wakelock; 182 bool irq_enabled : 1;
183 bool rpm_wakelock : 1;
180 } breadcrumbs; 184 } breadcrumbs;
181 185
182 /* 186 /*
@@ -189,7 +193,6 @@ struct intel_engine_cs {
189 struct intel_hw_status_page status_page; 193 struct intel_hw_status_page status_page;
190 struct i915_ctx_workarounds wa_ctx; 194 struct i915_ctx_workarounds wa_ctx;
191 195
192 bool irq_posted;
193 u32 irq_keep_mask; /* always keep these interrupts */ 196 u32 irq_keep_mask; /* always keep these interrupts */
194 u32 irq_enable_mask; /* bitmask to enable ring interrupt */ 197 u32 irq_enable_mask; /* bitmask to enable ring interrupt */
195 void (*irq_enable)(struct intel_engine_cs *ring); 198 void (*irq_enable)(struct intel_engine_cs *ring);
@@ -319,7 +322,6 @@ struct intel_engine_cs {
319 * inspecting request list. 322 * inspecting request list.
320 */ 323 */
321 u32 last_submitted_seqno; 324 u32 last_submitted_seqno;
322 unsigned user_interrupts;
323 325
324 bool gpu_caches_dirty; 326 bool gpu_caches_dirty;
325 327
@@ -543,13 +545,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
543 545
544static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine) 546static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
545{ 547{
546 return READ_ONCE(engine->breadcrumbs.tasklet); 548 return READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
547} 549}
548 550
549static inline bool intel_engine_wakeup(struct intel_engine_cs *engine) 551static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
550{ 552{
551 bool wakeup = false; 553 bool wakeup = false;
552 struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.tasklet); 554 struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
553 /* Note that for this not to dangerously chase a dangling pointer, 555 /* Note that for this not to dangerously chase a dangling pointer,
554 * the caller is responsible for ensure that the task remain valid for 556 * the caller is responsible for ensure that the task remain valid for
555 * wake_up_process() i.e. that the RCU grace period cannot expire. 557 * wake_up_process() i.e. that the RCU grace period cannot expire.