author     Tvrtko Ursulin <tvrtko.ursulin@intel.com>   2018-02-13 04:57:45 -0500
committer  Rodrigo Vivi <rodrigo.vivi@intel.com>       2018-02-13 19:55:59 -0500
commit     d3f84c8b097001e3f31f584b793493cb0033a7ae (patch)
tree       564a20b23eb25112a0ed95be0eecbe0a9eb8ea00
parent     edb76b01ac1629bfe17158bea56fcc16bfb57854 (diff)
drm/i915/pmu: Fix PMU enable vs execlists tasklet race
Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
inside for busy-stats") added a tasklet_disable call in busy stats
enabling, but we failed to understand that the PMU enable callback runs
as a hard IRQ (IPI). The consequence is that the PMU enable callback
can interrupt the execlists tasklet, and will then deadlock when it
calls intel_engine_stats_enable->tasklet_disable.

To fix this, move the engine stats enablement and disablement to the
PMU event init and destroy hooks. This allows for a much simpler
implementation, since those hooks run in normal process context and
can sleep.

v2: Extract engine_event_destroy. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside for busy-stats")
Testcase: igt/perf_pmu/enable-race-*
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180205093448.13877-1-tvrtko.ursulin@linux.intel.com
(cherry picked from commit b2f78cda260bc6a1a2d382b1d85a29e69b5b3724)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180213095747.2424-2-tvrtko.ursulin@linux.intel.com
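For illustration, a minimal sketch of the deadlock shape described
above; the demo_* names are hypothetical, not the driver's code.
tasklet_disable() busy-waits until the tasklet finishes running, so
calling it from a hard IRQ that interrupted that same tasklet on the
same CPU can never make progress:

#include <linux/interrupt.h>

/* Softirq side: stands in for the execlists tasklet. */
static void demo_tasklet_fn(unsigned long data)
{
	/* TASKLET_STATE_RUN stays set for the duration of this call. */
}

static DECLARE_TASKLET(demo_tasklet, demo_tasklet_fn, 0);

/*
 * Hard IRQ side: perf delivers the PMU ->enable() callback via IPI.
 * If the IPI lands while demo_tasklet_fn() is running on this CPU,
 * tasklet_disable() spins waiting for a tasklet that cannot resume
 * until this IRQ returns -- deadlock. Doing the enable/disable from
 * the event init/destroy hooks avoids this, because those hooks run
 * in process context and may sleep.
 */
static void demo_pmu_enable_ipi(void)
{
	tasklet_disable(&demo_tasklet);
}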
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c          | 125
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h  |  14
2 files changed, 52 insertions(+), 87 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 55a8a1e29424..337eaa6ede52 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -285,26 +285,41 @@ static u64 count_interrupts(struct drm_i915_private *i915)
 	return sum;
 }
 
-static void i915_pmu_event_destroy(struct perf_event *event)
+static void engine_event_destroy(struct perf_event *event)
 {
-	WARN_ON(event->parent);
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+
+	engine = intel_engine_lookup_user(i915,
+					  engine_event_class(event),
+					  engine_event_instance(event));
+	if (WARN_ON_ONCE(!engine))
+		return;
+
+	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+	    intel_engine_supports_stats(engine))
+		intel_disable_engine_stats(engine);
 }
 
-static int engine_event_init(struct perf_event *event)
+static void i915_pmu_event_destroy(struct perf_event *event)
 {
-	struct drm_i915_private *i915 =
-		container_of(event->pmu, typeof(*i915), pmu.base);
+	WARN_ON(event->parent);
 
-	if (!intel_engine_lookup_user(i915, engine_event_class(event),
-				      engine_event_instance(event)))
-		return -ENODEV;
+	if (is_engine_event(event))
+		engine_event_destroy(event);
+}
 
-	switch (engine_event_sample(event)) {
+static int
+engine_event_status(struct intel_engine_cs *engine,
+		    enum drm_i915_pmu_engine_sample sample)
+{
+	switch (sample) {
 	case I915_SAMPLE_BUSY:
 	case I915_SAMPLE_WAIT:
 		break;
 	case I915_SAMPLE_SEMA:
-		if (INTEL_GEN(i915) < 6)
+		if (INTEL_GEN(engine->i915) < 6)
 			return -ENODEV;
 		break;
 	default:
@@ -314,6 +329,30 @@ static int engine_event_init(struct perf_event *event)
 	return 0;
 }
 
+static int engine_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+	u8 sample;
+	int ret;
+
+	engine = intel_engine_lookup_user(i915, engine_event_class(event),
+					  engine_event_instance(event));
+	if (!engine)
+		return -ENODEV;
+
+	sample = engine_event_sample(event);
+	ret = engine_event_status(engine, sample);
+	if (ret)
+		return ret;
+
+	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+		ret = intel_enable_engine_stats(engine);
+
+	return ret;
+}
+
 static int i915_pmu_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -387,7 +426,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 		if (WARN_ON_ONCE(!engine)) {
 			/* Do nothing */
 		} else if (sample == I915_SAMPLE_BUSY &&
-			   engine->pmu.busy_stats) {
+			   intel_engine_supports_stats(engine)) {
 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
 		} else {
 			val = engine->pmu.sample[sample].cur;
@@ -442,12 +481,6 @@ again:
 	local64_add(new - prev, &event->count);
 }
 
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
-	return intel_engine_supports_stats(engine) &&
-	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -487,21 +520,7 @@ static void i915_pmu_enable(struct perf_event *event)
 
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-		if (engine->pmu.enable_count[sample]++ == 0) {
-			/*
-			 * Enable engine busy stats tracking if needed or
-			 * alternatively cancel the scheduled disable.
-			 *
-			 * If the delayed disable was pending, cancel it and
-			 * in this case do not enable since it already is.
-			 */
-			if (engine_needs_busy_stats(engine) &&
-			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = true;
-				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-					intel_enable_engine_stats(engine);
-			}
-		}
+		engine->pmu.enable_count[sample]++;
 	}
 
 	/*
@@ -514,14 +533,6 @@ static void i915_pmu_enable(struct perf_event *event)
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
-static void __disable_busy_stats(struct work_struct *work)
-{
-	struct intel_engine_cs *engine =
-	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
-	intel_disable_engine_stats(engine);
-}
-
 static void i915_pmu_disable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -545,26 +556,8 @@ static void i915_pmu_disable(struct perf_event *event)
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
 		 */
-		if (--engine->pmu.enable_count[sample] == 0) {
+		if (--engine->pmu.enable_count[sample] == 0)
 			engine->pmu.enable &= ~BIT(sample);
-			if (!engine_needs_busy_stats(engine) &&
-			    engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = false;
-				/*
-				 * We request a delayed disable to handle the
-				 * rapid on/off cycles on events, which can
-				 * happen when tools like perf stat start, in a
-				 * nicer way.
-				 *
-				 * In addition, this also helps with busy stats
-				 * accuracy with background CPU offline/online
-				 * migration events.
-				 */
-				queue_delayed_work(system_wq,
-						   &engine->pmu.disable_busy_stats,
-						   round_jiffies_up_relative(HZ));
-			}
-		}
 	}
 
 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -797,8 +790,6 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	int ret;
 
 	if (INTEL_GEN(i915) <= 2) {
@@ -820,10 +811,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	i915->pmu.timer.function = i915_sample;
 
-	for_each_engine(engine, i915, id)
-		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
-				  __disable_busy_stats);
-
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret)
 		goto err;
@@ -843,9 +830,6 @@ err:
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -853,11 +837,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
-	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(engine->pmu.busy_stats);
-		flush_delayed_work(&engine->pmu.disable_busy_stats);
-	}
-
 	i915_pmu_unregister_cpuhp_state(i915);
 
 	perf_pmu_unregister(&i915->pmu.base);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..a0e7a6c2a57c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -366,20 +366,6 @@ struct intel_engine_cs {
 		 */
 #define I915_ENGINE_SAMPLE_MAX	(I915_SAMPLE_SEMA + 1)
 		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
-		/**
-		 * @busy_stats: Has enablement of engine stats tracking been
-		 * requested.
-		 */
-		bool busy_stats;
-		/**
-		 * @disable_busy_stats: Work item for busy stats disabling.
-		 *
-		 * Same as with @enable_busy_stats action, with the difference
-		 * that we delay it in case there are rapid enable-disable
-		 * actions, which can happen during tool startup (like perf
-		 * stat).
-		 */
-		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*