author    Tvrtko Ursulin <tvrtko.ursulin@intel.com>    2018-02-05 04:34:48 -0500
committer Tvrtko Ursulin <tvrtko.ursulin@intel.com>    2018-02-06 06:21:27 -0500
commit    b2f78cda260bc6a1a2d382b1d85a29e69b5b3724 (patch)
tree      52b5efe9189d6a8c34eee8dbb8b813550bba0e39
parent    4b6ce6810a5dc0af387a238e8c852e0d3822381f (diff)
drm/i915/pmu: Fix PMU enable vs execlists tasklet race
Commit 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking
inside for busy-stats") added a tasklet_disable call in busy-stats
enabling, but we failed to understand that the PMU enable callback runs
as a hard IRQ (IPI). The consequence of this is that the PMU enable
callback can interrupt the execlists tasklet, and will then deadlock
when it calls intel_engine_stats_enable->tasklet_disable.

To fix this, I realized it is possible to move the engine stats
enablement and disablement to the PMU event init and destroy hooks.
This allows for a much simpler implementation, since those hooks run in
normal context (can sleep).

v2: Extract engine_event_destroy. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 99e48bf98dd0 ("drm/i915: Lock out execlist tasklet while peeking inside for busy-stats")
Testcase: igt/perf_pmu/enable-race-*
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180205093448.13877-1-tvrtko.ursulin@linux.intel.com
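[Illustration, not part of the patch: a minimal userspace sketch of how
the event init and destroy hooks are reached. They run at
perf_event_open(2) and close(2) time, in normal sleepable process
context, never from the hard-IRQ (IPI) enable path that races with the
execlists tasklet, which is why moving the stats enable/disable there is
safe. The PMU type id and event config below are assumptions; on a real
system they are read from /sys/bus/event_source/devices/i915/type and
/sys/bus/event_source/devices/i915/events/.]

#include <inttypes.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t busy_ns;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = 18;		/* assumption: i915 PMU type id, read from sysfs */
	attr.config = 0x0;	/* assumption: render-engine busy config, read from sysfs */

	/*
	 * perf_event_open() invokes the driver's event_init hook, which
	 * after this patch is where busy-stats collection gets enabled.
	 * i915 events are system-wide: pid == -1, a single CPU; needs
	 * root or sufficient perf_event_paranoid settings.
	 */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	sleep(1);
	if (read(fd, &busy_ns, sizeof(busy_ns)) == sizeof(busy_ns))
		printf("engine busy: %" PRIu64 " ns\n", busy_ns);

	close(fd);	/* event destroy hook -> engine stats disabled */
	return 0;
}

Rapidly opening and closing such events against a busy engine is what
the igt/perf_pmu/enable-race-* testcases listed above stress.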
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c          | 98
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h  | 14
2 files changed, 34 insertions(+), 78 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index ecb0198bfb7a..1c440460255d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -285,9 +285,29 @@ static u64 count_interrupts(struct drm_i915_private *i915)
 	return sum;
 }
 
+static void engine_event_destroy(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), pmu.base);
+	struct intel_engine_cs *engine;
+
+	engine = intel_engine_lookup_user(i915,
+					  engine_event_class(event),
+					  engine_event_instance(event));
+	if (WARN_ON_ONCE(!engine))
+		return;
+
+	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
+	    intel_engine_supports_stats(engine))
+		intel_disable_engine_stats(engine);
+}
+
 static void i915_pmu_event_destroy(struct perf_event *event)
 {
 	WARN_ON(event->parent);
+
+	if (is_engine_event(event))
+		engine_event_destroy(event);
 }
 
 static int
@@ -340,13 +360,23 @@ static int engine_event_init(struct perf_event *event)
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	struct intel_engine_cs *engine;
+	u8 sample;
+	int ret;
 
 	engine = intel_engine_lookup_user(i915, engine_event_class(event),
 					  engine_event_instance(event));
 	if (!engine)
 		return -ENODEV;
 
-	return engine_event_status(engine, engine_event_sample(event));
+	sample = engine_event_sample(event);
+	ret = engine_event_status(engine, sample);
+	if (ret)
+		return ret;
+
+	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
+		ret = intel_enable_engine_stats(engine);
+
+	return ret;
 }
 
 static int i915_pmu_event_init(struct perf_event *event)
@@ -402,7 +432,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 	if (WARN_ON_ONCE(!engine)) {
 		/* Do nothing */
 	} else if (sample == I915_SAMPLE_BUSY &&
-		   engine->pmu.busy_stats) {
+		   intel_engine_supports_stats(engine)) {
 		val = ktime_to_ns(intel_engine_get_busy_time(engine));
 	} else {
 		val = engine->pmu.sample[sample].cur;
@@ -457,12 +487,6 @@ again:
 	local64_add(new - prev, &event->count);
 }
 
-static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
-{
-	return intel_engine_supports_stats(engine) &&
-	       (engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
-}
-
 static void i915_pmu_enable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -502,21 +526,7 @@ static void i915_pmu_enable(struct perf_event *event)
 
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
-		if (engine->pmu.enable_count[sample]++ == 0) {
-			/*
-			 * Enable engine busy stats tracking if needed or
-			 * alternatively cancel the scheduled disable.
-			 *
-			 * If the delayed disable was pending, cancel it and
-			 * in this case do not enable since it already is.
-			 */
-			if (engine_needs_busy_stats(engine) &&
-			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = true;
-				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-					intel_enable_engine_stats(engine);
-			}
-		}
+		engine->pmu.enable_count[sample]++;
 	}
 
 	/*
@@ -529,14 +539,6 @@ static void i915_pmu_enable(struct perf_event *event)
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 }
 
-static void __disable_busy_stats(struct work_struct *work)
-{
-	struct intel_engine_cs *engine =
-		container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
-
-	intel_disable_engine_stats(engine);
-}
-
 static void i915_pmu_disable(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -560,26 +562,8 @@ static void i915_pmu_disable(struct perf_event *event)
 		 * Decrement the reference count and clear the enabled
 		 * bitmask when the last listener on an event goes away.
 		 */
-		if (--engine->pmu.enable_count[sample] == 0) {
+		if (--engine->pmu.enable_count[sample] == 0)
 			engine->pmu.enable &= ~BIT(sample);
-			if (!engine_needs_busy_stats(engine) &&
-			    engine->pmu.busy_stats) {
-				engine->pmu.busy_stats = false;
-				/*
-				 * We request a delayed disable to handle the
-				 * rapid on/off cycles on events, which can
-				 * happen when tools like perf stat start, in a
-				 * nicer way.
-				 *
-				 * In addition, this also helps with busy stats
-				 * accuracy with background CPU offline/online
-				 * migration events.
-				 */
-				queue_delayed_work(system_wq,
-						   &engine->pmu.disable_busy_stats,
-						   round_jiffies_up_relative(HZ));
-			}
-		}
 	}
 
 	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
@@ -956,8 +940,6 @@ static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
 
 void i915_pmu_register(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	int ret;
 
 	if (INTEL_GEN(i915) <= 2) {
@@ -985,10 +967,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	i915->pmu.timer.function = i915_sample;
 
-	for_each_engine(engine, i915, id)
-		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
-				  __disable_busy_stats);
-
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret)
 		goto err;
@@ -1009,9 +987,6 @@ err:
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -1019,11 +994,6 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
-	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(engine->pmu.busy_stats);
-		flush_delayed_work(&engine->pmu.disable_busy_stats);
-	}
-
 	i915_pmu_unregister_cpuhp_state(i915);
 
 	perf_pmu_unregister(&i915->pmu.base);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..a0e7a6c2a57c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -366,20 +366,6 @@ struct intel_engine_cs {
 	 */
 #define I915_ENGINE_SAMPLE_MAX	(I915_SAMPLE_SEMA + 1)
 		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
-		/**
-		 * @busy_stats: Has enablement of engine stats tracking been
-		 * requested.
-		 */
-		bool busy_stats;
-		/**
-		 * @disable_busy_stats: Work item for busy stats disabling.
-		 *
-		 * Same as with @enable_busy_stats action, with the difference
-		 * that we delay it in case there are rapid enable-disable
-		 * actions, which can happen during tool startup (like perf
-		 * stat).
-		 */
-		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*