aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTvrtko Ursulin <tvrtko.ursulin@intel.com>2018-02-13 04:57:46 -0500
committerRodrigo Vivi <rodrigo.vivi@intel.com>2018-02-13 19:56:03 -0500
commit4c83f0a788ccf58864f781585d8ae7c7e6a7e07d (patch)
tree57e878691a5814537c084a2155f42d42236e5c07
parentd3f84c8b097001e3f31f584b793493cb0033a7ae (diff)
drm/i915/pmu: Fix sleep under atomic in RC6 readout
We are not allowed to call intel_runtime_pm_get from the PMU counter read callback since the former can sleep, and the latter is running under IRQ context. To work around this, we record the last known RC6 and while runtime suspended estimate its increase by querying the runtime PM core timestamps. The downside of this approach is that we can temporarily lose a chunk of RC6 time, from the last PMU read-out to runtime suspend entry, but that will eventually catch up, once the device comes back online and in the presence of PMU queries. Also, we have to be careful not to overshoot the RC6 estimate, so once resumed after a period of approximation, we only update the counter once it catches up. With the observation that RC6 is increasing while the device is suspended, this should not pose a problem and can only cause slight inaccuracies due to clock base differences. v2: Simplify by estimating on top of PM core counters. (Imre) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104943 Fixes: 6060b6aec03c ("drm/i915/pmu: Add RC6 residency metrics") Testcase: igt/perf_pmu/rc6-runtime-pm Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Imre Deak <imre.deak@intel.com> Cc: Jani Nikula <jani.nikula@linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: David Airlie <airlied@linux.ie> Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: https://patchwork.freedesktop.org/patch/msgid/20180206183311.17924-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 1fe699e30113ed6f6e853ff44710d256072ea627) Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180213095747.2424-3-tvrtko.ursulin@linux.intel.com
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c93
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h6
2 files changed, 84 insertions, 15 deletions
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 337eaa6ede52..e13859aaa2a3 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -409,7 +409,81 @@ static int i915_pmu_event_init(struct perf_event *event)
409 return 0; 409 return 0;
410} 410}
411 411
412static u64 __i915_pmu_event_read(struct perf_event *event) 412static u64 get_rc6(struct drm_i915_private *i915, bool locked)
413{
414 unsigned long flags;
415 u64 val;
416
417 if (intel_runtime_pm_get_if_in_use(i915)) {
418 val = intel_rc6_residency_ns(i915, IS_VALLEYVIEW(i915) ?
419 VLV_GT_RENDER_RC6 :
420 GEN6_GT_GFX_RC6);
421
422 if (HAS_RC6p(i915))
423 val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
424
425 if (HAS_RC6pp(i915))
426 val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
427
428 intel_runtime_pm_put(i915);
429
430 /*
431 * If we are coming back from being runtime suspended we must
432 * be careful not to report a larger value than returned
433 * previously.
434 */
435
436 if (!locked)
437 spin_lock_irqsave(&i915->pmu.lock, flags);
438
439 if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
440 i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
441 i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
442 } else {
443 val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
444 }
445
446 if (!locked)
447 spin_unlock_irqrestore(&i915->pmu.lock, flags);
448 } else {
449 struct pci_dev *pdev = i915->drm.pdev;
450 struct device *kdev = &pdev->dev;
451 unsigned long flags2;
452
453 /*
454 * We are runtime suspended.
455 *
456 * Report the delta from when the device was suspended to now,
457 * on top of the last known real value, as the approximated RC6
458 * counter value.
459 */
460 if (!locked)
461 spin_lock_irqsave(&i915->pmu.lock, flags);
462
463 spin_lock_irqsave(&kdev->power.lock, flags2);
464
465 if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
466 i915->pmu.suspended_jiffies_last =
467 kdev->power.suspended_jiffies;
468
469 val = kdev->power.suspended_jiffies -
470 i915->pmu.suspended_jiffies_last;
471 val += jiffies - kdev->power.accounting_timestamp;
472
473 spin_unlock_irqrestore(&kdev->power.lock, flags2);
474
475 val = jiffies_to_nsecs(val);
476 val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
477 i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
478
479 if (!locked)
480 spin_unlock_irqrestore(&i915->pmu.lock, flags);
481 }
482
483 return val;
484}
485
486static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
413{ 487{
414 struct drm_i915_private *i915 = 488 struct drm_i915_private *i915 =
415 container_of(event->pmu, typeof(*i915), pmu.base); 489 container_of(event->pmu, typeof(*i915), pmu.base);
@@ -447,18 +521,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
447 val = count_interrupts(i915); 521 val = count_interrupts(i915);
448 break; 522 break;
449 case I915_PMU_RC6_RESIDENCY: 523 case I915_PMU_RC6_RESIDENCY:
450 intel_runtime_pm_get(i915); 524 val = get_rc6(i915, locked);
451 val = intel_rc6_residency_ns(i915,
452 IS_VALLEYVIEW(i915) ?
453 VLV_GT_RENDER_RC6 :
454 GEN6_GT_GFX_RC6);
455 if (HAS_RC6p(i915))
456 val += intel_rc6_residency_ns(i915,
457 GEN6_GT_GFX_RC6p);
458 if (HAS_RC6pp(i915))
459 val += intel_rc6_residency_ns(i915,
460 GEN6_GT_GFX_RC6pp);
461 intel_runtime_pm_put(i915);
462 break; 525 break;
463 } 526 }
464 } 527 }
@@ -473,7 +536,7 @@ static void i915_pmu_event_read(struct perf_event *event)
473 536
474again: 537again:
475 prev = local64_read(&hwc->prev_count); 538 prev = local64_read(&hwc->prev_count);
476 new = __i915_pmu_event_read(event); 539 new = __i915_pmu_event_read(event, false);
477 540
478 if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) 541 if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
479 goto again; 542 goto again;
@@ -528,7 +591,7 @@ static void i915_pmu_enable(struct perf_event *event)
528 * for all listeners. Even when the event was already enabled and has 591 * for all listeners. Even when the event was already enabled and has
529 * an existing non-zero value. 592 * an existing non-zero value.
530 */ 593 */
531 local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); 594 local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
532 595
533 spin_unlock_irqrestore(&i915->pmu.lock, flags); 596 spin_unlock_irqrestore(&i915->pmu.lock, flags);
534} 597}
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 40c154d13565..bb62df15afa4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -27,6 +27,8 @@
27enum { 27enum {
28 __I915_SAMPLE_FREQ_ACT = 0, 28 __I915_SAMPLE_FREQ_ACT = 0,
29 __I915_SAMPLE_FREQ_REQ, 29 __I915_SAMPLE_FREQ_REQ,
30 __I915_SAMPLE_RC6,
31 __I915_SAMPLE_RC6_ESTIMATED,
30 __I915_NUM_PMU_SAMPLERS 32 __I915_NUM_PMU_SAMPLERS
31}; 33};
32 34
@@ -94,6 +96,10 @@ struct i915_pmu {
94 * struct intel_engine_cs. 96 * struct intel_engine_cs.
95 */ 97 */
96 struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; 98 struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
99 /**
100 * @suspended_jiffies_last: Cached suspend time from PM core.
101 */
102 unsigned long suspended_jiffies_last;
97}; 103};
98 104
99#ifdef CONFIG_PERF_EVENTS 105#ifdef CONFIG_PERF_EVENTS