author:    Peter Zijlstra <peterz@infradead.org>  2016-07-06 12:02:43 -0400
committer: Ingo Molnar <mingo@kernel.org>         2016-08-10 07:13:24 -0400
commit:    68f7082ffb0575154ccdec36109e293174f48a4c
tree:      a8e84e19012fc10261534e929bb9e553a55b07e8
parent:    09e61b4f78498bd9f213b0a536e80b79507ea89f
perf/x86: Ensure perf_sched_cb_{inc,dec}() is only called from pmu::{add,del}()
Currently perf_sched_cb_{inc,dec}() are called from
pmu::{start,stop}(), which has the problem that this can happen from
NMI context; this makes it hard to optimize perf_pmu_sched_task().
Furthermore, we really only need this accounting on pmu::{add,del}(),
so doing it from pmu::{start,stop}() does more work than we really
need.
Introduce x86_pmu::{add,del}() and wire up the LBR and PEBS.
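To make the intent concrete, here is a minimal stand-alone sketch (not
kernel code) of the ordering this change relies on: ->add()/->del()
bracket ->start()/->stop(), so reference counting done in add/del never
runs from NMI context. All names below are illustrative stand-ins for
perf_sched_cb_{inc,dec}() and the pmu callbacks, not the real perf API.

```c
#include <stdio.h>

/* Models the perf_sched_cb_{inc,dec}() reference count. */
static int sched_cb_refcount;

static void pmu_add(void)	/* like pmu::add(): task context only */
{
	sched_cb_refcount++;	/* safe: never runs in NMI context */
	printf("add:   refcount=%d\n", sched_cb_refcount);
}

static void pmu_start(void)	/* like pmu::start(): may run from NMI */
{
	/* no accounting here any more */
	printf("start: refcount=%d\n", sched_cb_refcount);
}

static void pmu_stop(void)	/* like pmu::stop(): may run from NMI */
{
	printf("stop:  refcount=%d\n", sched_cb_refcount);
}

static void pmu_del(void)	/* like pmu::del(): task context only */
{
	sched_cb_refcount--;
	printf("del:   refcount=%d\n", sched_cb_refcount);
}

int main(void)
{
	/* add/del bracket any number of start/stop pairs */
	pmu_add();
	pmu_start();
	pmu_stop();
	pmu_del();
	return 0;
}
```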
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 arch/x86/events/core.c       | 24 ++++++++++++++++++++++--
 arch/x86/events/intel/core.c | 31 ++++++++++++++++++-------------
 arch/x86/events/intel/ds.c   |  8 ++------
 arch/x86/events/intel/lbr.c  |  4 ++--
 arch/x86/events/perf_event.h | 10 ++++++++--
 5 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d0efb5cb1b00..18a1acf86c90 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1201,6 +1201,9 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be performed
 	 * at commit time (->commit_txn) as a whole.
+	 *
+	 * If commit fails, we'll call ->del() on all events
+	 * for which ->add() was called.
 	 */
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto done_collect;
@@ -1223,6 +1226,14 @@ done_collect:
 	cpuc->n_added += n - n0;
 	cpuc->n_txn += n - n0;
 
+	if (x86_pmu.add) {
+		/*
+		 * This is before x86_pmu_enable() will call x86_pmu_start(),
+		 * so we enable LBRs before an event needs them etc..
+		 */
+		x86_pmu.add(event);
+	}
+
 	ret = 0;
 out:
 	return ret;
@@ -1346,7 +1357,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
 
 	/*
-	 * If we're called during a txn, we don't need to do anything.
+	 * If we're called during a txn, we only need to undo x86_pmu.add.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
 	 *
@@ -1354,7 +1365,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	 * an event added during that same TXN.
 	 */
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-		return;
+		goto do_del;
 
 	/*
 	 * Not a TXN, therefore cleanup properly.
@@ -1384,6 +1395,15 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	--cpuc->n_events;
 
 	perf_event_update_userpage(event);
+
+do_del:
+	if (x86_pmu.del) {
+		/*
+		 * This is after x86_pmu_stop(); so we disable LBRs after any
+		 * event can need them etc..
+		 */
+		x86_pmu.del(event);
+	}
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
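The do_del label above is the subtle part: when a group transaction is
cancelled, x86_pmu_del() skips the scheduling cleanup (the events were
never scheduled) but must still undo what x86_pmu.add() did. A
stand-alone sketch of that control flow, with hypothetical stand-ins
for the cpuc state and the .del callback:

```c
#include <stdbool.h>
#include <stdio.h>

static bool in_txn;	/* stands in for cpuc->txn_flags & PERF_PMU_TXN_ADD */
static int lbr_users;	/* stands in for the LBR/PEBS user count */

static void x86_del(void)	/* stands in for x86_pmu.del() */
{
	lbr_users--;
}

static void event_del(void)	/* stands in for x86_pmu_del() */
{
	if (in_txn)
		goto do_del;	/* events never got scheduled: skip cleanup */

	/* ... non-TXN cleanup of counters and the event list ... */

do_del:
	x86_del();		/* always undo what x86_pmu.add() did */
	printf("lbr_users=%d\n", lbr_users);
}

int main(void)
{
	/* a cancelled group transaction: add() ran, commit_txn() failed */
	in_txn = true;
	lbr_users = 1;	/* as left behind by x86_pmu.add() */
	event_del();	/* must still drop the reference: prints 0 */
	return 0;
}
```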
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2cbde2f449aa..88792f846d12 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1907,13 +1907,6 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
-	/*
-	 * must disable before any actual event
-	 * because any event may be combined with LBR
-	 */
-	if (needs_branch_stack(event))
-		intel_pmu_lbr_disable(event);
-
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
@@ -1925,6 +1918,14 @@ static void intel_pmu_disable_event(struct perf_event *event)
 		intel_pmu_pebs_disable(event);
 }
 
+static void intel_pmu_del_event(struct perf_event *event)
+{
+	if (needs_branch_stack(event))
+		intel_pmu_lbr_del(event);
+	if (event->attr.precise_ip)
+		intel_pmu_pebs_del(event);
+}
+
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 {
 	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
@@ -1968,12 +1969,6 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		intel_pmu_enable_bts(hwc->config);
 		return;
 	}
-	/*
-	 * must enabled before any actual event
-	 * because any event may be combined with LBR
-	 */
-	if (needs_branch_stack(event))
-		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
 		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1994,6 +1989,14 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
+static void intel_pmu_add_event(struct perf_event *event)
+{
+	if (event->attr.precise_ip)
+		intel_pmu_pebs_add(event);
+	if (needs_branch_stack(event))
+		intel_pmu_lbr_add(event);
+}
+
 /*
  * Save and restart an expired event. Called by NMI contexts,
  * so it has to be careful about preempting normal event ops:
@@ -3290,6 +3293,8 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.enable_all = intel_pmu_enable_all,
 	.enable = intel_pmu_enable_event,
 	.disable = intel_pmu_disable_event,
+	.add = intel_pmu_add_event,
+	.del = intel_pmu_del_event,
 	.hw_config = intel_pmu_hw_config,
 	.schedule_events = x86_schedule_events,
 	.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
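Note that intel_pmu_add_event() takes PEBS before LBR, while
intel_pmu_del_event() releases them in the reverse order — the usual
acquire-in-order, release-in-reverse pattern. A trivial illustration of
that pairing (the function names are stand-ins, not the kernel ones):

```c
#include <stdio.h>

static void pebs_add(void) { printf("pebs_add\n"); }
static void lbr_add(void)  { printf("lbr_add\n"); }
static void lbr_del(void)  { printf("lbr_del\n"); }
static void pebs_del(void) { printf("pebs_del\n"); }

static void add_event(void)
{
	pebs_add();	/* first PEBS ... */
	lbr_add();	/* ... then LBR */
}

static void del_event(void)
{
	lbr_del();	/* tear down in reverse: LBR first ... */
	pebs_del();	/* ... then PEBS */
}

int main(void)
{
	add_event();
	del_event();
	return 0;
}
```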
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c791ff961079..248023f54c87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -844,7 +844,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
 	}
 }
 
-static void intel_pmu_pebs_add(struct perf_event *event)
+void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -863,8 +863,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
 
-	intel_pmu_pebs_add(event);
-
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
@@ -884,7 +882,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	}
 }
 
-static void intel_pmu_pebs_del(struct perf_event *event)
+void intel_pmu_pebs_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -916,8 +914,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
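Moving intel_pmu_pebs_add()/intel_pmu_pebs_del() out of the
enable/disable paths matters because pmu::start()/pmu::stop() — and
hence pebs_enable()/pebs_disable() — can fire many times per event
(throttling, rescheduling), while pmu::add()/pmu::del() run exactly
once. A sketch under those assumptions, with a plain counter standing
in for the per-CPU PEBS accounting:

```c
#include <stdio.h>

static int n_pebs;	/* stands in for the per-CPU PEBS event count */

static void pebs_add(void)     { n_pebs++; }	/* once per event */
static void pebs_del(void)     { n_pebs--; }	/* once per event */
static void pebs_enable(void)  { /* program MSRs only, no accounting */ }
static void pebs_disable(void) { /* clear MSRs only, no accounting */ }

int main(void)
{
	pebs_add();	/* pmu::add(): account the event once */

	/* start/stop may fire repeatedly, possibly from NMI context */
	for (int i = 0; i < 3; i++) {
		pebs_enable();
		pebs_disable();
	}

	pebs_del();	/* pmu::del(): drop the accounting once */
	printf("n_pebs=%d\n", n_pebs);	/* balanced: prints 0 */
	return 0;
}
```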
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 707d358e0dff..e7b58c2c2250 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -422,7 +422,7 @@ static inline bool branch_user_callstack(unsigned br_sel)
 	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
 }
 
-void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
@@ -450,7 +450,7 @@ void intel_pmu_lbr_enable(struct perf_event *event)
 	perf_sched_cb_inc(event->ctx->pmu);
 }
 
-void intel_pmu_lbr_disable(struct perf_event *event)
+void intel_pmu_lbr_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 94b8f2702c51..aa6ea5a84240 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -510,6 +510,8 @@ struct x86_pmu {
 	void (*enable_all)(int added);
 	void (*enable)(struct perf_event *);
 	void (*disable)(struct perf_event *);
+	void (*add)(struct perf_event *);
+	void (*del)(struct perf_event *);
 	int (*hw_config)(struct perf_event *event);
 	int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned eventsel;
@@ -890,6 +892,10 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
+void intel_pmu_pebs_add(struct perf_event *event);
+
+void intel_pmu_pebs_del(struct perf_event *event);
+
 void intel_pmu_pebs_enable(struct perf_event *event);
 
 void intel_pmu_pebs_disable(struct perf_event *event);
@@ -908,9 +914,9 @@ u64 lbr_from_signext_quirk_wr(u64 val);
 
 void intel_pmu_lbr_reset(void);
 
-void intel_pmu_lbr_enable(struct perf_event *event);
+void intel_pmu_lbr_add(struct perf_event *event);
 
-void intel_pmu_lbr_disable(struct perf_event *event);
+void intel_pmu_lbr_del(struct perf_event *event);
 
 void intel_pmu_lbr_enable_all(bool pmi);
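Finally, since .add/.del are optional members of struct x86_pmu,
callers must NULL-check them before invoking, as x86_pmu_add() and
x86_pmu_del() do above. A minimal sketch of that pattern with a
trimmed-down, hypothetical ops table:

```c
#include <stddef.h>
#include <stdio.h>

struct perf_event;	/* opaque here; only pointers are passed around */

/* Trimmed-down stand-in for the ops table extended by this patch */
struct pmu_ops {
	void (*enable)(struct perf_event *);
	void (*disable)(struct perf_event *);
	void (*add)(struct perf_event *);	/* optional, may be NULL */
	void (*del)(struct perf_event *);	/* optional, may be NULL */
};

static void my_add(struct perf_event *event)
{
	(void)event;
	printf("add callback\n");
}

static void my_del(struct perf_event *event)
{
	(void)event;
	printf("del callback\n");
}

static struct pmu_ops ops = {
	.add = my_add,
	.del = my_del,
	/* .enable/.disable left NULL for brevity */
};

int main(void)
{
	struct perf_event *event = NULL;

	/* the same NULL-check pattern x86_pmu_add()/x86_pmu_del() use */
	if (ops.add)
		ops.add(event);
	if (ops.del)
		ops.del(event);
	return 0;
}
```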