author	Peter Zijlstra <peterz@infradead.org>	2016-07-06 12:02:43 -0400
committer	Ingo Molnar <mingo@kernel.org>	2016-08-10 07:13:24 -0400
commit	68f7082ffb0575154ccdec36109e293174f48a4c (patch)
tree	a8e84e19012fc10261534e929bb9e553a55b07e8
parent	09e61b4f78498bd9f213b0a536e80b79507ea89f (diff)
perf/x86: Ensure perf_sched_cb_{inc,dec}() is only called from pmu::{add,del}()
Currently perf_sched_cb_{inc,dec}() are called from pmu::{start,stop}(), which has the problem that this can happen from NMI context; that makes it hard to optimize perf_pmu_sched_task().

Furthermore, we really only need this accounting on pmu::{add,del}(), so doing it from pmu::{start,stop}() is doing more work than we really need.

Introduce x86_pmu::{add,del}() and wire up the LBR and PEBS.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
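The shape of the change, in brief: x86_pmu grows optional ->add()/->del() callbacks that x86_pmu_add() invokes before x86_pmu_enable()/x86_pmu_start(), and x86_pmu_del() invokes after x86_pmu_stop(), so the LBR/PEBS bookkeeping (and with it perf_sched_cb_{inc,dec}()) now runs from pmu::{add,del}() rather than pmu::{start,stop}(). A minimal, self-contained sketch of that hook-dispatch pattern follows; the struct layout, names and printf bodies are illustrative stand-ins, not the kernel code itself:

#include <stdio.h>

/* stand-in for struct perf_event; only the fields the hooks look at */
struct perf_event {
	int precise_ip;		/* would select PEBS */
	int branch_stack;	/* would select LBR */
};

/* stand-in for struct x86_pmu with the two new optional hooks */
struct x86_pmu_model {
	void (*enable)(struct perf_event *);
	void (*disable)(struct perf_event *);
	void (*add)(struct perf_event *);	/* called from pmu::add() */
	void (*del)(struct perf_event *);	/* called from pmu::del() */
};

static void model_add_event(struct perf_event *event)
{
	if (event->precise_ip)
		printf("pebs_add: account PEBS user\n");
	if (event->branch_stack)
		printf("lbr_add: account LBR user (perf_sched_cb_inc)\n");
}

static void model_del_event(struct perf_event *event)
{
	if (event->branch_stack)
		printf("lbr_del: drop LBR user (perf_sched_cb_dec)\n");
	if (event->precise_ip)
		printf("pebs_del: drop PEBS user\n");
}

static struct x86_pmu_model x86_pmu_model = {
	.add = model_add_event,
	.del = model_del_event,
};

/* mirrors x86_pmu_add(): the hook runs before enable/start */
static void pmu_add(struct perf_event *event)
{
	/* ... collect/schedule the event ... */
	if (x86_pmu_model.add)
		x86_pmu_model.add(event);
	/* ... x86_pmu_enable() -> x86_pmu_start() would follow here ... */
}

/* mirrors x86_pmu_del(): the hook runs after stop and list cleanup */
static void pmu_del(struct perf_event *event)
{
	/* ... x86_pmu_stop() and event_list cleanup happen first ... */
	if (x86_pmu_model.del)
		x86_pmu_model.del(event);
}

int main(void)
{
	struct perf_event ev = { .precise_ip = 1, .branch_stack = 1 };

	pmu_add(&ev);
	pmu_del(&ev);
	return 0;
}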
-rw-r--r--	arch/x86/events/core.c	24
-rw-r--r--	arch/x86/events/intel/core.c	31
-rw-r--r--	arch/x86/events/intel/ds.c	8
-rw-r--r--	arch/x86/events/intel/lbr.c	4
-rw-r--r--	arch/x86/events/perf_event.h	10
5 files changed, 52 insertions, 25 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d0efb5cb1b00..18a1acf86c90 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1201,6 +1201,9 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be performed
 	 * at commit time (->commit_txn) as a whole.
+	 *
+	 * If commit fails, we'll call ->del() on all events
+	 * for which ->add() was called.
 	 */
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto done_collect;
@@ -1223,6 +1226,14 @@ done_collect:
 	cpuc->n_added += n - n0;
 	cpuc->n_txn += n - n0;
 
+	if (x86_pmu.add) {
+		/*
+		 * This is before x86_pmu_enable() will call x86_pmu_start(),
+		 * so we enable LBRs before an event needs them etc..
+		 */
+		x86_pmu.add(event);
+	}
+
 	ret = 0;
 out:
 	return ret;
@@ -1346,7 +1357,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
 
 	/*
-	 * If we're called during a txn, we don't need to do anything.
+	 * If we're called during a txn, we only need to undo x86_pmu.add.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
 	 *
@@ -1354,7 +1365,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	 * an event added during that same TXN.
 	 */
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-		return;
+		goto do_del;
 
 	/*
 	 * Not a TXN, therefore cleanup properly.
@@ -1384,6 +1395,15 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	--cpuc->n_events;
 
 	perf_event_update_userpage(event);
+
+do_del:
+	if (x86_pmu.del) {
+		/*
+		 * This is after x86_pmu_stop(); so we disable LBRs after any
+		 * event can need them etc..
+		 */
+		x86_pmu.del(event);
+	}
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2cbde2f449aa..88792f846d12 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1907,13 +1907,6 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
-	/*
-	 * must disable before any actual event
-	 * because any event may be combined with LBR
-	 */
-	if (needs_branch_stack(event))
-		intel_pmu_lbr_disable(event);
-
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
@@ -1925,6 +1918,14 @@ static void intel_pmu_disable_event(struct perf_event *event)
 		intel_pmu_pebs_disable(event);
 }
 
+static void intel_pmu_del_event(struct perf_event *event)
+{
+	if (needs_branch_stack(event))
+		intel_pmu_lbr_del(event);
+	if (event->attr.precise_ip)
+		intel_pmu_pebs_del(event);
+}
+
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 {
 	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
@@ -1968,12 +1969,6 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		intel_pmu_enable_bts(hwc->config);
 		return;
 	}
-	/*
-	 * must enabled before any actual event
-	 * because any event may be combined with LBR
-	 */
-	if (needs_branch_stack(event))
-		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
 		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1994,6 +1989,14 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
+static void intel_pmu_add_event(struct perf_event *event)
+{
+	if (event->attr.precise_ip)
+		intel_pmu_pebs_add(event);
+	if (needs_branch_stack(event))
+		intel_pmu_lbr_add(event);
+}
+
 /*
  * Save and restart an expired event. Called by NMI contexts,
  * so it has to be careful about preempting normal event ops:
@@ -3290,6 +3293,8 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.enable_all		= intel_pmu_enable_all,
 	.enable			= intel_pmu_enable_event,
 	.disable		= intel_pmu_disable_event,
+	.add			= intel_pmu_add_event,
+	.del			= intel_pmu_del_event,
 	.hw_config		= intel_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c791ff961079..248023f54c87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -844,7 +844,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
 	}
 }
 
-static void intel_pmu_pebs_add(struct perf_event *event)
+void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -863,8 +863,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
 
-	intel_pmu_pebs_add(event);
-
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
@@ -884,7 +882,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	}
 }
 
-static void intel_pmu_pebs_del(struct perf_event *event)
+void intel_pmu_pebs_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -916,8 +914,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 707d358e0dff..e7b58c2c2250 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -422,7 +422,7 @@ static inline bool branch_user_callstack(unsigned br_sel)
 	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
 }
 
-void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
@@ -450,7 +450,7 @@ void intel_pmu_lbr_enable(struct perf_event *event)
 	perf_sched_cb_inc(event->ctx->pmu);
 }
 
-void intel_pmu_lbr_disable(struct perf_event *event)
+void intel_pmu_lbr_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 94b8f2702c51..aa6ea5a84240 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -510,6 +510,8 @@ struct x86_pmu {
 	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	void		(*add)(struct perf_event *);
+	void		(*del)(struct perf_event *);
 	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
@@ -890,6 +892,10 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
+void intel_pmu_pebs_add(struct perf_event *event);
+
+void intel_pmu_pebs_del(struct perf_event *event);
+
 void intel_pmu_pebs_enable(struct perf_event *event);
 
 void intel_pmu_pebs_disable(struct perf_event *event);
@@ -908,9 +914,9 @@ u64 lbr_from_signext_quirk_wr(u64 val);
 
 void intel_pmu_lbr_reset(void);
 
-void intel_pmu_lbr_enable(struct perf_event *event);
+void intel_pmu_lbr_add(struct perf_event *event);
 
-void intel_pmu_lbr_disable(struct perf_event *event);
+void intel_pmu_lbr_del(struct perf_event *event);
 
 void intel_pmu_lbr_enable_all(bool pmi);
 