author     Ingo Molnar <mingo@kernel.org>  2016-03-31 03:55:12 -0400
committer  Ingo Molnar <mingo@kernel.org>  2016-03-31 03:55:12 -0400
commit     84c48d8d01b74a2b98c164513ca5e37c73166825 (patch)
tree       f157fe93864dd9a08029cb62b50d707e7db186d5
parent     643cb15ba07260faadd9fcfabac4f5d9d0ddc053 (diff)
parent     85dc600263c2291cea33bffa90038808ee64198b (diff)
Merge branch 'perf/urgent' into perf/core, to fix up fixes before queueing up new changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 arch/x86/events/amd/ibs.c | 52
 kernel/events/core.c      | 15
 2 files changed, 58 insertions(+), 9 deletions(-)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 3ea25c3917c0..feb90f6730e8 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -28,10 +28,46 @@ static u32 ibs_caps;
 #define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
 #define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
 
+
+/*
+ * IBS states:
+ *
+ * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
+ * and any further add()s must fail.
+ *
+ * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
+ * complicated by the fact that the IBS hardware can send late NMIs (ie. after
+ * we've cleared the EN bit).
+ *
+ * In order to consume these late NMIs we have the STOPPED state, any NMI that
+ * happens after we've cleared the EN state will clear this bit and report the
+ * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
+ * someone else can consume our BIT and our NMI will go unhandled).
+ *
+ * And since we cannot set/clear this separate bit together with the EN bit,
+ * there are races; if we cleared STARTED early, an NMI could land in
+ * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
+ * could happen if the period is small enough), and consume our STOPPED bit
+ * and trigger streams of unhandled NMIs.
+ *
+ * If, however, we clear STARTED late, an NMI can hit between clearing the
+ * EN bit and clearing STARTED, still see STARTED set and process the event.
+ * If this event will have the VALID bit clear, we bail properly, but this
+ * is not a given. With VALID set we can end up calling pmu::stop() again
+ * (the throttle logic) and trigger the WARNs in there.
+ *
+ * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
+ * nesting, and clear STARTED late, so that we have a well defined state over
+ * the clearing of the EN bit.
+ *
+ * XXX: we could probably be using !atomic bitops for all this.
+ */
+
 enum ibs_states {
 	IBS_ENABLED	= 0,
 	IBS_STARTED	= 1,
 	IBS_STOPPING	= 2,
+	IBS_STOPPED	= 3,
 
 	IBS_MAX_STATES,
 };
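
The state-machine comment above is dense, so here is a minimal userspace model of the four bits it describes. The bitop helpers are stand-ins built on C11 atomics, not the kernel's <asm/bitops.h>, and the whole thing is illustrative rather than driver code; the sketches after the later hunks reuse these helpers.

	/* Userspace model of the IBS state bits; illustrative only. */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	enum ibs_states {
		IBS_ENABLED,	/* counter claimed by pmu::add() */
		IBS_STARTED,	/* hardware armed; NMIs are expected */
		IBS_STOPPING,	/* pmu::stop() already in progress */
		IBS_STOPPED,	/* EN cleared; one late NMI may still arrive */
		IBS_MAX_STATES,
	};

	/* Stand-ins for the kernel's atomic bitops, built on C11 atomics. */
	static bool test_and_set_bit(int nr, _Atomic unsigned long *addr)
	{
		unsigned long mask = 1UL << nr;
		return atomic_fetch_or(addr, mask) & mask;
	}

	static bool test_and_clear_bit(int nr, _Atomic unsigned long *addr)
	{
		unsigned long mask = 1UL << nr;
		return atomic_fetch_and(addr, ~mask) & mask;
	}

	static void set_bit(int nr, _Atomic unsigned long *addr)
	{
		atomic_fetch_or(addr, 1UL << nr);
	}

	static void clear_bit(int nr, _Atomic unsigned long *addr)
	{
		atomic_fetch_and(addr, ~(1UL << nr));
	}

	static bool test_bit(int nr, _Atomic unsigned long *addr)
	{
		return atomic_load(addr) & (1UL << nr);
	}

	int main(void)
	{
		static _Atomic unsigned long state;

		/* pmu::add(): claim the counter; a second add() must fail. */
		if (!test_and_set_bit(IBS_ENABLED, &state))
			printf("counter claimed\n");
		if (test_and_set_bit(IBS_ENABLED, &state))
			printf("second add() correctly refused\n");
		return 0;
	}
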
@@ -377,11 +413,10 @@ static void perf_ibs_start(struct perf_event *event, int flags)
 
 	perf_ibs_set_period(perf_ibs, hwc, &period);
 	/*
-	 * Set STARTED before enabling the hardware, such that
-	 * a subsequent NMI must observe it. Then clear STOPPING
-	 * such that we don't consume NMIs by accident.
+	 * Set STARTED before enabling the hardware, such that a subsequent NMI
+	 * must observe it.
 	 */
 	set_bit(IBS_STARTED, pcpu->state);
 	clear_bit(IBS_STOPPING, pcpu->state);
 	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
 
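
The start path now relies purely on ordering: STARTED must be visible before the hardware EN bit is set, because an NMI can only fire once EN is set. A sketch using the model above, where hw_set_en() is a hypothetical stand-in for the MSR write inside perf_ibs_enable_event():

	static _Atomic unsigned long state;	/* one word standing in for pcpu->state */

	static void hw_set_en(int on)
	{
		(void)on;	/* hypothetical stand-in for the IBS control MSR write */
	}

	static void model_ibs_start(void)
	{
		set_bit(IBS_STARTED, &state);	/* publish first: NMIs fire only once EN is set */
		clear_bit(IBS_STOPPING, &state);/* re-arm the stop path for this run */
		hw_set_en(1);			/* only now can an NMI arrive */
	}

Reversed, there would be a window where an NMI lands between the EN write and set_bit(), finds STARTED clear, and is reported as not-ours.
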
@@ -396,6 +431,9 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
 	u64 config;
 	int stopping;
 
+	if (test_and_set_bit(IBS_STOPPING, pcpu->state))
+		return;
+
 	stopping = test_bit(IBS_STARTED, pcpu->state);
 
 	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
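
The new test_and_set_bit() guard is what stops pmu::stop() from nesting (the throttle path mentioned in the big comment): only the first caller proceeds, any racing or nested stop bails early instead of tripping the WARNs. In the model:

	static void model_ibs_stop(void)
	{
		if (test_and_set_bit(IBS_STOPPING, &state))
			return;		/* a stop is already in flight; don't nest */

		/* ... stop sequence, continued after the next hunk ... */
	}
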
@@ -405,12 +443,12 @@ static void perf_ibs_stop(struct perf_event *event, int flags)
 
 	if (stopping) {
 		/*
-		 * Set STOPPING before disabling the hardware, such that it
+		 * Set STOPPED before disabling the hardware, such that it
 		 * must be visible to NMIs the moment we clear the EN bit,
 		 * at which point we can generate an !VALID sample which
 		 * we need to consume.
 		 */
-		set_bit(IBS_STOPPING, pcpu->state);
+		set_bit(IBS_STOPPED, pcpu->state);
 		perf_ibs_disable_event(perf_ibs, hwc, config);
 		/*
 		 * Clear STARTED after disabling the hardware; if it were
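
The stop-side ordering is the mirror image of the start path. Continuing model_ibs_stop() from the previous sketch, its body would run roughly:

		/* Publish STOPPED before dropping EN: the one late !VALID NMI
		 * the hardware may still raise can then be recognized and
		 * consumed. */
		set_bit(IBS_STOPPED, &state);
		hw_set_en(0);
		/* Clear STARTED only after EN is gone, so the NMI handler
		 * sees a well-defined state across the EN-bit write. */
		clear_bit(IBS_STARTED, &state);
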
@@ -556,7 +594,7 @@ fail:
 	 * with samples that even have the valid bit cleared.
 	 * Mark all these NMIs as handled.
 	 */
-	if (test_and_clear_bit(IBS_STOPPING, pcpu->state))
+	if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
 		return 1;
 
 	return 0;
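
The handler's fail path is where the late NMI is finally eaten: finding STARTED clear, it claims the NMI only if it can clear STOPPED, and test_and_clear_bit() guarantees that happens exactly once. In the model (returning 1 means "NMI handled"):

	static int model_ibs_nmi(void)
	{
		if (!test_bit(IBS_STARTED, &state)) {
			/* Late NMI after a stop: consume it exactly once. */
			if (test_and_clear_bit(IBS_STOPPED, &state))
				return 1;	/* handled */
			return 0;		/* not ours; let others look */
		}
		/* ... normal sample processing elided ... */
		return 1;
	}
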
diff --git a/kernel/events/core.c b/kernel/events/core.c
index de24fbce5277..52bedc5a5aaa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2417,14 +2417,24 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 		cpuctx->task_ctx = NULL;
 	}
 
-	is_active ^= ctx->is_active; /* changed bits */
-
+	/*
+	 * Always update time if it was set; not only when it changes.
+	 * Otherwise we can 'forget' to update time for any but the last
+	 * context we sched out. For example:
+	 *
+	 *   ctx_sched_out(.event_type = EVENT_FLEXIBLE)
+	 *   ctx_sched_out(.event_type = EVENT_PINNED)
+	 *
+	 * would only update time for the pinned events.
+	 */
 	if (is_active & EVENT_TIME) {
 		/* update (and stop) ctx time */
 		update_context_time(ctx);
 		update_cgrp_time_from_cpuctx(cpuctx);
 	}
 
+	is_active ^= ctx->is_active; /* changed bits */
+
 	if (!ctx->nr_active || !(is_active & EVENT_ALL))
 		return;
 
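
The bug fixed here is easiest to see with concrete bit values. In the old order, is_active was reduced to the changed bits before the EVENT_TIME test, so scheduling out only the flexible events (with pinned events still active, hence EVENT_TIME unchanged) skipped the time update. A standalone model of both orderings; the flag values are illustrative, not the kernel's enum event_type_t:

	#include <stdio.h>

	#define EVENT_PINNED	0x01
	#define EVENT_FLEXIBLE	0x02
	#define EVENT_ALL	(EVENT_PINNED | EVENT_FLEXIBLE)
	#define EVENT_TIME	0x04

	static int ctx_is_active = EVENT_ALL | EVENT_TIME;

	static void model_sched_out(int event_type, int fixed_order)
	{
		int is_active = ctx_is_active;

		ctx_is_active &= ~event_type;
		if (!(ctx_is_active & EVENT_ALL))
			ctx_is_active = 0;	/* last sched-out drops EVENT_TIME too */

		if (!fixed_order)		/* old code: reduce to changed bits first */
			is_active ^= ctx_is_active;

		printf("type %#x: time update %s\n", event_type,
		       (is_active & EVENT_TIME) ? "done" : "MISSED");
	}

	int main(void)
	{
		model_sched_out(EVENT_FLEXIBLE, 0);	/* old order: MISSED */
		model_sched_out(EVENT_PINNED,   0);	/* old order: done */

		ctx_is_active = EVENT_ALL | EVENT_TIME;
		model_sched_out(EVENT_FLEXIBLE, 1);	/* fixed order: done */
		model_sched_out(EVENT_PINNED,   1);	/* fixed order: done */
		return 0;
	}
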
@@ -8532,6 +8542,7 @@ SYSCALL_DEFINE5(perf_event_open,
 					f_flags);
 	if (IS_ERR(event_file)) {
 		err = PTR_ERR(event_file);
+		event_file = NULL;
 		goto err_context;
 	}
 
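
This one-liner is ERR_PTR() hygiene: further down perf_event_open(), the error path (not shown in this hunk) frees the event only when event_file is NULL, because once the file exists its release callback owns the event. An ERR_PTR() value is not NULL, so without the reset the event would leak on this path. A userspace model of the idiom, with IS_ERR()/PTR_ERR() re-implemented just for the demo:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define MAX_ERRNO	4095
	#define ERR_PTR(err)	((void *)(long)(err))
	#define PTR_ERR(ptr)	((long)(ptr))
	#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

	/* Hypothetical resource constructor returning an ERR_PTR on failure. */
	static void *create_resource(int fail)
	{
		return fail ? ERR_PTR(-ENOMEM) : malloc(16);
	}

	int main(void)
	{
		long err = 0;
		void *res = create_resource(1);

		if (IS_ERR(res)) {
			err = PTR_ERR(res);
			res = NULL;	/* the fix: shared cleanup can test res safely */
			goto out;
		}
		/* ... use res ... */
	out:
		free(res);	/* fine for NULL; would crash on a raw ERR_PTR */
		printf("err = %ld\n", err);
		return err ? 1 : 0;
	}
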