-rw-r--r--  arch/x86/kernel/cpu/perf_event.c  265
-rw-r--r--  kernel/perf_event.c                 5
2 files changed, 267 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9173ea95f918..aa12f36e4711 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -80,6 +80,13 @@ struct event_constraint {
         int weight;
 };
 
+struct amd_nb {
+        int nb_id;  /* NorthBridge id */
+        int refcnt; /* reference count */
+        struct perf_event *owners[X86_PMC_IDX_MAX];
+        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
 struct cpu_hw_events {
         struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
         unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -92,6 +99,7 @@ struct cpu_hw_events {
         int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
         u64 tags[X86_PMC_IDX_MAX];
         struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+        struct amd_nb *amd_nb;
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w) {\
@@ -153,6 +161,8 @@ struct x86_pmu {
 
 static struct x86_pmu x86_pmu __read_mostly;
 
+static raw_spinlock_t amd_nb_lock;
+
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
         .enabled = 1,
 };
@@ -802,7 +812,7 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static u64 amd_pmu_raw_event(u64 hw_event)
 {
-#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
+#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
 #define K7_EVNTSEL_UNIT_MASK  0x00000FF00ULL
 #define K7_EVNTSEL_EDGE_MASK  0x000040000ULL
 #define K7_EVNTSEL_INV_MASK   0x000800000ULL
@@ -2210,6 +2220,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 }
 
 static struct event_constraint unconstrained;
+static struct event_constraint emptyconstraint;
 
 static struct event_constraint bts_constraint =
         EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
@@ -2249,10 +2260,146 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
         return &unconstrained;
 }
 
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+        return (hwc->config & 0xe0) == 0xe0;
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+                                      struct perf_event *event)
+{
+        struct hw_perf_event *hwc = &event->hw;
+        struct amd_nb *nb = cpuc->amd_nb;
+        int i;
+
+        /*
+         * only care about NB events
+         */
+        if (!(nb && amd_is_nb_event(hwc)))
+                return;
+
+        /*
+         * need to scan whole list because event may not have
+         * been assigned during scheduling
+         *
+         * no race condition possible because event can only
+         * be removed on one CPU at a time AND PMU is disabled
+         * when we come here
+         */
+        for (i = 0; i < x86_pmu.num_events; i++) {
+                if (nb->owners[i] == event) {
+                        cmpxchg(nb->owners+i, event, NULL);
+                        break;
+                }
+        }
+}
+
+/*
+ * AMD64 NorthBridge events need special treatment because
+ * counter access needs to be synchronized across all cores
+ * of a package. Refer to BKDG section 3.12.
+ *
+ * NB events are events measuring L3 cache and HyperTransport
+ * traffic. They are identified by an event code >= 0xe00.
+ * They measure events on the NorthBridge, which is shared
+ * by all cores on a package. NB events are counted on a
+ * shared set of counters. When an NB event is programmed
+ * in a counter, the data actually comes from a shared
+ * counter. Thus, access to those counters needs to be
+ * synchronized.
+ *
+ * We implement the synchronization such that no two cores
+ * can be measuring NB events using the same counters. Thus,
+ * we maintain a per-NB allocation table. The available slot
+ * is propagated using the event_constraint structure.
+ *
+ * We provide only one choice for each NB event based on
+ * the fact that only NB events have restrictions. Consequently,
+ * if a counter is available, there is a guarantee the NB event
+ * will be assigned to it. If no slot is available, an empty
+ * constraint is returned and scheduling will eventually fail
+ * for this event.
+ *
+ * Note that all cores attached to the same NB compete for the
+ * same counters to host NB events; this is why we use atomic
+ * ops. Some multi-chip CPUs may have more than one NB.
+ *
+ * Given that resources are allocated (cmpxchg), they must be
+ * eventually freed for others to use. This is accomplished by
+ * calling amd_put_event_constraints().
+ *
+ * Non-NB events are not impacted by this restriction.
+ */
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-        return &unconstrained;
+        struct hw_perf_event *hwc = &event->hw;
+        struct amd_nb *nb = cpuc->amd_nb;
+        struct perf_event *old = NULL;
+        int max = x86_pmu.num_events;
+        int i, j, k = -1;
+
+        /*
+         * if not NB event or no NB, then no constraints
+         */
+        if (!(nb && amd_is_nb_event(hwc)))
+                return &unconstrained;
+
+        /*
+         * detect if already present, if so reuse
+         *
+         * cannot merge with actual allocation
+         * because of possible holes
+         *
+         * event can already be present yet not assigned (in hwc->idx)
+         * because of successive calls to x86_schedule_events() from
+         * hw_perf_group_sched_in() without hw_perf_enable()
+         */
+        for (i = 0; i < max; i++) {
+                /*
+                 * keep track of first free slot
+                 */
+                if (k == -1 && !nb->owners[i])
+                        k = i;
+
+                /* already present, reuse */
+                if (nb->owners[i] == event)
+                        goto done;
+        }
+        /*
+         * not present, so grab a new slot
+         * starting either at:
+         */
+        if (hwc->idx != -1) {
+                /* previous assignment */
+                i = hwc->idx;
+        } else if (k != -1) {
+                /* start from free slot found */
+                i = k;
+        } else {
+                /*
+                 * event not found, no slot found in
+                 * first pass, try again from the
+                 * beginning
+                 */
+                i = 0;
+        }
+        j = i;
+        do {
+                old = cmpxchg(nb->owners+i, NULL, event);
+                if (!old)
+                        break;
+                if (++i == max)
+                        i = 0;
+        } while (i != j);
+done:
+        if (!old)
+                return &nb->event_constraints[i];
+
+        return &emptyconstraint;
 }
 
 static int x86_event_sched_in(struct perf_event *event,
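
The allocation loop above is the core of the scheme, and it is easier to follow outside the kernel. Below is a minimal userspace sketch, not the kernel code, of the same idea: each shared NorthBridge counter slot holds an owner pointer, and a core claims a free slot with an atomic compare-and-swap, so no two cores can ever program the same shared counter. All names here (nb_table, claim_slot, release_slot, NUM_SLOTS) are illustrative; the patch itself operates on cpuc->amd_nb->owners[] with the kernel's cmpxchg().

/*
 * Userspace sketch of the NB slot-claiming idea (illustrative names only).
 */
#include <stdio.h>
#include <stddef.h>

#define NUM_SLOTS 4                     /* stands in for x86_pmu.num_events */

struct nb_table {
        void *owners[NUM_SLOTS];        /* NULL means the slot is free */
};

/* Try to claim a slot for 'event'; return its index or -1 if none is free. */
static int claim_slot(struct nb_table *nb, void *event, int start)
{
        int i = start, j = start;

        do {
                /* atomically install 'event' only if the slot is still free */
                if (__sync_bool_compare_and_swap(&nb->owners[i], NULL, event))
                        return i;
                if (++i == NUM_SLOTS)
                        i = 0;
        } while (i != j);

        return -1;                      /* maps to the empty constraint */
}

/* Release whatever slot 'event' holds, as amd_put_event_constraints() does. */
static void release_slot(struct nb_table *nb, void *event)
{
        for (int i = 0; i < NUM_SLOTS; i++)
                if (__sync_bool_compare_and_swap(&nb->owners[i], event, NULL))
                        break;
}

int main(void)
{
        struct nb_table nb = { { NULL } };
        int ev1, ev2;

        int a = claim_slot(&nb, &ev1, 0);
        int b = claim_slot(&nb, &ev2, 0);
        printf("event1 -> slot %d, event2 -> slot %d\n", a, b);

        release_slot(&nb, &ev1);
        release_slot(&nb, &ev2);
        return 0;
}

Losing the race on one slot simply moves the search to the next one; when the whole table is owned, the kernel version returns emptyconstraint and scheduling of that event fails rather than blocking.
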
@@ -2465,7 +2612,8 @@ static __initconst struct x86_pmu amd_pmu = {
         .apic = 1,
         /* use highest bit to detect overflow */
         .max_period = (1ULL << 47) - 1,
-        .get_event_constraints = amd_get_event_constraints
+        .get_event_constraints = amd_get_event_constraints,
+        .put_event_constraints = amd_put_event_constraints
 };
 
 static __init int p6_pmu_init(void)
@@ -2589,6 +2737,91 @@ static __init int intel_pmu_init(void)
         return 0;
 }
 
+static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+{
+        struct amd_nb *nb;
+        int i;
+
+        nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
+        if (!nb)
+                return NULL;
+
+        memset(nb, 0, sizeof(*nb));
+        nb->nb_id = nb_id;
+
+        /*
+         * initialize all possible NB constraints
+         */
+        for (i = 0; i < x86_pmu.num_events; i++) {
+                set_bit(i, nb->event_constraints[i].idxmsk);
+                nb->event_constraints[i].weight = 1;
+        }
+        return nb;
+}
+
+static void amd_pmu_cpu_online(int cpu)
+{
+        struct cpu_hw_events *cpu1, *cpu2;
+        struct amd_nb *nb = NULL;
+        int i, nb_id;
+
+        if (boot_cpu_data.x86_max_cores < 2)
+                return;
+
+        /*
+         * function may be called too early in the
+         * boot process, in which case nb_id is bogus
+         */
+        nb_id = amd_get_nb_id(cpu);
+        if (nb_id == BAD_APICID)
+                return;
+
+        cpu1 = &per_cpu(cpu_hw_events, cpu);
+        cpu1->amd_nb = NULL;
+
+        raw_spin_lock(&amd_nb_lock);
+
+        for_each_online_cpu(i) {
+                cpu2 = &per_cpu(cpu_hw_events, i);
+                nb = cpu2->amd_nb;
+                if (!nb)
+                        continue;
+                if (nb->nb_id == nb_id)
+                        goto found;
+        }
+
+        nb = amd_alloc_nb(cpu, nb_id);
+        if (!nb) {
+                pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
+                raw_spin_unlock(&amd_nb_lock);
+                return;
+        }
+found:
+        nb->refcnt++;
+        cpu1->amd_nb = nb;
+
+        raw_spin_unlock(&amd_nb_lock);
+}
+
+static void amd_pmu_cpu_offline(int cpu)
+{
+        struct cpu_hw_events *cpuhw;
+
+        if (boot_cpu_data.x86_max_cores < 2)
+                return;
+
+        cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+        raw_spin_lock(&amd_nb_lock);
+
+        if (--cpuhw->amd_nb->refcnt == 0)
+                kfree(cpuhw->amd_nb);
+
+        cpuhw->amd_nb = NULL;
+
+        raw_spin_unlock(&amd_nb_lock);
+}
+
 static __init int amd_pmu_init(void)
 {
         /* Performance-monitoring supported from K7 and later: */
@@ -2601,6 +2834,11 @@ static __init int amd_pmu_init(void)
         memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
                sizeof(hw_cache_event_ids));
 
+        /*
+         * explicitly initialize the boot cpu, other cpus will get
+         * the cpu hotplug callbacks from smp_init()
+         */
+        amd_pmu_cpu_online(smp_processor_id());
         return 0;
 }
 
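
amd_pmu_cpu_online() above is a find-or-allocate pattern: the first CPU of a NorthBridge allocates the shared amd_nb table, later CPUs reporting the same nb_id reuse it and take a reference, and amd_pmu_cpu_offline() frees it when the last reference is dropped. The following userspace sketch shows only that lifetime; nb_info, cpu_nb[] and MAX_CPUS are made-up names, and a pthread mutex stands in for amd_nb_lock (build with cc -pthread).

/*
 * Userspace sketch of the shared, refcounted per-NB table (illustrative only).
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_CPUS 8

struct nb_info {
        int nb_id;
        int refcnt;
};

static struct nb_info *cpu_nb[MAX_CPUS];        /* per-CPU pointer */
static pthread_mutex_t nb_lock = PTHREAD_MUTEX_INITIALIZER;

static void cpu_online(int cpu, int nb_id)
{
        struct nb_info *nb = NULL;

        pthread_mutex_lock(&nb_lock);

        /* reuse an existing table if another CPU on the same NB set one up */
        for (int i = 0; i < MAX_CPUS; i++) {
                if (cpu_nb[i] && cpu_nb[i]->nb_id == nb_id) {
                        nb = cpu_nb[i];
                        break;
                }
        }
        if (!nb) {                              /* first CPU on this NB */
                nb = calloc(1, sizeof(*nb));
                if (!nb) {
                        pthread_mutex_unlock(&nb_lock);
                        return;
                }
                nb->nb_id = nb_id;
        }
        nb->refcnt++;
        cpu_nb[cpu] = nb;

        pthread_mutex_unlock(&nb_lock);
}

static void cpu_offline(int cpu)
{
        pthread_mutex_lock(&nb_lock);

        if (--cpu_nb[cpu]->refcnt == 0)         /* last user frees the table */
                free(cpu_nb[cpu]);
        cpu_nb[cpu] = NULL;

        pthread_mutex_unlock(&nb_lock);
}

int main(void)
{
        cpu_online(0, 1);
        cpu_online(1, 1);                       /* shares CPU0's table */
        printf("refcnt on NB1: %d\n", cpu_nb[0]->refcnt);
        cpu_offline(0);
        cpu_offline(1);
        return 0;
}
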
@@ -2934,4 +3172,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 void hw_perf_event_setup_online(int cpu)
 {
         init_debug_store_on_cpu(cpu);
+
+        switch (boot_cpu_data.x86_vendor) {
+        case X86_VENDOR_AMD:
+                amd_pmu_cpu_online(cpu);
+                break;
+        default:
+                return;
+        }
+}
+
+void hw_perf_event_setup_offline(int cpu)
+{
+        init_debug_store_on_cpu(cpu);
+
+        switch (boot_cpu_data.x86_vendor) {
+        case X86_VENDOR_AMD:
+                amd_pmu_cpu_offline(cpu);
+                break;
+        default:
+                return;
+        }
 }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 74c60021cdbc..fb4e56eb58f4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -98,6 +98,7 @@ void __weak hw_perf_enable(void) { barrier(); }
 
 void __weak hw_perf_event_setup(int cpu) { barrier(); }
 void __weak hw_perf_event_setup_online(int cpu) { barrier(); }
+void __weak hw_perf_event_setup_offline(int cpu) { barrier(); }
 
 int __weak
 hw_perf_group_sched_in(struct perf_event *group_leader,
@@ -5462,6 +5463,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
                 perf_event_exit_cpu(cpu);
                 break;
 
+        case CPU_DEAD:
+                hw_perf_event_setup_offline(cpu);
+                break;
+
         default:
                 break;
         }
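
The kernel/perf_event.c side of the patch relies on weak symbols: the core defines hw_perf_event_setup_offline() as a __weak no-op, and the non-weak definition added to arch/x86/kernel/cpu/perf_event.c overrides it at link time, so the CPU_DEAD notifier reaches amd_pmu_cpu_offline() only when the x86 code provides a real implementation. A single-file sketch of that linker behaviour, with an illustrative hook_offline() in place of the real symbol:

#include <stdio.h>

/* weak default: used only if no other object file defines the symbol */
__attribute__((weak)) void hook_offline(int cpu)
{
        printf("default (no-op) hook for CPU%d\n", cpu);
}

int main(void)
{
        /*
         * Compiled alone, the weak default runs. If another file linked
         * into the program defines a plain "void hook_offline(int cpu)",
         * the linker silently picks that strong definition instead, which
         * is how the x86 version replaces the core's no-op stub.
         */
        hook_offline(0);
        return 0;
}
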