Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_amd.c')
-rw-r--r--   arch/x86/kernel/cpu/perf_event_amd.c   416
1 file changed, 416 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 000000000000..8f3dbfda3c4f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,416 @@
#ifdef CONFIG_CPU_SUP_AMD

static DEFINE_RAW_SPINLOCK(amd_nb_lock);

static __initconst u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
                [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
                [ C(RESULT_MISS) ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
                [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
        },
 },
 [ C(L1I ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
                [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
                [ C(RESULT_MISS) ] = 0,
        },
 },
 [ C(LL ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
                [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
                [ C(RESULT_MISS) ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS) ] = 0,
        },
 },
 [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
                [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS) ] = 0,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0,
                [ C(RESULT_MISS) ] = 0,
        },
 },
 [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
                [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
 },
 [ C(BPU ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
                [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = -1,
                [ C(RESULT_MISS) ] = -1,
        },
 },
};

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
};

static u64 amd_pmu_event_map(int hw_event)
{
        return amd_perfmon_event_map[hw_event];
}

static u64 amd_pmu_raw_event(u64 hw_event)
{
#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
#define K7_EVNTSEL_INV_MASK 0x000800000ULL
#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL

#define K7_EVNTSEL_MASK \
        (K7_EVNTSEL_EVENT_MASK | \
         K7_EVNTSEL_UNIT_MASK | \
         K7_EVNTSEL_EDGE_MASK | \
         K7_EVNTSEL_INV_MASK | \
         K7_EVNTSEL_REG_MASK)

        return hw_event & K7_EVNTSEL_MASK;
}

/*
 * AMD64 events are detected based on their event codes.
 */
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
        return (hwc->config & 0xe0) == 0xe0;
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
                                      struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct amd_nb *nb = cpuc->amd_nb;
        int i;

        /*
         * only care about NB events
         */
        if (!(nb && amd_is_nb_event(hwc)))
                return;

        /*
         * need to scan whole list because event may not have
         * been assigned during scheduling
         *
         * no race condition possible because event can only
         * be removed on one CPU at a time AND PMU is disabled
         * when we come here
         */
        for (i = 0; i < x86_pmu.num_events; i++) {
                if (nb->owners[i] == event) {
                        cmpxchg(nb->owners+i, event, NULL);
                        break;
                }
        }
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12
 *
 * NB events are events measuring L3 cache, Hypertransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When a NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the same
 * counters to host NB events; this is why we use atomic ops. Some
 * multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling amd_put_event_constraints().
 *
 * Non NB events are not impacted by this restriction.
 */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct amd_nb *nb = cpuc->amd_nb;
        struct perf_event *old = NULL;
        int max = x86_pmu.num_events;
        int i, j, k = -1;

        /*
         * if not NB event or no NB, then no constraints
         */
        if (!(nb && amd_is_nb_event(hwc)))
                return &unconstrained;

        /*
         * detect if already present, if so reuse
         *
         * cannot merge with actual allocation
         * because of possible holes
         *
         * event can already be present yet not assigned (in hwc->idx)
         * because of successive calls to x86_schedule_events() from
         * hw_perf_group_sched_in() without hw_perf_enable()
         */
        for (i = 0; i < max; i++) {
                /*
                 * keep track of first free slot
                 */
                if (k == -1 && !nb->owners[i])
                        k = i;

                /* already present, reuse */
                if (nb->owners[i] == event)
                        goto done;
        }
        /*
         * not present, so grab a new slot
         * starting either at:
         */
        if (hwc->idx != -1) {
                /* previous assignment */
                i = hwc->idx;
        } else if (k != -1) {
                /* start from free slot found */
                i = k;
        } else {
                /*
                 * event not found, no slot found in
                 * first pass, try again from the
                 * beginning
                 */
                i = 0;
        }
        j = i;
        /* rotate over all counters and atomically claim the first free slot */
        do {
                old = cmpxchg(nb->owners+i, NULL, event);
                if (!old)
                        break;
                if (++i == max)
                        i = 0;
        } while (i != j);
done:
        if (!old)
                return &nb->event_constraints[i];

        return &emptyconstraint;
}

static __initconst struct x86_pmu amd_pmu = {
        .name = "AMD",
        .handle_irq = x86_pmu_handle_irq,
        .disable_all = x86_pmu_disable_all,
        .enable_all = x86_pmu_enable_all,
        .enable = x86_pmu_enable_event,
        .disable = x86_pmu_disable_event,
        .eventsel = MSR_K7_EVNTSEL0,
        .perfctr = MSR_K7_PERFCTR0,
        .event_map = amd_pmu_event_map,
        .raw_event = amd_pmu_raw_event,
        .max_events = ARRAY_SIZE(amd_perfmon_event_map),
        .num_events = 4,
        .event_bits = 48,
        .event_mask = (1ULL << 48) - 1,
        .apic = 1,
        /* use highest bit to detect overflow */
        .max_period = (1ULL << 47) - 1,
        .get_event_constraints = amd_get_event_constraints,
        .put_event_constraints = amd_put_event_constraints
};

static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
{
        struct amd_nb *nb;
        int i;

        nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
        if (!nb)
                return NULL;

        memset(nb, 0, sizeof(*nb));
        nb->nb_id = nb_id;

        /*
         * initialize all possible NB constraints
         */
        for (i = 0; i < x86_pmu.num_events; i++) {
                set_bit(i, nb->event_constraints[i].idxmsk);
                nb->event_constraints[i].weight = 1;
        }
        return nb;
}

static void amd_pmu_cpu_online(int cpu)
{
        struct cpu_hw_events *cpu1, *cpu2;
        struct amd_nb *nb = NULL;
        int i, nb_id;

        if (boot_cpu_data.x86_max_cores < 2)
                return;

        /*
         * function may be called too early in the
         * boot process, in which case nb_id is bogus
         */
        nb_id = amd_get_nb_id(cpu);
        if (nb_id == BAD_APICID)
                return;

        cpu1 = &per_cpu(cpu_hw_events, cpu);
        cpu1->amd_nb = NULL;

        raw_spin_lock(&amd_nb_lock);

        for_each_online_cpu(i) {
                cpu2 = &per_cpu(cpu_hw_events, i);
                nb = cpu2->amd_nb;
                if (!nb)
                        continue;
                if (nb->nb_id == nb_id)
                        goto found;
        }

        nb = amd_alloc_nb(cpu, nb_id);
        if (!nb) {
                pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
                raw_spin_unlock(&amd_nb_lock);
                return;
        }
found:
        nb->refcnt++;
        cpu1->amd_nb = nb;

        raw_spin_unlock(&amd_nb_lock);
}

static void amd_pmu_cpu_offline(int cpu)
{
        struct cpu_hw_events *cpuhw;

        if (boot_cpu_data.x86_max_cores < 2)
                return;

        cpuhw = &per_cpu(cpu_hw_events, cpu);

        raw_spin_lock(&amd_nb_lock);

        if (--cpuhw->amd_nb->refcnt == 0)
                kfree(cpuhw->amd_nb);

        cpuhw->amd_nb = NULL;

        raw_spin_unlock(&amd_nb_lock);
}

static __init int amd_pmu_init(void)
{
        /* Performance-monitoring supported from K7 and later: */
        if (boot_cpu_data.x86 < 6)
                return -ENODEV;

        x86_pmu = amd_pmu;

        /* Events are common for all AMDs */
        memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
               sizeof(hw_cache_event_ids));

        /*
         * explicitly initialize the boot cpu, other cpus will get
         * the cpu hotplug callbacks from smp_init()
         */
        amd_pmu_cpu_online(smp_processor_id());
        return 0;
}

#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)
{
        return 0;
}

static void amd_pmu_cpu_online(int cpu)
{
}

static void amd_pmu_cpu_offline(int cpu)
{
}

#endif
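
The NB-constraint scheme used by amd_get_event_constraints()/amd_put_event_constraints() above boils down to a shared per-northbridge ownership table in which each core claims a counter slot with a compare-and-swap and releases it when the event goes away. The following stand-alone user-space sketch is not part of the commit; the names (NUM_COUNTERS, nb_owners, claim_nb_slot, release_nb_slot, struct event) are invented for illustration, and GCC's __sync_* builtins stand in for the kernel's cmpxchg().

/* nb_sketch.c: illustrative sketch of cmpxchg-based counter-slot allocation */
#include <stdio.h>
#include <stddef.h>

#define NUM_COUNTERS 4                  /* stands in for x86_pmu.num_events */

struct event;                           /* opaque stand-in for struct perf_event */

/* shared per-NB ownership table: one slot per counter */
static struct event *nb_owners[NUM_COUNTERS];

/*
 * Try to claim a counter for 'ev', preferring 'hint' (a previously
 * assigned index, or -1). Returns the slot index, or -1 if every slot
 * is owned by some other event (the "empty constraint" case).
 */
static int claim_nb_slot(struct event *ev, int hint)
{
        int i = (hint >= 0) ? hint : 0;
        int j = i;

        do {
                /* atomically take the slot iff it is currently free */
                struct event *old =
                        __sync_val_compare_and_swap(&nb_owners[i], NULL, ev);
                if (old == NULL || old == ev)
                        return i;       /* claimed, or already ours */
                if (++i == NUM_COUNTERS)
                        i = 0;          /* wrap around and keep scanning */
        } while (i != j);

        return -1;                      /* no free slot */
}

/* Free the slot owned by 'ev', if any, so other cores can use it. */
static void release_nb_slot(struct event *ev)
{
        int i;

        for (i = 0; i < NUM_COUNTERS; i++)
                if (__sync_bool_compare_and_swap(&nb_owners[i], ev, NULL))
                        break;
}

int main(void)
{
        struct event *a = (struct event *)0x1;  /* fake event handles */
        struct event *b = (struct event *)0x2;

        printf("a -> slot %d\n", claim_nb_slot(a, -1));      /* 0 */
        printf("b -> slot %d\n", claim_nb_slot(b, -1));      /* 1 */
        release_nb_slot(a);
        printf("b again -> slot %d\n", claim_nb_slot(b, 1)); /* still 1 */
        return 0;
}

The sketch keeps only the allocation idea: the kernel code additionally does a first pass to reuse an existing owner entry and reports the result through an event_constraint rather than a bare index.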